release 2015.02.11

[youtube] Extract average rating (closes #2362)
flake8: Ignore some errors added in pep8 1.6
2015-02-11 19:02:39 +01:00 · 2015-02-11 18:39:31 +01:00 · 2015-02-11 18:15:15 +01:00 · 2015-02-11 18:15:15 +01:00 · 2015-02-11 22:33:03 +06:00 · 2015-02-11 17:25:04 +01:00
23 changed files with 319 additions and 146 deletions
--- a/README.md
+++ b/README.md
@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     on Windows)
    --flat-playlist                  Do not extract the videos of a playlist,
                                     only list them.
+    --no-color                       Do not emit color codes in output.

 ## Network Options:
    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -225,6 +225,7 @@
 - **mailru**: Видео@Mail.Ru
 - **Malemotion**
 - **MDR**
+ - **media.ccc.de**
 - **metacafe**
 - **Metacritic**
 - **Mgoon**
--- a/setup.cfg
+++ b/setup.cfg
@ -3,4 +3,4 @@ universal = True

 [flake8]
 exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
-ignore = E501
+ignore = E402,E501,E731
--- a/test/parameters.json
+++ b/test/parameters.json
@ -39,5 +39,6 @@
    "writesubtitles": false,
    "allsubtitles": false,
    "listssubtitles": false,
-    "socket_timeout": 20
+    "socket_timeout": 20,
+    "fixup": "never"
 }
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -8,11 +8,11 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-
 import io
 import re
 import string

+from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.compat import compat_str, compat_urlretrieve

@ -88,7 +88,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
        if not os.path.exists(fn):
            compat_urlretrieve(url, fn)

-        ie = YoutubeIE()
+        ydl = FakeYDL()
+        ie = YoutubeIE(ydl)
        if stype == 'js':
            with io.open(fn, encoding='utf-8') as testf:
                jscode = testf.read()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -233,6 +233,7 @@ class YoutubeDL(object):
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
+    no_color:          Do not emit color codes in output.


    The following parameters are not used by YoutubeDL itself, they are used by
@ -490,7 +491,7 @@ class YoutubeDL(object):
        else:
            if self.params.get('no_warnings'):
                return
-            if self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
@ -502,7 +503,7 @@ class YoutubeDL(object):
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
-        if self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
@ -559,7 +560,7 @@ class YoutubeDL(object):
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

-    def _match_entry(self, info_dict):
+    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
@ -592,11 +593,12 @@ class YoutubeDL(object):
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

-        match_filter = self.params.get('match_filter')
-        if match_filter is not None:
-            ret = match_filter(info_dict)
-            if ret is not None:
-                return ret
+        if not incomplete:
+            match_filter = self.params.get('match_filter')
+            if match_filter is not None:
+                ret = match_filter(info_dict)
+                if ret is not None:
+                    return ret

        return None

@ -791,7 +793,7 @@ class YoutubeDL(object):
                    'extractor_key': ie_result['extractor_key'],
                }

-                reason = self._match_entry(entry)
+                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue
@ -1165,7 +1167,7 @@ class YoutubeDL(object):
        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

-        reason = self._match_entry(info_dict)
+        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -349,6 +349,7 @@ def _real_main(argv=None):
        'playlist_items': opts.playlist_items,
        'xattr_set_filesize': opts.xattr_set_filesize,
        'match_filter': match_filter,
+        'no_color': opts.no_color,
    }

    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
-
 import base64
 from math import ceil

@ -329,3 +327,5 @@ def inc(data):
            data[i] = data[i] + 1
            break
    return data
+
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -54,6 +54,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
+from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@ -15,12 +15,13 @@ from ..utils import (

 class CanalplusIE(InfoExtractor):
    IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
-    _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
+    _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
    _SITE_ID_MAP = {
        'canalplus.fr': 'cplus',
        'piwiplus.fr': 'teletoon',
        'd8.tv': 'd8',
+        'itele.fr': 'itele',
    }

    _TESTS = [{
@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor):
            'upload_date': '20131108',
        },
        'skip': 'videos get deleted after a while',
+    }, {
+        'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
+        'md5': '65aa83ad62fe107ce29e564bb8712580',
+        'info_dict': {
+            'id': '1213714',
+            'ext': 'flv',
+            'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
+            'description': 'md5:8216206ec53426ea6321321f3b3c16db',
+            'upload_date': '20150211',
+        },
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@ -0,0 +1,99 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    qualities,
+    unified_strdate,
+)
+
+
+class CCCIE(InfoExtractor):
+    """Information extractor for recordings hosted on media.ccc.de."""
+    IE_NAME = 'media.ccc.de'
+    # NOTE(review): the bare '.' after the id group consumes one character, so the test URL yields id '20131228183' (final digit dropped) - confirm intended
+    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
+
+    _TEST = {
+        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
+        'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
+        'info_dict': {
+            'id': '20131228183',
+            'ext': 'mp4',
+            'title': 'Introduction to Processor Design',
+            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'view_count': int,
+            'upload_date': '20131229',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Scrape the talk page for metadata and every listed download (HTTP and torrent) and return the info dict."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        if self._downloader.params.get('prefer_free_formats'):
+            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
+        else:
+            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
+
+        title = self._html_search_regex(r'(?s)<h1>(.*?)</h1>', webpage, 'title')
+        description = self._html_search_regex(
+            r"(?s)<p class='description'>(.*?)</p>", webpage, 'description', fatal=False)
+        upload_date_text = self._html_search_regex(
+            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>", webpage, 'upload date', fatal=False)
+        upload_date = unified_strdate(upload_date_text) if upload_date_text else None
+        view_count = int_or_none(self._html_search_regex(
+            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>", webpage, 'view count', fatal=False))
+
+        matches = re.finditer(r'''(?xs)
+            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
+            <a\s+href='(?P<http_url>[^']+)'>\s*
+            (?:
+                .*?
+                <a\s+href='(?P<torrent_url>[^']+\.torrent)'
+            )?''', webpage)
+        formats = []
+        for m in matches:
+            format_name = m.group('format')
+            format_id = self._search_regex(
+                r'.*/([a-z0-9_-]+)/[^/]*$',
+                m.group('http_url'), 'format id', default=None)
+            # format_id is None when the URL regex does not match (default=None); guard the substring test against TypeError
+            vcodec = 'h264' if format_id and 'h264' in format_id else (
+                'none' if format_id in ('mp3', 'opus') else None
+            )
+            formats.append({
+                'format_id': format_id,
+                'format': format_name,
+                'url': m.group('http_url'),
+                'vcodec': vcodec,
+                'preference': preference(format_id),
+            })
+
+            if m.group('torrent_url'):
+                formats.append({
+                    'format_id': 'torrent-%s' % (format_name if format_id is None else format_id),
+                    'format': '%s (torrent)' % format_name,
+                    'proto': 'torrent',
+                    'format_note': '(unsupported; will just download the .torrent file)',
+                    'vcodec': vcodec,
+                    'preference': -100 + preference(format_id),
+                    'url': m.group('torrent_url'),
+                })
+        self._sort_formats(formats)
+
+        thumbnail = self._html_search_regex(r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -157,6 +157,7 @@ class InfoExtractor(object):
    view_count:     How many users have watched the video on the platform.
    like_count:     Number of positive ratings of the video
    dislike_count:  Number of negative ratings of the video
+    average_rating: Average rating given by users; the scale used depends on the webpage
    comment_count:  Number of comments on the video
    comments:       A list of comments, each with one or more of the following
                    properties (all but one of text or html optional):
@ -271,7 +272,7 @@ class InfoExtractor(object):
            raise
        except compat_http_client.IncompleteRead as e:
            raise ExtractorError('A network error has occured.', cause=e, expected=True)
-        except (KeyError,) as e:
+        except (KeyError, StopIteration) as e:
            raise ExtractorError('An extractor error has occured.', cause=e)

    def set_downloader(self, downloader):
@ -514,7 +515,7 @@ class InfoExtractor(object):
                if mobj:
                    break

-        if os.name != 'nt' and sys.stderr.isatty():
+        if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
            _name = '\033[0;34m%s\033[0m' % name
        else:
            _name = name
--- a/youtube_dl/extractor/dctp.py
+++ b/youtube_dl/extractor/dctp.py
@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor):
            'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
            'ext': 'flv',
            'title': 'Videoinstallation für eine Kaufhausfassade'
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
        }
    }

--- a/youtube_dl/extractor/dotsub.py
+++ b/youtube_dl/extractor/dotsub.py
@ -1,13 +1,14 @@
 from __future__ import unicode_literals

-import re
-import time
-
 from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+)


 class DotsubIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
        'md5': '0914d4d69605090f623b7ac329fea66e',
@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor):
            'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
            'ext': 'flv',
            'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
+            'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
+            'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
+            'duration': 3169,
            'uploader': '4v4l0n42',
-            'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism  and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
-            'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
+            'timestamp': 1292248482.625,
            'upload_date': '20101213',
+            'view_count': int,
        }
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
-        info = self._download_json(info_url, video_id)
-        date = time.gmtime(info['dateCreated'] / 1000)  # The timestamp is in miliseconds
+        video_id = self._match_id(url)
+
+        info = self._download_json(
+            'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
+        video_url = info.get('mediaURI')
+
+        if not video_url:
+            webpage = self._download_webpage(url, video_id)
+            video_url = self._search_regex(
+                r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')

        return {
            'id': video_id,
-            'url': info['mediaURI'],
+            'url': video_url,
            'ext': 'flv',
            'title': info['title'],
-            'thumbnail': info['screenshotURI'],
-            'description': info['description'],
-            'uploader': info['user'],
-            'view_count': info['numberOfViews'],
-            'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
+            'description': info.get('description'),
+            'thumbnail': info.get('screenshotURI'),
+            'duration': int_or_none(info.get('duration'), 1000),
+            'uploader': info.get('user'),
+            'timestamp': float_or_none(info.get('dateCreated'), 1000),
+            'view_count': int_or_none(info.get('numberOfViews')),
        }
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dl/extractor/escapist.py
@ -1,18 +1,17 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse,
 )
 from ..utils import (
    ExtractorError,
+    js_to_json,
 )


 class EscapistIE(InfoExtractor):
-    _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
+    _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
    _TEST = {
        'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
        'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@ -20,31 +19,30 @@ class EscapistIE(InfoExtractor):
            'id': '6618',
            'ext': 'mp4',
            'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
-            'uploader': 'the-escapist-presents',
+            'uploader_id': 'the-escapist-presents',
+            'uploader': 'The Escapist Presents',
            'title': "Breaking Down Baldur's Gate",
        }
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        showName = mobj.group('showname')
-        video_id = mobj.group('id')
-
-        self.report_extraction(video_id)
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        videoDesc = self._html_search_regex(
-            r'<meta name="description" content="([^"]*)"',
-            webpage, 'description', fatal=False)
+        uploader_id = self._html_search_regex(
+            r"<h1 class='headline'><a href='/videos/view/(.*?)'",
+            webpage, 'uploader ID', fatal=False)
+        uploader = self._html_search_regex(
+            r"<h1 class='headline'>(.*?)</a>",
+            webpage, 'uploader', fatal=False)
+        description = self._html_search_meta('description', webpage)

-        playerUrl = self._og_search_video_url(webpage, name='player URL')
+        raw_title = self._html_search_meta('title', webpage, fatal=True)
+        title = raw_title.partition(' : ')[2]

-        title = self._html_search_regex(
-            r'<meta name="title" content="([^"]*)"',
-            webpage, 'title').split(' : ')[-1]
-
-        configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
-        configUrl = compat_urllib_parse.unquote(configUrl)
+        player_url = self._og_search_video_url(webpage, name='player URL')
+        config_url = compat_urllib_parse.unquote(self._search_regex(
+            r'config=(.*)$', player_url, 'config URL'))

        formats = []

@ -53,18 +51,21 @@ class EscapistIE(InfoExtractor):
                cfgurl, video_id,
                'Downloading ' + name + ' configuration',
                'Unable to download ' + name + ' configuration',
-                transform_source=lambda s: s.replace("'", '"'))
+                transform_source=js_to_json)

            playlist = config['playlist']
+            video_url = next(
+                p['url'] for p in playlist
+                if p.get('eventCategory') == 'Video')
            formats.append({
-                'url': playlist[1]['url'],
+                'url': video_url,
                'format_id': name,
                'quality': quality,
            })

-        _add_format('normal', configUrl, quality=0)
-        hq_url = (configUrl +
-                  ('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
+        _add_format('normal', config_url, quality=0)
+        hq_url = (config_url +
+                  ('&hq=1' if '?' in config_url else config_url + '?hq=1'))
        try:
            _add_format('hq', hq_url, quality=1)
        except ExtractorError:
@ -75,9 +76,10 @@ class EscapistIE(InfoExtractor):
        return {
            'id': video_id,
            'formats': formats,
-            'uploader': showName,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
-            'description': videoDesc,
-            'player_url': playerUrl,
+            'description': description,
+            'player_url': player_url,
        }
--- a/youtube_dl/extractor/firstpost.py
+++ b/youtube_dl/extractor/firstpost.py
@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
-        title = self._html_search_meta('twitter:title', page, 'title')
+
+        title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
        description = self._html_search_meta('twitter:description', page, 'title')

        data = self._download_xml(
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
                'height': int(details.find('./height').text.strip()),
            } for details in item.findall('./source/file_details') if details.find('./file').text
        ]
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/ntvru.py
+++ b/youtube_dl/extractor/ntvru.py
@ -3,7 +3,9 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
-    unescapeHTML
+    clean_html,
+    xpath_text,
+    int_or_none,
 )


@ -14,73 +16,63 @@ class NTVRuIE(InfoExtractor):
    _TESTS = [
        {
            'url': 'http://www.ntv.ru/novosti/863142/',
+            'md5': 'ba7ea172a91cb83eb734cad18c10e723',
            'info_dict': {
                'id': '746000',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+                'thumbnail': 're:^http://.*\.jpg',
                'duration': 136,
            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
        },
        {
            'url': 'http://www.ntv.ru/video/novosti/750370/',
+            'md5': 'adecff79691b4d71e25220a191477124',
            'info_dict': {
                'id': '750370',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+                'thumbnail': 're:^http://.*\.jpg',
                'duration': 172,
            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
        },
        {
            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
+            'md5': '82dbd49b38e3af1d00df16acbeab260c',
            'info_dict': {
                'id': '747480',
-                'ext': 'flv',
-                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
-                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
+                'ext': 'mp4',
+                'title': '«Сегодня». 21 марта 2014 года. 16:00',
+                'description': '«Сегодня». 21 марта 2014 года. 16:00',
+                'thumbnail': 're:^http://.*\.jpg',
                'duration': 1496,
            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
        },
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
+            'md5': 'f825770930937aa7e5aca0dc0d29319a',
            'info_dict': {
-                'id': '758100',
-                'ext': 'flv',
+                'id': '1007609',
+                'ext': 'mp4',
                'title': 'Остросюжетный фильм «Кома»',
                'description': 'Остросюжетный фильм «Кома»',
+                'thumbnail': 're:^http://.*\.jpg',
                'duration': 5592,
            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
        },
        {
            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
+            'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
            'info_dict': {
                'id': '751482',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': '«Дело врачей»: «Деревце жизни»',
                'description': '«Дело врачей»: «Деревце жизни»',
+                'thumbnail': 're:^http://.*\.jpg',
                'duration': 2590,
            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
        },
    ]

@ -92,45 +84,36 @@ class NTVRuIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        page = self._download_webpage(url, video_id)

-        video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
+        webpage = self._download_webpage(url, video_id)

-        player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
-        title = unescapeHTML(player.find('./data/title').text)
-        description = unescapeHTML(player.find('./data/description').text)
+        video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
+
+        player = self._download_xml(
+            'http://www.ntv.ru/vi%s/' % video_id,
+            video_id, 'Downloading video XML')
+        title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
+        description = clean_html(xpath_text(player, './data/description', 'description'))

        video = player.find('./data/video')
-        video_id = video.find('./id').text
-        thumbnail = video.find('./splash').text
-        duration = int(video.find('./totaltime').text)
-        view_count = int(video.find('./views').text)
-        puid22 = video.find('./puid22').text
+        video_id = xpath_text(video, './id', 'video id')
+        thumbnail = xpath_text(video, './splash', 'thumbnail')
+        duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
+        view_count = int_or_none(xpath_text(video, './views', 'view count'))

-        apps = {
-            '4': 'video1',
-            '7': 'video2',
-        }
-
-        app = apps.get(puid22, apps['4'])
+        token = self._download_webpage(
+            'http://stat.ntv.ru/services/access/token',
+            video_id, 'Downloading access token')

        formats = []
        for format_id in ['', 'hi', 'webm']:
-            file = video.find('./%sfile' % format_id)
-            if file is None:
+            file_ = video.find('./%sfile' % format_id)
+            if file_ is None:
                continue
            size = video.find('./%ssize' % format_id)
            formats.append({
-                'url': 'rtmp://media.ntv.ru/%s' % app,
-                'app': app,
-                'play_path': file.text,
-                'rtmp_conn': 'B:1',
-                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
-                'page_url': 'http://www.ntv.ru',
-                'flash_version': 'LNX 11,2,202,341',
-                'rtmp_live': True,
-                'ext': 'flv',
-                'filesize': int(size.text),
+                'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
+                'filesize': int_or_none(size.text if size is not None else None),
            })
        self._sort_formats(formats)

--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor):

        quality = qualities(['sd', 'hd'])
        sources = json.loads(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
+            webpage, 'sources')))
        formats = []
-        for container, s in sources.items():
-            for qname, video_url in s.items():
-                formats.append({
-                    'url': video_url,
-                    'container': container,
-                    'format_id': '%s-%s' % (container, qname),
-                    'quality': quality(qname),
-                })
+        for qname, video_url in sources.items():
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'format_id': qname,
+                'quality': quality(qname),
+            })
        self._sort_formats(formats)

        return {
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@ -30,6 +30,11 @@ class TeamcocoIE(InfoExtractor):
            }
        }
    ]
+    _VIDEO_ID_REGEXES = (
+        r'"eVar42"\s*:\s*(\d+)',
+        r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
+        r'"id_not"\s*:\s*(\d+)'
+    )

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -40,8 +45,7 @@ class TeamcocoIE(InfoExtractor):
        video_id = mobj.group("video_id")
        if not video_id:
            video_id = self._html_search_regex(
-                r'<div\s+class="player".*?data-id="(\d+?)"',
-                webpage, 'video id')
+                self._VIDEO_ID_REGEXES, webpage, 'video id')

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_xml(
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -188,9 +188,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        password_request = compat_urllib_request.Request(pass_url + '/password', data)
        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        password_request.add_header('Cookie', 'xsrft=%s' % token)
-        self._download_webpage(password_request, video_id,
-                               'Verifying the password',
-                               'Wrong password')
+        return self._download_webpage(
+            password_request, video_id,
+            'Verifying the password', 'Wrong password')

    def _verify_player_video_password(self, url, video_id):
        password = self._downloader.params.get('videopassword', None)
@ -266,7 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')

-            if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
+            if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
                self._verify_video_password(url, video_id, webpage)
                return self._real_extract(url)
            else:
@ -412,12 +412,47 @@ class VimeoChannelIE(InfoExtractor):
    def _extract_list_title(self, webpage):
        return self._html_search_regex(self._TITLE_RE, webpage, 'list title')

+    def _login_list_password(self, page_url, list_id, webpage):
+        login_form = self._search_regex(
+            r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
+            webpage, 'login form', default=None)
+        if not login_form:
+            return webpage
+
+        password = self._downloader.params.get('videopassword', None)
+        if password is None:
+            raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            value="([^"]*)"
+            ''', login_form))
+        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+        fields['token'] = token
+        fields['password'] = password
+        post = compat_urllib_parse.urlencode(fields)
+        password_path = self._search_regex(
+            r'action="([^"]+)"', login_form, 'password URL')
+        password_url = compat_urlparse.urljoin(page_url, password_path)
+        password_request = compat_urllib_request.Request(password_url, post)
+        password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
+        self._set_cookie('vimeo.com', 'xsrft', token)
+
+        return self._download_webpage(
+            password_request, list_id,
+            'Verifying the password', 'Wrong password')
+
    def _extract_videos(self, list_id, base_url):
        video_ids = []
        for pagenum in itertools.count(1):
+            page_url = self._page_url(base_url, pagenum)
            webpage = self._download_webpage(
-                self._page_url(base_url, pagenum), list_id,
+                page_url, list_id,
                'Downloading page %s' % pagenum)
+
+            if pagenum == 1:
+                webpage = self._login_list_password(page_url, list_id, webpage)
+
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break
@ -464,14 +499,24 @@ class VimeoAlbumIE(VimeoChannelIE):
            'title': 'Staff Favorites: November 2013',
        },
        'playlist_mincount': 13,
+    }, {
+        'note': 'Password-protected album',
+        'url': 'https://vimeo.com/album/3253534',
+        'info_dict': {
+            'title': 'test',
+            'id': '3253534',
+        },
+        'playlist_count': 1,
+        'params': {
+            'videopassword': 'youtube-dl',
+        }
    }]

    def _page_url(self, base_url, pagenum):
        return '%s/page:%d/' % (base_url, pagenum)

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        album_id = mobj.group('id')
+        album_id = self._match_id(url)
        return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)


--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -25,6 +25,7 @@ from ..compat import (
 from ..utils import (
    clean_html,
    ExtractorError,
+    float_or_none,
    get_element_by_attribute,
    get_element_by_id,
    int_or_none,
@ -1124,6 +1125,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
+            'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
            'formats': formats,
        }

--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@ -165,6 +165,11 @@ def parseOpts(overrideArguments=None):
        action='store_const', dest='extract_flat', const='in_playlist',
        default=False,
        help='Do not extract the videos of a playlist, only list them.')
+    general.add_option(
+        '--no-color', '--no-colors',
+        action='store_true', dest='no_color',
+        default=False,
+        help='Do not emit color codes in output.')

    network = optparse.OptionGroup(parser, 'Network Options')
    network.add_option(
@ -552,7 +557,7 @@ def parseOpts(overrideArguments=None):
        action='store_true', dest='youtube_print_sig_code', default=False,
        help=optparse.SUPPRESS_HELP)
    verbosity.add_option(
-        '--print-traffic',
+        '--print-traffic', '--dump-headers',
        dest='debug_printtraffic', action='store_true', default=False,
        help='Display sent and read HTTP traffic')
    verbosity.add_option(
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.02.10.2'
+__version__ = '2015.02.11'
Author	SHA1	Message	Date
Philipp Hagemeister	5277f09dfc	release 2015.02.11	2015-02-11 19:02:39 +01:00
Jaime Marquínez Ferrándiz	2d30521ab9	[youtube] Extract average rating (closes #2362 )	2015-02-11 18:39:31 +01:00
Jaime Marquínez Ferrándiz	050fa43561	flake8: Ignore some error added in pep8 1.6 * E402: we execute code between imports, like modifying 'sys.path' in the tests * E731: we assign to lambdas in a lot of places, we may want to consider defining functions in a single line instead (what pep8 recommends)	2015-02-11 18:15:15 +01:00
Jaime Marquínez Ferrándiz	f36f92f4da	[aes] style: Put __all__ variable at the end of the file	2015-02-11 18:15:15 +01:00
Sergey M․	124f3bc67d	[dotsub] Fix extraction and modernize	2015-02-11 22:33:03 +06:00
Jaime Marquínez Ferrándiz	d304209a85	[test/parameters.json] Set 'fixup' to 'never' The fixed audio files for Youtube have a size lower than the minimum required.	2015-02-11 17:25:04 +01:00
Sergey M․	c56d7d899d	[dctptv] Skip rtmp download	2015-02-11 22:10:33 +06:00
Naglis Jonaitis	ea5db8469e	[canalplus] Add support for itele.fr URLs (Closes #4931 )	2015-02-11 16:21:52 +02:00
Naglis Jonaitis	3811c567e7	[teamcoco] Fix video id extraction	2015-02-11 15:47:19 +02:00
Sergey M․	054fe3cc40	[ntvru] Adapt to new direct delivery and modernize (Closes #4918 )	2015-02-10 21:35:34 +06:00
Philipp Hagemeister	af0d11f244	release 2015.02.10.5	2015-02-10 15:56:04 +01:00
Philipp Hagemeister	9650885be9	[escapist] Filter video differently (Fixes #4919 )	2015-02-10 15:55:51 +01:00
Philipp Hagemeister	596ac6e31f	[escapist] Modernize	2015-02-10 15:45:36 +01:00
Philipp Hagemeister	612ee37365	release 2015.02.10.4	2015-02-10 11:28:34 +01:00
Philipp Hagemeister	442c37b7a9	[YoutubeDL] Do not perform filter matching on partial results (Fixes #4921 )	2015-02-10 11:28:28 +01:00
Philipp Hagemeister	04bbe41330	release 2015.02.10.3	2015-02-10 05:42:47 +01:00
Philipp Hagemeister	8f84f57183	[ccc] Add new extractor (Fixes #4890 )	2015-02-10 05:42:41 +01:00
Philipp Hagemeister	6a78740211	[test/test_youtube_signature] Use fake YDL	2015-02-10 05:28:59 +01:00
Philipp Hagemeister	c0e1a415fd	[firstpost] Modernize	2015-02-10 05:28:48 +01:00
Philipp Hagemeister	bf8f082a90	[vimeo:album] Add support for album passwords (Fixes #4917 )	2015-02-10 04:53:21 +01:00
Philipp Hagemeister	2f543a2142	[options] Add alias --dump-headers for --print-traffic	2015-02-10 04:52:33 +01:00
Philipp Hagemeister	7e5db8c930	[options] Add --no-color	2015-02-10 04:22:10 +01:00
Philipp Hagemeister	f7a211dcc8	[pornhd] Fix extraction (fixes #4915 )	2015-02-10 03:41:31 +01:00