release 2015.02.11

[youtube] Extract average rating (closes #2362 )
flake8: Ignore some error added in pep8 1.6
2015-02-11 19:02:39 +01:00 · 2015-02-11 18:39:31 +01:00 · 2015-02-11 18:15:15 +01:00 · 2015-02-11 18:15:15 +01:00 · 2015-02-11 22:33:03 +06:00 · 2015-02-11 17:25:04 +01:00
25 changed files with 478 additions and 143 deletions
--- a/README.md
+++ b/README.md
@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     on Windows)
    --flat-playlist                  Do not extract the videos of a playlist,
                                     only list them.
    --no-color                       Do not emit color codes in output.
 ## Network Options:
    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
@ -119,6 +120,23 @@ which means you can modify it, redistribute it or use it however you like.
                                     COUNT views
    --max-views COUNT                Do not download any videos with more than
                                     COUNT views
    --match-filter FILTER            (Experimental) Generic video filter.
                                     Specify any key (see help for -o for a list
                                     of available keys) to match if the key is
                                     present, !key to check if the key is not
                                     present, key > NUMBER (like "comment_count >
                                     12", also works with >=, <, <=, !=, =) to
                                     compare against a number, and & to require
                                     multiple matches. Values which are not
                                     known are excluded unless you put a
                                     question mark (?) after the operator. For
                                     example, to only match videos that have
                                     been liked more than 100 times and disliked
                                     less than 50 times (or the dislike
                                     functionality is not available at the given
                                     service), but who also have a description,
                                     use  --match-filter "like_count > 100 &
                                     dislike_count <? 50 & description" .
    --no-playlist                    If the URL refers to a video and a
                                     playlist, download only the video.
    --age-limit YEARS                download only videos suitable for the given
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -225,6 +225,7 @@
 - **mailru**: Видео@Mail.Ru
 - **Malemotion**
 - **MDR**
 - **media.ccc.de**
 - **metacafe**
 - **Metacritic**
 - **Mgoon**
--- a/setup.cfg
+++ b/setup.cfg
@ -3,4 +3,4 @@ universal = True
 [flake8]
 exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
-ignore = E501
+ignore = E402,E501,E731
--- a/test/parameters.json
+++ b/test/parameters.json
@ -39,5 +39,6 @@
    "writesubtitles": false,
    "allsubtitles": false,
    "listssubtitles": false,
-    "socket_timeout": 20
+    "socket_timeout": 20,
    "fixup": "never"
 }
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -53,6 +53,7 @@ from youtube_dl.utils import (
    version_tuple,
    xpath_with_ns,
    render_table,
    match_str,
 )
@ -459,6 +460,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
            '123  4\n'
            '9999 51')
    def test_match_str(self):
        """Check match_str against presence, numeric and string filter expressions."""
        # The one error case: this expression with an empty dict must raise ValueError.
        self.assertRaises(ValueError, match_str, 'xy>foobar', {})
        combined = 'like_count > 100 & dislike_count <? 50 & description'
        # Each row is (filter expression, info dict, expected truthiness).
        # Rows are evaluated in order, so the first failing row aborts the test.
        cases = (
            ('xy', {'x': 1200}, False),
            ('!xy', {'x': 1200}, True),
            ('x', {'x': 1200}, True),
            ('!x', {'x': 1200}, False),
            ('x', {'x': 0}, True),
            ('x>0', {'x': 0}, False),
            ('x>0', {}, False),
            ('x>?0', {}, True),
            ('x>1K', {'x': 1200}, True),
            ('x>2K', {'x': 1200}, False),
            ('x>=1200 & x < 1300', {'x': 1200}, True),
            ('x>=1100 & x < 1200', {'x': 1200}, False),
            ('y=a212', {'y': 'foobar42'}, False),
            ('y=foobar42', {'y': 'foobar42'}, True),
            ('y!=foobar42', {'y': 'foobar42'}, False),
            ('y!=foobar2', {'y': 'foobar42'}, True),
            (combined, {'like_count': 90, 'description': 'foo'}, False),
            (combined, {'like_count': 190, 'description': 'foo'}, True),
            (combined, {'like_count': 190, 'dislike_count': 60, 'description': 'foo'}, False),
            (combined, {'like_count': 190, 'dislike_count': 10}, False),
        )
        for filter_expr, info, expected in cases:
            verify = self.assertTrue if expected else self.assertFalse
            verify(match_str(filter_expr, info))
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -8,11 +8,11 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import io
 import re
 import string
 from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.compat import compat_str, compat_urlretrieve
@ -88,7 +88,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
        if not os.path.exists(fn):
            compat_urlretrieve(url, fn)
-        ie = YoutubeIE()
+        ydl = FakeYDL()
        ie = YoutubeIE(ydl)
        if stype == 'js':
            with io.open(fn, encoding='utf-8') as testf:
                jscode = testf.read()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -228,6 +228,12 @@ class YoutubeDL(object):
    external_downloader:  Executable of the external downloader to call.
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    The following parameters are not used by YoutubeDL itself, they are used by
@ -485,7 +491,7 @@ class YoutubeDL(object):
        else:
            if self.params.get('no_warnings'):
                return
-            if self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
@ -497,7 +503,7 @@ class YoutubeDL(object):
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
-        if self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
@ -554,7 +560,7 @@ class YoutubeDL(object):
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
-    def _match_entry(self, info_dict):
+    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded """
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
@ -583,9 +589,17 @@ class YoutubeDL(object):
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
-            return 'Skipping "%s" because it is age restricted' % title
+            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret
        return None
    @staticmethod
@ -779,7 +793,7 @@ class YoutubeDL(object):
                    'extractor_key': ie_result['extractor_key'],
                }
-                reason = self._match_entry(entry)
+                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue
@ -1153,7 +1167,7 @@ class YoutubeDL(object):
        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']
-        reason = self._match_entry(info_dict)
+        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -23,9 +23,10 @@ from .compat import (
 )
 from .utils import (
    DateRange,
    DEFAULT_OUTTMPL,
    decodeOption,
    DEFAULT_OUTTMPL,
    DownloadError,
    match_filter_func,
    MaxDownloadsReached,
    preferredencoding,
    read_batch_urls,
@ -247,6 +248,9 @@ def _real_main(argv=None):
            xattr  # Confuse flake8
        except ImportError:
            parser.error('setting filesize xattr requested but python-xattr is not available')
    match_filter = (
        None if opts.match_filter is None
        else match_filter_func(opts.match_filter))
    ydl_opts = {
        'usenetrc': opts.usenetrc,
@ -344,6 +348,8 @@ def _real_main(argv=None):
        'list_thumbnails': opts.list_thumbnails,
        'playlist_items': opts.playlist_items,
        'xattr_set_filesize': opts.xattr_set_filesize,
        'match_filter': match_filter,
        'no_color': opts.no_color,
    }
    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
 import base64
 from math import ceil
@ -329,3 +327,5 @@ def inc(data):
            data[i] = data[i] + 1
            break
    return data
 __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -54,6 +54,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
 from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@ -15,12 +15,13 @@ from ..utils import (
 class CanalplusIE(InfoExtractor):
    IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
-    _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
+    _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
    _SITE_ID_MAP = {
        'canalplus.fr': 'cplus',
        'piwiplus.fr': 'teletoon',
        'd8.tv': 'd8',
        'itele.fr': 'itele',
    }
    _TESTS = [{
@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor):
            'upload_date': '20131108',
        },
        'skip': 'videos get deleted after a while',
    }, {
        'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
        'md5': '65aa83ad62fe107ce29e564bb8712580',
        'info_dict': {
            'id': '1213714',
            'ext': 'flv',
            'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
            'description': 'md5:8216206ec53426ea6321321f3b3c16db',
            'upload_date': '20150211',
        },
    }]
    def _real_extract(self, url):
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@ -0,0 +1,99 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    qualities,
    unified_strdate,
 )
 class CCCIE(InfoExtractor):
    """Information extractor for video pages on media.ccc.de.

    The page HTML is scraped for title, description, upload date, view count
    and thumbnail. Every download link found on the page becomes a format
    entry; a link that is followed by a .torrent link yields a second,
    low-preference entry pointing at the torrent file.
    """
    IE_NAME = 'media.ccc.de'
    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
    _TEST = {
        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
        'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
        'info_dict': {
            'id': '20131228183',
            'ext': 'mp4',
            'title': 'Introduction to Processor Design',
            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
            # Raw string: same value as before, without the invalid '\.' escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'view_count': int,
            'upload_date': '20131229',
        }
    }
    def _real_extract(self, url):
        """Scrape the page at *url* and return the info dict for its video.

        The returned dict carries 'id', 'title', 'description', 'thumbnail',
        'view_count', 'upload_date' and 'formats'. Only the title lookup is
        left at its default fatality; the other metadata lookups pass
        fatal=False.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Two orderings of the known format ids; which one is used depends on
        # the 'prefer_free_formats' parameter.
        # NOTE(review): presumably a later position means a higher preference;
        # confirm against utils.qualities.
        if self._downloader.params.get('prefer_free_formats'):
            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
        else:
            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
        title = self._html_search_regex(
            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r"(?s)<p class='description'>(.*?)</p>",
            webpage, 'description', fatal=False)
        upload_date = unified_strdate(self._html_search_regex(
            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
            webpage, 'upload date', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
            webpage, 'view count', fatal=False))
        # Each match pairs a file-type label with its download link and,
        # optionally, a .torrent link that follows it.
        matches = re.finditer(r'''(?xs)
            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
            <a\s+href='(?P<http_url>[^']+)'>\s*
            (?:
                .*?
                <a\s+href='(?P<torrent_url>[^']+\.torrent)'
            )?''', webpage)
        formats = []
        for m in matches:
            format_name = m.group('format')
            # The format id is the last directory component of the download
            # URL; it is looked up with default=None and so may be None.
            format_id = self._search_regex(
                r'.*/([a-z0-9_-]+)/[^/]*$',
                m.group('http_url'), 'format id', default=None)
            # Guard against a None format_id: the substring test below would
            # otherwise raise TypeError.
            if format_id is None:
                vcodec = None
            elif 'h264' in format_id:
                vcodec = 'h264'
            elif format_id in ('mp3', 'opus'):
                # Audio-only formats are marked with the string 'none'.
                vcodec = 'none'
            else:
                vcodec = None
            formats.append({
                'format_id': format_id,
                'format': format_name,
                'url': m.group('http_url'),
                'vcodec': vcodec,
                'preference': preference(format_id),
            })
            if m.group('torrent_url'):
                # The torrent variant is offset by -100 in preference relative
                # to the direct download of the same format.
                formats.append({
                    'format_id': 'torrent-%s' % (format_name if format_id is None else format_id),
                    'format': '%s (torrent)' % format_name,
                    'proto': 'torrent',
                    'format_note': '(unsupported; will just download the .torrent file)',
                    'vcodec': vcodec,
                    'preference': -100 + preference(format_id),
                    'url': m.group('torrent_url'),
                })
        self._sort_formats(formats)
        thumbnail = self._html_search_regex(
            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'upload_date': upload_date,
            'formats': formats,
        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -157,6 +157,7 @@ class InfoExtractor(object):
    view_count:     How many users have watched the video on the platform.
    like_count:     Number of positive ratings of the video
    dislike_count:  Number of negative ratings of the video
    average_rating: Average rating given by users; the scale used depends on the webpage
    comment_count:  Number of comments on the video
    comments:       A list of comments, each with one or more of the following
                    properties (all but one of text or html optional):
@ -271,7 +272,7 @@ class InfoExtractor(object):
            raise
        except compat_http_client.IncompleteRead as e:
            raise ExtractorError('A network error has occured.', cause=e, expected=True)
-        except (KeyError,) as e:
+        except (KeyError, StopIteration) as e:
            raise ExtractorError('An extractor error has occured.', cause=e)
    def set_downloader(self, downloader):
@ -514,7 +515,7 @@ class InfoExtractor(object):
                if mobj:
                    break
-        if os.name != 'nt' and sys.stderr.isatty():
+        if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
            _name = '\033[0;34m%s\033[0m' % name
        else:
            _name = name
--- a/youtube_dl/extractor/dctp.py
+++ b/youtube_dl/extractor/dctp.py
@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor):
            'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
            'ext': 'flv',
            'title': 'Videoinstallation für eine Kaufhausfassade'
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }
--- a/youtube_dl/extractor/dotsub.py
+++ b/youtube_dl/extractor/dotsub.py
@ -1,13 +1,14 @@
 from __future__ import unicode_literals
 import re
 import time
 from .common import InfoExtractor
 from ..utils import (
    float_or_none,
    int_or_none,
 )
 class DotsubIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
        'md5': '0914d4d69605090f623b7ac329fea66e',
@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor):
            'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
            'ext': 'flv',
            'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
            'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
            'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
            'duration': 3169,
            'uploader': '4v4l0n42',
-            'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism  and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
+            'timestamp': 1292248482.625,
            'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
            'upload_date': '20101213',
            'view_count': int,
        }
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        video_id = self._match_id(url)
-        video_id = mobj.group('id')
+
-        info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
+        info = self._download_json(
-        info = self._download_json(info_url, video_id)
+            'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
-        date = time.gmtime(info['dateCreated'] / 1000)  # The timestamp is in miliseconds
+        video_url = info.get('mediaURI')
        if not video_url:
            webpage = self._download_webpage(url, video_id)
            video_url = self._search_regex(
                r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
        return {
            'id': video_id,
-            'url': info['mediaURI'],
+            'url': video_url,
            'ext': 'flv',
            'title': info['title'],
-            'thumbnail': info['screenshotURI'],
+            'description': info.get('description'),
-            'description': info['description'],
+            'thumbnail': info.get('screenshotURI'),
-            'uploader': info['user'],
+            'duration': int_or_none(info.get('duration'), 1000),
-            'view_count': info['numberOfViews'],
+            'uploader': info.get('user'),
-            'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
+            'timestamp': float_or_none(info.get('dateCreated'), 1000),
            'view_count': int_or_none(info.get('numberOfViews')),
        }
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dl/extractor/escapist.py
@ -1,18 +1,17 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse,
 )
 from ..utils import (
    ExtractorError,
    js_to_json,
 )
 class EscapistIE(InfoExtractor):
-    _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
+    _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
    _TEST = {
        'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
        'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@ -20,31 +19,30 @@ class EscapistIE(InfoExtractor):
            'id': '6618',
            'ext': 'mp4',
            'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
-            'uploader': 'the-escapist-presents',
+            'uploader_id': 'the-escapist-presents',
            'uploader': 'The Escapist Presents',
            'title': "Breaking Down Baldur's Gate",
        }
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        video_id = self._match_id(url)
        showName = mobj.group('showname')
        video_id = mobj.group('id')
        self.report_extraction(video_id)
        webpage = self._download_webpage(url, video_id)
-        videoDesc = self._html_search_regex(
+        uploader_id = self._html_search_regex(
-            r'<meta name="description" content="([^"]*)"',
+            r"<h1 class='headline'><a href='/videos/view/(.*?)'",
-            webpage, 'description', fatal=False)
+            webpage, 'uploader ID', fatal=False)
        uploader = self._html_search_regex(
            r"<h1 class='headline'>(.*?)</a>",
            webpage, 'uploader', fatal=False)
        description = self._html_search_meta('description', webpage)
-        playerUrl = self._og_search_video_url(webpage, name='player URL')
+        raw_title = self._html_search_meta('title', webpage, fatal=True)
        title = raw_title.partition(' : ')[2]
-        title = self._html_search_regex(
+        player_url = self._og_search_video_url(webpage, name='player URL')
-            r'<meta name="title" content="([^"]*)"',
+        config_url = compat_urllib_parse.unquote(self._search_regex(
-            webpage, 'title').split(' : ')[-1]
+            r'config=(.*)$', player_url, 'config URL'))
        configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
        configUrl = compat_urllib_parse.unquote(configUrl)
        formats = []
@ -53,18 +51,21 @@ class EscapistIE(InfoExtractor):
                cfgurl, video_id,
                'Downloading ' + name + ' configuration',
                'Unable to download ' + name + ' configuration',
-                transform_source=lambda s: s.replace("'", '"'))
+                transform_source=js_to_json)
            playlist = config['playlist']
            video_url = next(
                p['url'] for p in playlist
                if p.get('eventCategory') == 'Video')
            formats.append({
-                'url': playlist[1]['url'],
+                'url': video_url,
                'format_id': name,
                'quality': quality,
            })
-        _add_format('normal', configUrl, quality=0)
+        _add_format('normal', config_url, quality=0)
-        hq_url = (configUrl +
+        hq_url = (config_url +
-                  ('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
+                  ('&hq=1' if '?' in config_url else config_url + '?hq=1'))
        try:
            _add_format('hq', hq_url, quality=1)
        except ExtractorError:
@ -75,9 +76,10 @@ class EscapistIE(InfoExtractor):
        return {
            'id': video_id,
            'formats': formats,
-            'uploader': showName,
+            'uploader': uploader,
            'uploader_id': uploader_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
-            'description': videoDesc,
+            'description': description,
-            'player_url': playerUrl,
+            'player_url': player_url,
        }
--- a/youtube_dl/extractor/firstpost.py
+++ b/youtube_dl/extractor/firstpost.py
@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        video_id = self._match_id(url)
        video_id = mobj.group('id')
        page = self._download_webpage(url, video_id)
-        title = self._html_search_meta('twitter:title', page, 'title')
+
        title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
        description = self._html_search_meta('twitter:description', page, 'title')
        data = self._download_xml(
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
                'height': int(details.find('./height').text.strip()),
            } for details in item.findall('./source/file_details') if details.find('./file').text
        ]
        self._sort_formats(formats)
        return {
            'id': video_id,
--- a/youtube_dl/extractor/ntvru.py
+++ b/youtube_dl/extractor/ntvru.py
@ -3,7 +3,9 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
-    unescapeHTML
+    clean_html,
    xpath_text,
    int_or_none,
 )
@ -14,73 +16,63 @@ class NTVRuIE(InfoExtractor):
    _TESTS = [
        {
            'url': 'http://www.ntv.ru/novosti/863142/',
            'md5': 'ba7ea172a91cb83eb734cad18c10e723',
            'info_dict': {
                'id': '746000',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'thumbnail': 're:^http://.*\.jpg',
                'duration': 136,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/video/novosti/750370/',
            'md5': 'adecff79691b4d71e25220a191477124',
            'info_dict': {
                'id': '750370',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'thumbnail': 're:^http://.*\.jpg',
                'duration': 172,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
            'md5': '82dbd49b38e3af1d00df16acbeab260c',
            'info_dict': {
                'id': '747480',
-                'ext': 'flv',
+                'ext': 'mp4',
-                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
+                'title': '«Сегодня». 21 марта 2014 года. 16:00',
-                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
+                'description': '«Сегодня». 21 марта 2014 года. 16:00',
                'thumbnail': 're:^http://.*\.jpg',
                'duration': 1496,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
            'md5': 'f825770930937aa7e5aca0dc0d29319a',
            'info_dict': {
-                'id': '758100',
+                'id': '1007609',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': 'Остросюжетный фильм «Кома»',
                'description': 'Остросюжетный фильм «Кома»',
                'thumbnail': 're:^http://.*\.jpg',
                'duration': 5592,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
            'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
            'info_dict': {
                'id': '751482',
-                'ext': 'flv',
+                'ext': 'mp4',
                'title': '«Дело врачей»: «Деревце жизни»',
                'description': '«Дело врачей»: «Деревце жизни»',
                'thumbnail': 're:^http://.*\.jpg',
                'duration': 2590,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]
@ -92,45 +84,36 @@ class NTVRuIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
-        video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
+        webpage = self._download_webpage(url, video_id)
-        player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
+        video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
-        title = unescapeHTML(player.find('./data/title').text)
+
-        description = unescapeHTML(player.find('./data/description').text)
+        player = self._download_xml(
            'http://www.ntv.ru/vi%s/' % video_id,
            video_id, 'Downloading video XML')
        title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
        description = clean_html(xpath_text(player, './data/description', 'description'))
        video = player.find('./data/video')
-        video_id = video.find('./id').text
+        video_id = xpath_text(video, './id', 'video id')
-        thumbnail = video.find('./splash').text
+        thumbnail = xpath_text(video, './splash', 'thumbnail')
-        duration = int(video.find('./totaltime').text)
+        duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
-        view_count = int(video.find('./views').text)
+        view_count = int_or_none(xpath_text(video, './views', 'view count'))
        puid22 = video.find('./puid22').text
-        apps = {
+        token = self._download_webpage(
-            '4': 'video1',
+            'http://stat.ntv.ru/services/access/token',
-            '7': 'video2',
+            video_id, 'Downloading access token')
        }
        app = apps.get(puid22, apps['4'])
        formats = []
        for format_id in ['', 'hi', 'webm']:
-            file = video.find('./%sfile' % format_id)
+            file_ = video.find('./%sfile' % format_id)
-            if file is None:
+            if file_ is None:
                continue
            size = video.find('./%ssize' % format_id)
            formats.append({
-                'url': 'rtmp://media.ntv.ru/%s' % app,
+                'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
-                'app': app,
+                'filesize': int_or_none(size.text if size is not None else None),
                'play_path': file.text,
                'rtmp_conn': 'B:1',
                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                'page_url': 'http://www.ntv.ru',
                'flash_version': 'LNX 11,2,202,341',
                'rtmp_live': True,
                'ext': 'flv',
                'filesize': int(size.text),
            })
        self._sort_formats(formats)
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor):
        quality = qualities(['sd', 'hd'])
        sources = json.loads(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
            webpage, 'sources')))
        formats = []
-        for container, s in sources.items():
+        for qname, video_url in sources.items():
-            for qname, video_url in s.items():
+            if not video_url:
-                formats.append({
+                continue
-                    'url': video_url,
+            formats.append({
-                    'container': container,
+                'url': video_url,
-                    'format_id': '%s-%s' % (container, qname),
+                'format_id': qname,
-                    'quality': quality(qname),
+                'quality': quality(qname),
-                })
+            })
        self._sort_formats(formats)
        return {
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@ -30,6 +30,11 @@ class TeamcocoIE(InfoExtractor):
            }
        }
    ]
    _VIDEO_ID_REGEXES = (
        r'"eVar42"\s*:\s*(\d+)',
        r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
        r'"id_not"\s*:\s*(\d+)'
    )
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -40,8 +45,7 @@ class TeamcocoIE(InfoExtractor):
        video_id = mobj.group("video_id")
        if not video_id:
            video_id = self._html_search_regex(
-                r'<div\s+class="player".*?data-id="(\d+?)"',
+                self._VIDEO_ID_REGEXES, webpage, 'video id')
                webpage, 'video id')
        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
        data = self._download_xml(
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -188,9 +188,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        password_request = compat_urllib_request.Request(pass_url + '/password', data)
        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        password_request.add_header('Cookie', 'xsrft=%s' % token)
-        self._download_webpage(password_request, video_id,
+        return self._download_webpage(
-                               'Verifying the password',
+            password_request, video_id,
-                               'Wrong password')
+            'Verifying the password', 'Wrong password')
    def _verify_player_video_password(self, url, video_id):
        password = self._downloader.params.get('videopassword', None)
@ -266,7 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
-            if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
+            if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
                self._verify_video_password(url, video_id, webpage)
                return self._real_extract(url)
            else:
@ -412,12 +412,47 @@ class VimeoChannelIE(InfoExtractor):
    def _extract_list_title(self, webpage):
        """Return the list title matched by ``self._TITLE_RE`` in ``webpage``."""
        list_title = self._html_search_regex(
            self._TITLE_RE, webpage, 'list title')
        return list_title
    def _login_list_password(self, page_url, list_id, webpage):
        """Submit the password form contained in ``webpage``, if there is one.

        When ``webpage`` has no ``<form id="pw_form">`` it is returned as is.
        Otherwise the form's hidden inputs, the page's ``xsrft`` token and the
        ``videopassword`` downloader parameter are POSTed to the form's
        action URL (resolved against ``page_url``), and the result of that
        download is returned.

        Raises ExtractorError if the form is present but no video password
        was supplied.
        """
        login_form = self._search_regex(
            r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
            webpage, 'login form', default=None)
        if not login_form:
            # No password form found: nothing to unlock.
            return webpage

        password = self._downloader.params.get('videopassword', None)
        if password is None:
            raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)

        # Collect the hidden inputs so they are sent back with the password.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            value="([^"]*)"
            ''', login_form))
        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
        fields['token'] = token
        fields['password'] = password
        # urlencode() returns text on Python 3, but a request body has to be
        # bytes there; the percent-encoded result is plain ASCII.
        post = compat_urllib_parse.urlencode(fields).encode('ascii')

        password_path = self._search_regex(
            r'action="([^"]+)"', login_form, 'password URL')
        password_url = compat_urlparse.urljoin(page_url, password_path)
        password_request = compat_urllib_request.Request(password_url, post)
        password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
        # The same token is sent as a cookie alongside the form field.
        self._set_cookie('vimeo.com', 'xsrft', token)

        return self._download_webpage(
            password_request, list_id,
            'Verifying the password', 'Wrong password')
    def _extract_videos(self, list_id, base_url):
        video_ids = []
        for pagenum in itertools.count(1):
            page_url = self._page_url(base_url, pagenum)
            webpage = self._download_webpage(
-                self._page_url(base_url, pagenum), list_id,
+                page_url, list_id,
                'Downloading page %s' % pagenum)
            if pagenum == 1:
                webpage = self._login_list_password(page_url, list_id, webpage)
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break
@ -464,14 +499,24 @@ class VimeoAlbumIE(VimeoChannelIE):
            'title': 'Staff Favorites: November 2013',
        },
        'playlist_mincount': 13,
    }, {
        'note': 'Password-protected album',
        'url': 'https://vimeo.com/album/3253534',
        'info_dict': {
            'title': 'test',
            'id': '3253534',
        },
        'playlist_count': 1,
        'params': {
            'videopassword': 'youtube-dl',
        }
    }]
    def _page_url(self, base_url, pagenum):
        return '%s/page:%d/' % (base_url, pagenum)
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        album_id = self._match_id(url)
        album_id = mobj.group('id')
        return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -25,6 +25,7 @@ from ..compat import (
 from ..utils import (
    clean_html,
    ExtractorError,
    float_or_none,
    get_element_by_attribute,
    get_element_by_id,
    int_or_none,
@ -1124,6 +1125,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
            'formats': formats,
        }
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@ -165,6 +165,11 @@ def parseOpts(overrideArguments=None):
        action='store_const', dest='extract_flat', const='in_playlist',
        default=False,
        help='Do not extract the videos of a playlist, only list them.')
    general.add_option(
        '--no-color', '--no-colors',
        action='store_true', dest='no_color',
        default=False,
        help='Do not emit color codes in output.')
    network = optparse.OptionGroup(parser, 'Network Options')
    network.add_option(
@ -244,6 +249,25 @@ def parseOpts(overrideArguments=None):
        '--max-views',
        metavar='COUNT', dest='max_views', default=None, type=int,
        help='Do not download any videos with more than COUNT views')
    # NOTE: the help text is assembled from adjacent string literals, so every
    # fragment must carry its own separating space.
    selection.add_option(
        '--match-filter',
        metavar='FILTER', dest='match_filter', default=None,
        help=(
            '(Experimental) Generic video filter. '
            'Specify any key (see help for -o for a list of available keys) to'
            ' match if the key is present, '
            '!key to check if the key is not present, '
            'key > NUMBER (like "comment_count > 12", also works with '
            '>=, <, <=, !=, =) to compare against a number, and '
            '& to require multiple matches. '
            'Values which are not known are excluded unless you'
            ' put a question mark (?) after the operator. '
            'For example, to only match videos that have been liked more than '
            '100 times and disliked less than 50 times (or the dislike '
            'functionality is not available at the given service), but who '
            'also have a description, use  --match-filter '
            '"like_count > 100 & dislike_count <? 50 & description" .'
        ))
    selection.add_option(
        '--no-playlist',
        action='store_true', dest='noplaylist', default=False,
@ -533,7 +557,7 @@ def parseOpts(overrideArguments=None):
        action='store_true', dest='youtube_print_sig_code', default=False,
        help=optparse.SUPPRESS_HELP)
    verbosity.add_option(
-        '--print-traffic',
+        '--print-traffic', '--dump-headers',
        dest='debug_printtraffic', action='store_true', default=False,
        help='Display sent and read HTTP traffic')
    verbosity.add_option(
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -17,6 +17,7 @@ import io
 import json
 import locale
 import math
 import operator
 import os
 import pipes
 import platform
@ -1678,3 +1679,79 @@ def render_table(header_row, data):
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
 def _match_one(filter_part, dct):
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('strval') is not None:
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('strval')
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)
    raise ValueError('Invalid filter part %r' % filter_part)
 def match_str(filter_str, dct):
    """ Check dct against every '&'-separated part of filter_str. Returns True only if all parts pass, False otherwise """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
 def match_filter_func(filter_str):
    """ Build a callable that checks an info dict against filter_str: it returns None when the dict passes and a skip message otherwise """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Label the message with the title, else the id, else a generic name.
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals
-__version__ = '2015.02.10.1'
+__version__ = '2015.02.11'
Author	SHA1	Message	Date
Philipp Hagemeister	5277f09dfc	release 2015.02.11	2015-02-11 19:02:39 +01:00
Jaime Marquínez Ferrándiz	2d30521ab9	[youtube] Extract average rating (closes #2362 )	2015-02-11 18:39:31 +01:00
Jaime Marquínez Ferrándiz	050fa43561	flake8: Ignore some error added in pep8 1.6 * E402: we execute code between imports, like modifying 'sys.path' in the tests * E731: we assign to lambdas in a lot of places, we may want to consider defining functions in a single line instead (what pep8 recommends)	2015-02-11 18:15:15 +01:00
Jaime Marquínez Ferrándiz	f36f92f4da	[aes] style: Put __all__ variable at the end of the file	2015-02-11 18:15:15 +01:00
Sergey M․	124f3bc67d	[dotsub] Fix extraction and modernize	2015-02-11 22:33:03 +06:00
Jaime Marquínez Ferrándiz	d304209a85	[test/parameters.json] Set 'fixup' to 'never' The fixed audio files for Youtube have a size lower than the minimum required.	2015-02-11 17:25:04 +01:00
Sergey M․	c56d7d899d	[dctptv] Skip rtmp download	2015-02-11 22:10:33 +06:00
Naglis Jonaitis	ea5db8469e	[canalplus] Add support for itele.fr URLs (Closes #4931 )	2015-02-11 16:21:52 +02:00
Naglis Jonaitis	3811c567e7	[teamcoco] Fix video id extraction	2015-02-11 15:47:19 +02:00
Sergey M․	054fe3cc40	[ntvru] Adapt to new direct delivery and modernize (Closes #4918 )	2015-02-10 21:35:34 +06:00
Philipp Hagemeister	af0d11f244	release 2015.02.10.5	2015-02-10 15:56:04 +01:00
Philipp Hagemeister	9650885be9	[escapist] Filter video differently (Fixes #4919 )	2015-02-10 15:55:51 +01:00
Philipp Hagemeister	596ac6e31f	[escapist] Modernize	2015-02-10 15:45:36 +01:00
Philipp Hagemeister	612ee37365	release 2015.02.10.4	2015-02-10 11:28:34 +01:00
Philipp Hagemeister	442c37b7a9	[YoutubeDL] Do not perform filter matching on partial results (Fixes #4921 )	2015-02-10 11:28:28 +01:00
Philipp Hagemeister	04bbe41330	release 2015.02.10.3	2015-02-10 05:42:47 +01:00
Philipp Hagemeister	8f84f57183	[ccc] Add new extractor (Fixes #4890 )	2015-02-10 05:42:41 +01:00
Philipp Hagemeister	6a78740211	[test/test_youtube_signature] Use fake YDL	2015-02-10 05:28:59 +01:00
Philipp Hagemeister	c0e1a415fd	[firstpost] Modernize	2015-02-10 05:28:48 +01:00
Philipp Hagemeister	bf8f082a90	[vimeo:album] Add support for album passwords (Fixes #4917 )	2015-02-10 04:53:21 +01:00
Philipp Hagemeister	2f543a2142	[options] Add alias --dump-header for --print-traffic	2015-02-10 04:52:33 +01:00
Philipp Hagemeister	7e5db8c930	[options] Add --no-color	2015-02-10 04:22:10 +01:00
Philipp Hagemeister	f7a211dcc8	[pornhd] Fix extraction (fixes #4915 )	2015-02-10 03:41:31 +01:00
Philipp Hagemeister	845734773d	release 2015.02.10.2	2015-02-10 03:32:55 +01:00
Philipp Hagemeister	347de4931c	[YoutubeDL] Add generic video filtering (Fixes #4916 ) This functionality is intended to eventually encompass the current format filtering.	2015-02-10 03:32:24 +01:00
`@ -1,3 +1,3 @@`
	`from __future__ import unicode_literals`	`from __future__ import unicode_literals`

	`__version__ = '2015.02.10.1'`	`__version__ = '2015.02.11'`