release 2015.02.10.3

[ccc] Add new extractor (Fixes #4890 )
[test/test_youtube_signature] Use fake YDL
2015-02-10 05:42:47 +01:00 · 2015-02-10 05:42:41 +01:00 · 2015-02-10 05:28:59 +01:00 · 2015-02-10 05:28:48 +01:00 · 2015-02-10 04:53:21 +01:00 · 2015-02-10 04:52:33 +01:00
13 changed files with 183 additions and 29 deletions
--- a/README.md
+++ b/README.md
@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     on Windows)
    --flat-playlist                  Do not extract the videos of a playlist,
                                     only list them.
    --no-color                       Do not emit color codes in output.
 ## Network Options:
    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -225,6 +225,7 @@
 - **mailru**: Видео@Mail.Ru
 - **Malemotion**
 - **MDR**
 - **media.ccc.de**
 - **metacafe**
 - **Metacritic**
 - **Mgoon**
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -8,11 +8,11 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import io
 import re
 import string
 from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.compat import compat_str, compat_urlretrieve
@ -88,7 +88,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
        if not os.path.exists(fn):
            compat_urlretrieve(url, fn)
-        ie = YoutubeIE()
+        ydl = FakeYDL()
        ie = YoutubeIE(ydl)
        if stype == 'js':
            with io.open(fn, encoding='utf-8') as testf:
                jscode = testf.read()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -233,6 +233,7 @@ class YoutubeDL(object):
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    The following parameters are not used by YoutubeDL itself, they are used by
@ -490,7 +491,7 @@ class YoutubeDL(object):
        else:
            if self.params.get('no_warnings'):
                return
-            if self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
@ -502,7 +503,7 @@ class YoutubeDL(object):
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
-        if self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -349,6 +349,7 @@ def _real_main(argv=None):
        'playlist_items': opts.playlist_items,
        'xattr_set_filesize': opts.xattr_set_filesize,
        'match_filter': match_filter,
        'no_color': opts.no_color,
    }
    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -54,6 +54,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
 from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@ -0,0 +1,99 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    qualities,
    unified_strdate,
 )
 class CCCIE(InfoExtractor):
    IE_NAME = 'media.ccc.de'
    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
    _TEST = {
        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
        'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
        'info_dict': {
            'id': '20131228183',
            'ext': 'mp4',
            'title': 'Introduction to Processor Design',
            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
            'thumbnail': 're:^https?://.*\.jpg$',
            'view_count': int,
            'upload_date': '20131229',
        }
    }
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        if self._downloader.params.get('prefer_free_formats'):
            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
        else:
            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
        title = self._html_search_regex(
            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r"(?s)<p class='description'>(.*?)</p>",
            webpage, 'description', fatal=False)
        upload_date = unified_strdate(self._html_search_regex(
            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
            webpage, 'upload date', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
            webpage, 'view count', fatal=False))
        matches = re.finditer(r'''(?xs)
            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
            <a\s+href='(?P<http_url>[^']+)'>\s*
            (?:
                .*?
                <a\s+href='(?P<torrent_url>[^']+\.torrent)'
            )?''', webpage)
        formats = []
        for m in matches:
            format = m.group('format')
            format_id = self._search_regex(
                r'.*/([a-z0-9_-]+)/[^/]*$',
                m.group('http_url'), 'format id', default=None)
            vcodec = 'h264' if 'h264' in format_id else (
                'none' if format_id in ('mp3', 'opus') else None
            )
            formats.append({
                'format_id': format_id,
                'format': format,
                'url': m.group('http_url'),
                'vcodec': vcodec,
                'preference': preference(format_id),
            })
            if m.group('torrent_url'):
                formats.append({
                    'format_id': 'torrent-%s' % (format if format_id is None else format_id),
                    'format': '%s (torrent)' % format,
                    'proto': 'torrent',
                    'format_note': '(unsupported; will just download the .torrent file)',
                    'vcodec': vcodec,
                    'preference': -100 + preference(format_id),
                    'url': m.group('torrent_url'),
                })
        self._sort_formats(formats)
        thumbnail = self._html_search_regex(
            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'upload_date': upload_date,
            'formats': formats,
        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -514,7 +514,7 @@ class InfoExtractor(object):
                if mobj:
                    break
-        if os.name != 'nt' and sys.stderr.isatty():
+        if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
            _name = '\033[0;34m%s\033[0m' % name
        else:
            _name = name
--- a/youtube_dl/extractor/firstpost.py
+++ b/youtube_dl/extractor/firstpost.py
@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        video_id = self._match_id(url)
        video_id = mobj.group('id')
        page = self._download_webpage(url, video_id)
-        title = self._html_search_meta('twitter:title', page, 'title')
+
        title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
        description = self._html_search_meta('twitter:description', page, 'title')
        data = self._download_xml(
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
                'height': int(details.find('./height').text.strip()),
            } for details in item.findall('./source/file_details') if details.find('./file').text
        ]
        self._sort_formats(formats)
        return {
            'id': video_id,
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor):
        quality = qualities(['sd', 'hd'])
        sources = json.loads(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
            webpage, 'sources')))
        formats = []
-        for container, s in sources.items():
+        for qname, video_url in sources.items():
-            for qname, video_url in s.items():
+            if not video_url:
-                formats.append({
+                continue
-                    'url': video_url,
+            formats.append({
-                    'container': container,
+                'url': video_url,
-                    'format_id': '%s-%s' % (container, qname),
+                'format_id': qname,
-                    'quality': quality(qname),
+                'quality': quality(qname),
-                })
+            })
        self._sort_formats(formats)
        return {
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -188,9 +188,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        password_request = compat_urllib_request.Request(pass_url + '/password', data)
        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        password_request.add_header('Cookie', 'xsrft=%s' % token)
-        self._download_webpage(password_request, video_id,
+        return self._download_webpage(
-                               'Verifying the password',
+            password_request, video_id,
-                               'Wrong password')
+            'Verifying the password', 'Wrong password')
    def _verify_player_video_password(self, url, video_id):
        password = self._downloader.params.get('videopassword', None)
@ -266,7 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
-            if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
+            if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
                self._verify_video_password(url, video_id, webpage)
                return self._real_extract(url)
            else:
@ -412,12 +412,47 @@ class VimeoChannelIE(InfoExtractor):
    def _extract_list_title(self, webpage):
        return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
    def _login_list_password(self, page_url, list_id, webpage):
        login_form = self._search_regex(
            r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
            webpage, 'login form', default=None)
        if not login_form:
            return webpage
        password = self._downloader.params.get('videopassword', None)
        if password is None:
            raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            value="([^"]*)"
            ''', login_form))
        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
        fields['token'] = token
        fields['password'] = password
        post = compat_urllib_parse.urlencode(fields)
        password_path = self._search_regex(
            r'action="([^"]+)"', login_form, 'password URL')
        password_url = compat_urlparse.urljoin(page_url, password_path)
        password_request = compat_urllib_request.Request(password_url, post)
        password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
        self._set_cookie('vimeo.com', 'xsrft', token)
        return self._download_webpage(
            password_request, list_id,
            'Verifying the password', 'Wrong password')
    def _extract_videos(self, list_id, base_url):
        video_ids = []
        for pagenum in itertools.count(1):
            page_url = self._page_url(base_url, pagenum)
            webpage = self._download_webpage(
-                self._page_url(base_url, pagenum), list_id,
+                page_url, list_id,
                'Downloading page %s' % pagenum)
            if pagenum == 1:
                webpage = self._login_list_password(page_url, list_id, webpage)
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break
@ -464,14 +499,24 @@ class VimeoAlbumIE(VimeoChannelIE):
            'title': 'Staff Favorites: November 2013',
        },
        'playlist_mincount': 13,
    }, {
        'note': 'Password-protected album',
        'url': 'https://vimeo.com/album/3253534',
        'info_dict': {
            'title': 'test',
            'id': '3253534',
        },
        'playlist_count': 1,
        'params': {
            'videopassword': 'youtube-dl',
        }
    }]
    def _page_url(self, base_url, pagenum):
        return '%s/page:%d/' % (base_url, pagenum)
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        album_id = self._match_id(url)
        album_id = mobj.group('id')
        return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@ -165,6 +165,11 @@ def parseOpts(overrideArguments=None):
        action='store_const', dest='extract_flat', const='in_playlist',
        default=False,
        help='Do not extract the videos of a playlist, only list them.')
    general.add_option(
        '--no-color', '--no-colors',
        action='store_true', dest='no_color',
        default=False,
        help='Do not emit color codes in output.')
    network = optparse.OptionGroup(parser, 'Network Options')
    network.add_option(
@ -552,7 +557,7 @@ def parseOpts(overrideArguments=None):
        action='store_true', dest='youtube_print_sig_code', default=False,
        help=optparse.SUPPRESS_HELP)
    verbosity.add_option(
-        '--print-traffic',
+        '--print-traffic', '--dump-headers',
        dest='debug_printtraffic', action='store_true', default=False,
        help='Display sent and read HTTP traffic')
    verbosity.add_option(
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals
-__version__ = '2015.02.10.2'
+__version__ = '2015.02.10.3'
Author	SHA1	Message	Date
Philipp Hagemeister	04bbe41330	release 2015.02.10.3	2015-02-10 05:42:47 +01:00
Philipp Hagemeister	8f84f57183	[ccc] Add new extractor (Fixes #4890 )	2015-02-10 05:42:41 +01:00
Philipp Hagemeister	6a78740211	[test/test_youtube_signature] Use fake YDL	2015-02-10 05:28:59 +01:00
Philipp Hagemeister	c0e1a415fd	[firstpost] Modernize	2015-02-10 05:28:48 +01:00
Philipp Hagemeister	bf8f082a90	[vimeo:album] Add support for album passwords (Fixes #4917 )	2015-02-10 04:53:21 +01:00
Philipp Hagemeister	2f543a2142	[options] Add alias --dump-header for --print-traffic	2015-02-10 04:52:33 +01:00
Philipp Hagemeister	7e5db8c930	[options] Add --no-color	2015-02-10 04:22:10 +01:00
Philipp Hagemeister	f7a211dcc8	[pornhd] Fix extraction (fixes #4915 )	2015-02-10 03:41:31 +01:00
`@ -1,3 +1,3 @@`
	`from __future__ import unicode_literals`	`from __future__ import unicode_literals`

	`__version__ = '2015.02.10.2'`	`__version__ = '2015.02.10.3'`