release 2015.02.10.3

[ccc] Add new extractor (Fixes #4890 )
[test/test_youtube_signature] Use fake YDL
2015-02-10 05:42:47 +01:00 · 2015-02-10 05:42:41 +01:00 · 2015-02-10 05:28:59 +01:00 · 2015-02-10 05:28:48 +01:00 · 2015-02-10 04:53:21 +01:00 · 2015-02-10 04:52:33 +01:00
13 changed files with 183 additions and 29 deletions
--- a/README.md
+++ b/README.md
@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     on Windows)
    --flat-playlist                  Do not extract the videos of a playlist,
                                     only list them.
+    --no-color                       Do not emit color codes in output.

 ## Network Options:
    --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -225,6 +225,7 @@
 - **mailru**: Видео@Mail.Ru
 - **Malemotion**
 - **MDR**
+ - **media.ccc.de**
 - **metacafe**
 - **Metacritic**
 - **Mgoon**
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -8,11 +8,11 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-
 import io
 import re
 import string

+from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.compat import compat_str, compat_urlretrieve

@ -88,7 +88,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
        if not os.path.exists(fn):
            compat_urlretrieve(url, fn)

-        ie = YoutubeIE()
+        ydl = FakeYDL()
+        ie = YoutubeIE(ydl)
        if stype == 'js':
            with io.open(fn, encoding='utf-8') as testf:
                jscode = testf.read()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -233,6 +233,7 @@ class YoutubeDL(object):
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
+    no_color:          Do not emit color codes in output.


    The following parameters are not used by YoutubeDL itself, they are used by
@ -490,7 +491,7 @@ class YoutubeDL(object):
        else:
            if self.params.get('no_warnings'):
                return
-            if self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
@ -502,7 +503,7 @@ class YoutubeDL(object):
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
-        if self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@ -349,6 +349,7 @@ def _real_main(argv=None):
        'playlist_items': opts.playlist_items,
        'xattr_set_filesize': opts.xattr_set_filesize,
        'match_filter': match_filter,
+        'no_color': opts.no_color,
    }

    with YoutubeDL(ydl_opts) as ydl:
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -54,6 +54,7 @@ from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
+from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@ -0,0 +1,128 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    qualities,
+    unified_strdate,
+)
+
+
+class CCCIE(InfoExtractor):
+    """Extractor for individual talk pages on media.ccc.de."""
+
+    IE_NAME = 'media.ccc.de'
+    # NOTE(review): the "." right after the id group is unescaped and matches
+    # any character.  The test below expects the id '20131228183', which only
+    # results because that "." consumes the last digit of the 12-digit
+    # timestamp in the URL -- confirm the intended id before tightening this.
+    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
+
+    _TEST = {
+        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
+        'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
+        'info_dict': {
+            'id': '20131228183',
+            'ext': 'mp4',
+            'title': 'Introduction to Processor Design',
+            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'view_count': int,
+            'upload_date': '20131229',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Scrape a talk page and return the info dict for its recording.
+
+        Description, upload date, view count and thumbnail are looked up
+        with fatal=False.  One format is emitted per download link found on
+        the page, plus one more for the .torrent link that follows it, if
+        any.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Two orderings of the site's format ids; they differ only in whether
+        # the free formats (opus, webm) sort after or before their
+        # counterparts.  Presumably a later position means a higher
+        # preference -- see utils.qualities.
+        if self._downloader.params.get('prefer_free_formats'):
+            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
+        else:
+            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
+
+        title = self._html_search_regex(
+            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
+        description = self._html_search_regex(
+            r"(?s)<p class='description'>(.*?)</p>",
+            webpage, 'description', fatal=False)
+        upload_date = unified_strdate(self._html_search_regex(
+            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
+            webpage, 'upload date', fatal=False))
+        view_count = int_or_none(self._html_search_regex(
+            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
+            webpage, 'view count', fatal=False))
+
+        matches = re.finditer(r'''(?xs)
+            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
+            <a\s+href='(?P<http_url>[^']+)'>\s*
+            (?:
+                .*?
+                <a\s+href='(?P<torrent_url>[^']+\.torrent)'
+            )?''', webpage)
+        formats = []
+        for m in matches:
+            format_name = m.group('format')
+            # The format id is the last directory component of the file URL;
+            # it stays None when the URL does not have that shape.
+            format_id = self._search_regex(
+                r'.*/([a-z0-9_-]+)/[^/]*$',
+                m.group('http_url'), 'format id', default=None)
+            if format_id is None:
+                # Nothing to infer the codec from; leave it unspecified
+                # instead of failing on the substring test below.
+                vcodec = None
+            elif 'h264' in format_id:
+                vcodec = 'h264'
+            elif format_id in ('mp3', 'opus'):
+                vcodec = 'none'  # audio-only
+            else:
+                vcodec = None
+            formats.append({
+                'format_id': format_id,
+                'format': format_name,
+                'url': m.group('http_url'),
+                'vcodec': vcodec,
+                'preference': preference(format_id),
+            })
+
+            if m.group('torrent_url'):
+                formats.append({
+                    'format_id': 'torrent-%s' % (format_name if format_id is None else format_id),
+                    'format': '%s (torrent)' % format_name,
+                    # NOTE(review): presumably meant to be the 'protocol'
+                    # format field -- confirm against InfoExtractor's docs.
+                    'proto': 'torrent',
+                    'format_note': '(unsupported; will just download the .torrent file)',
+                    'vcodec': vcodec,
+                    # Offset so the torrent ranks 100 below its direct download.
+                    'preference': -100 + preference(format_id),
+                    'url': m.group('torrent_url'),
+                })
+        self._sort_formats(formats)
+
+        thumbnail = self._html_search_regex(
+            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -514,7 +514,7 @@ class InfoExtractor(object):
                if mobj:
                    break

-        if os.name != 'nt' and sys.stderr.isatty():
+        if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
            _name = '\033[0;34m%s\033[0m' % name
        else:
            _name = name
--- a/youtube_dl/extractor/firstpost.py
+++ b/youtube_dl/extractor/firstpost.py
@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
-        title = self._html_search_meta('twitter:title', page, 'title')
+
+        title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
        description = self._html_search_meta('twitter:description', page, 'title')

        data = self._download_xml(
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
                'height': int(details.find('./height').text.strip()),
            } for details in item.findall('./source/file_details') if details.find('./file').text
        ]
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@ -46,14 +46,15 @@ class PornHdIE(InfoExtractor):

        quality = qualities(['sd', 'hd'])
        sources = json.loads(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
+            webpage, 'sources')))
        formats = []
-        for container, s in sources.items():
-            for qname, video_url in s.items():
+        for qname, video_url in sources.items():
+            if not video_url:
+                continue
            formats.append({
                'url': video_url,
-                    'container': container,
-                    'format_id': '%s-%s' % (container, qname),
+                'format_id': qname,
                'quality': quality(qname),
            })
        self._sort_formats(formats)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -188,9 +188,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        password_request = compat_urllib_request.Request(pass_url + '/password', data)
        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        password_request.add_header('Cookie', 'xsrft=%s' % token)
-        self._download_webpage(password_request, video_id,
-                               'Verifying the password',
-                               'Wrong password')
+        return self._download_webpage(
+            password_request, video_id,
+            'Verifying the password', 'Wrong password')

    def _verify_player_video_password(self, url, video_id):
        password = self._downloader.params.get('videopassword', None)
@ -266,7 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')

-            if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
+            if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
                self._verify_video_password(url, video_id, webpage)
                return self._real_extract(url)
            else:
@ -412,12 +412,60 @@ class VimeoChannelIE(InfoExtractor):
    def _extract_list_title(self, webpage):
        return self._html_search_regex(self._TITLE_RE, webpage, 'list title')

+    def _login_list_password(self, page_url, list_id, webpage):
+        """Submit the list's password form, if the page has one.
+
+        Returns *webpage* unchanged when it contains no form with
+        id="pw_form".  Otherwise posts the 'videopassword' parameter together
+        with the form's hidden inputs and the xsrft token to the form's
+        action URL, and returns the page that request yields.
+
+        Raises ExtractorError if the form is present but no password was
+        given.
+        """
+        login_form = self._search_regex(
+            r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
+            webpage, 'login form', default=None)
+        if not login_form:
+            return webpage
+
+        password = self._downloader.params.get('videopassword', None)
+        if password is None:
+            raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            value="([^"]*)"
+            ''', login_form))
+        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+        fields['token'] = token
+        fields['password'] = password
+        # urlencode() yields text, but Python 3's urllib.request rejects a
+        # str request body; the percent-encoded result is pure ASCII.
+        post = compat_urllib_parse.urlencode(fields).encode('ascii')
+        password_path = self._search_regex(
+            r'action="([^"]+)"', login_form, 'password URL')
+        password_url = compat_urlparse.urljoin(page_url, password_path)
+        password_request = compat_urllib_request.Request(password_url, post)
+        password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
+        # The token is sent both as a form field and as the xsrft cookie.
+        self._set_cookie('vimeo.com', 'xsrft', token)
+
+        return self._download_webpage(
+            password_request, list_id,
+            'Verifying the password', 'Wrong password')
+
    def _extract_videos(self, list_id, base_url):
        video_ids = []
        for pagenum in itertools.count(1):
+            page_url = self._page_url(base_url, pagenum)
            webpage = self._download_webpage(
-                self._page_url(base_url, pagenum), list_id,
+                page_url, list_id,
                'Downloading page %s' % pagenum)
+
+            if pagenum == 1:
+                webpage = self._login_list_password(page_url, list_id, webpage)
+
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break
@ -464,14 +499,24 @@ class VimeoAlbumIE(VimeoChannelIE):
            'title': 'Staff Favorites: November 2013',
        },
        'playlist_mincount': 13,
+    }, {
+        'note': 'Password-protected album',
+        'url': 'https://vimeo.com/album/3253534',
+        'info_dict': {
+            'title': 'test',
+            'id': '3253534',
+        },
+        'playlist_count': 1,
+        'params': {
+            'videopassword': 'youtube-dl',
+        }
    }]

    def _page_url(self, base_url, pagenum):
        return '%s/page:%d/' % (base_url, pagenum)

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        album_id = mobj.group('id')
+        album_id = self._match_id(url)
        return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)


--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@ -165,6 +165,11 @@ def parseOpts(overrideArguments=None):
        action='store_const', dest='extract_flat', const='in_playlist',
        default=False,
        help='Do not extract the videos of a playlist, only list them.')
+    general.add_option(
+        '--no-color', '--no-colors',
+        action='store_true', dest='no_color',
+        default=False,
+        help='Do not emit color codes in output.')

    network = optparse.OptionGroup(parser, 'Network Options')
    network.add_option(
@ -552,7 +557,7 @@ def parseOpts(overrideArguments=None):
        action='store_true', dest='youtube_print_sig_code', default=False,
        help=optparse.SUPPRESS_HELP)
    verbosity.add_option(
-        '--print-traffic',
+        '--print-traffic', '--dump-headers',
        dest='debug_printtraffic', action='store_true', default=False,
        help='Display sent and read HTTP traffic')
    verbosity.add_option(
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.02.10.2'
+__version__ = '2015.02.10.3'
Author	SHA1	Message	Date
Philipp Hagemeister	04bbe41330	release 2015.02.10.3	2015-02-10 05:42:47 +01:00
Philipp Hagemeister	8f84f57183	[ccc] Add new extractor (Fixes #4890 )	2015-02-10 05:42:41 +01:00
Philipp Hagemeister	6a78740211	[test/test_youtube_signature] Use fake YDL	2015-02-10 05:28:59 +01:00
Philipp Hagemeister	c0e1a415fd	[firstpost] Modernize	2015-02-10 05:28:48 +01:00
Philipp Hagemeister	bf8f082a90	[vimeo:album] Add support for album passwords (Fixes #4917 )	2015-02-10 04:53:21 +01:00
Philipp Hagemeister	2f543a2142	[options] Add alias --dump-header for --print-traffic	2015-02-10 04:52:33 +01:00
Philipp Hagemeister	7e5db8c930	[options] Add --no-color	2015-02-10 04:22:10 +01:00
Philipp Hagemeister	f7a211dcc8	[pornhd] Fix extraction (fixes #4915 )	2015-02-10 03:41:31 +01:00