release 2014.11.16

[spiegel] Correct handling of redirects to spiegel.tv (Closes #4211)
[spiegeltv] Match hash-style URLs (Closes #4210)
2014-11-16 00:51:46 +01:00 · 2014-11-16 00:51:31 +01:00 · 2014-11-16 00:40:09 +01:00 · 2014-11-16 00:33:51 +01:00 · 2014-11-15 22:00:32 +01:00 · 2014-11-15 16:56:04 +02:00
19 changed files with 150 additions and 56 deletions
--- a/AUTHORS
+++ b/AUTHORS
@ -81,3 +81,4 @@ winwon
 Xavier Beynon
 Gabriel Schubiner
 xantares
+Jan Matějka
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -284,6 +284,10 @@ class TestUtil(unittest.TestCase):
        d = json.loads(stripped)
        self.assertEqual(d, [{"id": "532cb", "x": 3}])

+        stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'STATUS': 'OK'})
+
    def test_uppercase_escape(self):
        self.assertEqual(uppercase_escape('aä'), 'aä')
        self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@ -288,6 +288,14 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
 else:
    compat_getpass = getpass.getpass

+# Old 2.6 and 2.7 releases require kwargs to be bytes
+try:
+    (lambda x: x)(**{'x': 0})
+except TypeError:
+    def compat_kwargs(kwargs):
+        return dict((bytes(k), v) for k, v in kwargs.items())
+else:
+    compat_kwargs = lambda kwargs: kwargs

 __all__ = [
    'compat_HTTPError',
@ -299,6 +307,7 @@ __all__ = [
    'compat_html_entities',
    'compat_html_parser',
    'compat_http_client',
+    'compat_kwargs',
    'compat_ord',
    'compat_parse_qs',
    'compat_print',
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -127,6 +127,7 @@ from .francetv import (
 )
 from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
+from .freevideo import FreeVideoIE
 from .funnyordie import FunnyOrDieIE
 from .gamekings import GamekingsIE
 from .gameone import (
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@ -71,11 +71,12 @@ class BlipTVIE(SubtitlesInfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        lookup_id = mobj.group('lookup_id')

-        # See https://github.com/rg3/youtube-dl/issues/857
+        # See https://github.com/rg3/youtube-dl/issues/857 and
+        # https://github.com/rg3/youtube-dl/issues/4197
        if lookup_id:
            info_page = self._download_webpage(
                'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
-            video_id = self._search_regex(r'data-episode-id="([0-9]+)', info_page, 'video_id')
+            video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
        else:
            video_id = mobj.group('id')

--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@ -31,7 +31,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
    }


-class ComedyCentralShowsIE(InfoExtractor):
+class ComedyCentralShowsIE(MTVServicesInfoExtractor):
    IE_DESC = 'The Daily Show / The Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
@ -109,14 +109,6 @@ class ComedyCentralShowsIE(InfoExtractor):
        '400': (384, 216),
    }

-    @staticmethod
-    def _transform_rtmp_url(rtmp_video_url):
-        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
-        if not m:
-            raise ExtractorError('Cannot transform RTMP url')
-        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
-        return base + m.group('finalid')
-
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
@ -212,9 +204,6 @@ class ComedyCentralShowsIE(InfoExtractor):
                    'ext': self._video_extensions.get(format, 'mp4'),
                    'height': h,
                    'width': w,
-
-                    'format_note': 'HTTP 400 at the moment (patches welcome!)',
-                    'preference': -100,
                })
                formats.append({
                    'format_id': 'rtmp-%s' % format,
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@ -264,8 +264,6 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
            if not lang_code:
                continue
            sub_root = xml.etree.ElementTree.fromstring(subtitle)
-            if not sub_root:
-                subtitles[lang_code] = ''
            if sub_format == 'ass':
                subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
            else:
--- a/youtube_dl/extractor/freevideo.py
+++ b/youtube_dl/extractor/freevideo.py
@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class FreeVideoIE(InfoExtractor):
+    _VALID_URL = r'^http://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
+
+    _TEST = {
+        'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
+        'info_dict': {
+            'id': 'vysukany-zadecek-22033',
+            'ext': 'mp4',
+            "title": "vysukany-zadecek-22033",
+            "age_limit": 18,
+        },
+        'skip': 'Blocked outside .cz',
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage, handle = self._download_webpage_handle(url, video_id)
+        if '//www.czechav.com/' in handle.geturl():
+            raise ExtractorError(
+                'Access to freevideo is blocked from your location',
+                expected=True)
+
+        video_url = self._search_regex(
+            r'\s+url: "(http://[a-z0-9-]+.cdn.freevideo.cz/stream/.*?/video.mp4)"',
+            webpage, 'video URL')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_id,
+            'age_limit': 18,
+        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -434,7 +434,17 @@ class GenericIE(InfoExtractor):
                'title': 'Chet Chat 171 - Oct 29, 2014',
                'upload_date': '20141029',
            }
-        }
+        },
+        # Livestream embed
+        {
+            'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
+            'info_dict': {
+                'id': '67864563',
+                'ext': 'flv',
+                'upload_date': '20141112',
+                'title': 'Rosetta #CometLanding webcast HL 10',
+            }
+        },
    ]

    def report_following_redirect(self, new_url):
@ -916,6 +926,12 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')

+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Livestream')
+
        def check_video(vurl):
            vpath = compat_urlparse.urlparse(vurl).path
            vext = determine_ext(vpath)
--- a/youtube_dl/extractor/goldenmoustache.py
+++ b/youtube_dl/extractor/goldenmoustache.py
@ -10,7 +10,7 @@ from ..utils import (

 class GoldenMoustacheIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/',
        'md5': '0f904432fa07da5054d6c8beb5efb51a',
        'info_dict': {
@ -21,7 +21,18 @@ class GoldenMoustacheIE(InfoExtractor):
            'thumbnail': 're:^https?://.*\.jpg$',
            'view_count': int,
        }
-    }
+    }, {
+        'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/',
+        'md5': '27f0c50fb4dd5f01dc9082fc67cd5700',
+        'info_dict': {
+            'id': '55249',
+            'ext': 'mp4',
+            'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)',
+            'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a',
+            'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
+            'view_count': int,
+        }
+    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -30,7 +41,7 @@ class GoldenMoustacheIE(InfoExtractor):
        video_url = self._html_search_regex(
            r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL')
        title = self._html_search_regex(
-            r'<title>(.*?) - Golden Moustache</title>', webpage, 'title')
+            r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)
        view_count = int_or_none(self._html_search_regex(
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@ -18,7 +18,7 @@ from ..utils import (

 class LivestreamIE(InfoExtractor):
    IE_NAME = 'livestream'
-    _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
+    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        'md5': '53274c76ba7754fb0e8d072716f2292b',
@ -37,6 +37,9 @@ class LivestreamIE(InfoExtractor):
            'title': 'TEDCity2.0 (English)',
        },
        'playlist_mincount': 4,
+    }, {
+        'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
+        'only_matching': True,
    }]

    def _parse_smil(self, video_id, smil_url):
--- a/youtube_dl/extractor/mailru.py
+++ b/youtube_dl/extractor/mailru.py
@ -16,7 +16,7 @@ class MailRuIE(InfoExtractor):
            'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
            'md5': 'dea205f03120046894db4ebb6159879a',
            'info_dict': {
-                'id': '46301138',
+                'id': '46301138_76',
                'ext': 'mp4',
                'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
                'timestamp': 1393232740,
@ -30,7 +30,7 @@ class MailRuIE(InfoExtractor):
            'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
            'md5': '00a91a58c3402204dcced523777b475f',
            'info_dict': {
-                'id': '46843144',
+                'id': '46843144_1263',
                'ext': 'mp4',
                'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
                'timestamp': 1397217632,
@ -54,33 +54,36 @@ class MailRuIE(InfoExtractor):

        author = video_data['author']
        uploader = author['name']
-        uploader_id = author['id']
+        uploader_id = author.get('id') or author.get('email')
+        view_count = video_data.get('views_count')

-        movie = video_data['movie']
-        content_id = str(movie['contentId'])
-        title = movie['title']
+        meta_data = video_data['meta']
+        content_id = '%s_%s' % (
+            meta_data.get('accId', ''), meta_data['itemId'])
+        title = meta_data['title']
        if title.endswith('.mp4'):
            title = title[:-4]
-        thumbnail = movie['poster']
-        duration = movie['duration']
-
-        view_count = video_data['views_count']
+        thumbnail = meta_data['poster']
+        duration = meta_data['duration']
+        timestamp = meta_data['timestamp']

        formats = [
            {
                'url': video['url'],
-                'format_id': video['name'],
+                'format_id': video['key'],
+                'height': int(video['key'].rstrip('p'))
            } for video in video_data['videos']
        ]
+        self._sort_formats(formats)

        return {
            'id': content_id,
            'title': title,
            'thumbnail': thumbnail,
-            'timestamp': video_data['timestamp'],
+            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
-        }
+        }
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@ -186,7 +186,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
    def _get_feed_url(self, uri):
        video_id = self._id_from_uri(uri)
        site_id = uri.replace(video_id, '')
-        config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id)
+        config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
+            'context4/context5/config.xml'.format(site_id))
        config_doc = self._download_xml(config_url, video_id)
        feed_node = config_doc.find('.//feed')
        feed_url = feed_node.text.strip().split('?')[0]
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@ -7,6 +7,7 @@ from ..utils import (
    unified_strdate,
    parse_duration,
    qualities,
+    strip_jsonp,
    url_basename,
 )

@ -63,7 +64,7 @@ class NPOIE(InfoExtractor):
            'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
            video_id,
            # We have to remove the javascript callback
-            transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j)
+            transform_source=strip_jsonp,
        )
        token_page = self._download_webpage(
            'http://ida.omroep.nl/npoplayer/i.js',
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@ -5,6 +5,7 @@ import re

 from .common import InfoExtractor
 from ..compat import compat_urlparse
+from .spiegeltv import SpiegeltvIE


 class SpiegelIE(InfoExtractor):
@ -42,7 +43,11 @@ class SpiegelIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage, handle = self._download_webpage_handle(url, video_id)
+
+        # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
+        if SpiegeltvIE.suitable(handle.geturl()):
+            return self.url_result(handle.geturl(), 'Spiegeltv')

        title = re.sub(r'\s+', ' ', self._html_search_regex(
            r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dl/extractor/spiegeltv.py
@ -1,13 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import re
 from .common import InfoExtractor
+from ..utils import float_or_none


 class SpiegeltvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)'
+    _TESTS = [{
        'url': 'http://www.spiegel.tv/filme/flug-mh370/',
        'info_dict': {
            'id': 'flug-mh370',
@ -20,12 +20,15 @@ class SpiegeltvIE(InfoExtractor):
            # rtmp download
            'skip_download': True,
        }
-    }
+    }, {
+        'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        if '/#/' in url:
+            url = url.replace('/#/', '/')
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')

@ -61,12 +64,8 @@ class SpiegeltvIE(InfoExtractor):
            })

        description = media_json['subtitle']
-        duration = media_json['duration_in_ms'] / 1000.
-
-        if is_wide:
-            format = '16x9'
-        else:
-            format = '4x3'
+        duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)
+        format = '16x9' if is_wide else '4x3'

        url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v'

--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@ -8,6 +8,7 @@ import sys
 from .compat import (
    compat_expanduser,
    compat_getenv,
+    compat_kwargs,
 )
 from .utils import (
    get_term_width,
@ -112,7 +113,7 @@ def parseOpts(overrideArguments=None):
        'conflict_handler': 'resolve',
    }

-    parser = optparse.OptionParser(**kw)
+    parser = optparse.OptionParser(**compat_kwargs(kw))

    general = optparse.OptionGroup(parser, 'General Options')
    general.add_option(
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -73,10 +73,22 @@ def preferredencoding():
 def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically """

+    if sys.version_info < (3, 0):
+        encoding = get_filesystem_encoding()
+        # os.path.basename returns a bytes object, but NamedTemporaryFile
+        # will fail if the filename contains non ascii characters unless we
+        # use a unicode object
+        path_basename = lambda f: os.path.basename(fn).decode(encoding)
+        # the same for os.path.dirname
+        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
+    else:
+        path_basename = os.path.basename
+        path_dirname = os.path.dirname
+
    args = {
        'suffix': '.tmp',
-        'prefix': os.path.basename(fn) + '.',
-        'dir': os.path.dirname(fn),
+        'prefix': path_basename(fn) + '.',
+        'dir': path_dirname(fn),
        'delete': False,
    }

@ -843,7 +855,7 @@ def bytes_to_intlist(bs):
 def intlist_to_bytes(xs):
    if not xs:
        return b''
-    return struct.pack('%dB' % len(xs), *xs)
+    return struct_pack('%dB' % len(xs), *xs)


 # Cross-platform file locking
@ -1331,7 +1343,8 @@ def parse_age_limit(s):


 def strip_jsonp(code):
-    return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
+    return re.sub(
+        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)


 def js_to_json(code):
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@

-__version__ = '2014.11.13.1'
+__version__ = '2014.11.16'
Author	SHA1	Message	Date
Philipp Hagemeister	0cf166ad4f	release 2014.11.16	2014-11-16 00:51:46 +01:00
Philipp Hagemeister	2707b50ffe	[spiegel] Correct handling of redirects to spiegel.tv (Closes #4211 )	2014-11-16 00:51:31 +01:00
Philipp Hagemeister	939fe70de0	[spiegeltv] Match hash-style URLs (Closes #4210 )	2014-11-16 00:40:09 +01:00
Philipp Hagemeister	89c15fe0b3	[spiegeltv] Modernize	2014-11-16 00:33:51 +01:00
Jaime Marquínez Ferrándiz	ec5f601670	[utils] Fix "write_json_file" for unicode names in python 2.x (fixes #4125 )	2014-11-15 22:00:32 +01:00
Naglis Jonaitis	8caa0c9779	[bliptv] Fix the resolve of lookup ID (Closes #4197 )	2014-11-15 16:56:04 +02:00
Philipp Hagemeister	e2548b5b25	release 2014.11.15.1	2014-11-15 15:21:50 +01:00
Philipp Hagemeister	bbefcf04bf	[goldenmoustache] Fix title (Closes #4203 )	2014-11-15 15:21:34 +01:00
Philipp Hagemeister	c7b0add86f	[compat] Work around kwargs bugs in old 2.6 Python releases (Fixes #3813 )	2014-11-15 15:17:19 +01:00
Philipp Hagemeister	a0155d93d9	release 2014.11.15	2014-11-15 11:01:54 +01:00
Philipp Hagemeister	00d9ef0b70	[mailru] Adapt to new data format (Fixes #4201 )	2014-11-15 11:01:17 +01:00
Philipp Hagemeister	0cc8888038	[crunchyroll] Remove NOP code (#2782 )	2014-11-15 00:34:03 +01:00
Philipp Hagemeister	c735450e07	release 2014.11.14	2014-11-14 22:27:56 +01:00
Jaime Marquínez Ferrándiz	71f8c7ce7a	[mtvservices:embedded] Improve config url (fixes #4092 )	2014-11-14 19:02:18 +01:00
Jaime Marquínez Ferrándiz	5fee0eeac0	[ComedyCentralShows] Use the rtmp urls transform function from the MTV IE (fixes #3364). It produces the right mp4 urls, so we stop preferring the rtmp urls.	2014-11-14 18:36:04 +01:00
Philipp Hagemeister	eb4157fd17	[utils] Fix struct.pack call on very old Python versions (#4181 )	2014-11-14 00:39:32 +01:00
Philipp Hagemeister	69ede8ef81	release 2014.11.13.3	2014-11-13 16:28:24 +01:00
Philipp Hagemeister	609a61e3e6	[npo] Improve npo.nl (Fixes #4173 )	2014-11-13 16:28:05 +01:00
Philipp Hagemeister	bf951c5e29	release 2014.11.13.2	2014-11-13 16:12:54 +01:00
Philipp Hagemeister	af63fed7d8	[generic] Add support for livestream embeds (Fixes #4185 )	2014-11-13 16:12:51 +01:00
Philipp Hagemeister	68d1d41c03	Credit @yaccz for freevideo (#4131 )	2014-11-13 15:59:48 +01:00
Philipp Hagemeister	3deed1e91a	[freevideo] Simplify and raise error for foreigners (Fixes #4131 )	2014-11-13 15:59:22 +01:00
Philipp Hagemeister	11b28e93d3	Merge remote-tracking branch 'yaccz/add-extractor/freevideo'	2014-11-13 15:53:16 +01:00
yac	3898c8a7b2	[FreeVideo] Add new extractor	2014-11-08 00:13:28 +01:00