release 2015.02.10

[extractor/common] Wrap extractor errors (Fixes #1194 )
For now, we just wrap some common errors. More may follow. We do not want to catch actual programming errors in the extractors, such as 1 // 0.
2015-02-10 01:19:52 +01:00 · 2015-02-10 01:17:23 +01:00 · 2015-02-09 19:08:51 +01:00 · 2015-02-09 16:05:01 +01:00 · 2015-02-09 15:59:19 +01:00 · 2015-02-09 15:59:14 +01:00
11 changed files with 158 additions and 36 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -392,6 +392,7 @@
 - **StreamCZ**
 - **StreetVoice**
 - **SunPorno**
 - **SVTPlay**
 - **SWRMediathek**
 - **Syfy**
 - **SztvHu**
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1546,7 +1546,6 @@ class YoutubeDL(object):
            line(f, idlen) for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
        if len(formats) > 1:
            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
        header_line = line({
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -428,6 +428,7 @@ from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streetvoice import StreetVoiceIE
 from .sunporno import SunPornoIE
 from .svtplay import SVTPlayIE
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor):
        download_link = m_download.group(1)
        video_id = self._search_regex(
-            r'var TralbumData = {.*?id: (?P<id>\d+),?$',
+            r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
-            webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
+            webpage, 'video id')
        download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
        # We get the dictionary of the track from some javascript code
-        info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
+        all_info = self._parse_json(self._search_regex(
-        info = json.loads(info)[0]
+            r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
        info = all_info[0]
        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info['downloads']['mp3-320']
        # If we try to use this url it says the link has expired
        initial_url = mp3_info['url']
-        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
+        m_url = re.match(
-        m_url = re.match(re_url, initial_url)
+            r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
            initial_url)
        # We build the url we will use to get the final track url
        # This url is build in Bandcamp in the script download_bunde_*.js
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
        final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
        # If we could correctly generate the .rand field the url would be
        # in the "download_url" key
-        final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
+        final_url = self._search_regex(
            r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL')
        return {
            'id': video_id,
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -264,8 +264,15 @@ class InfoExtractor(object):
    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
-        self.initialize()
+        try:
-        return self._real_extract(url)
+            self.initialize()
            return self._real_extract(url)
        except ExtractorError:
            raise
        except compat_http_client.IncompleteRead as e:
            raise ExtractorError('A network error has occured.', cause=e, expected=True)
        except (KeyError,) as e:
            raise ExtractorError('An extractor error has occured.', cause=e)
    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -524,6 +524,19 @@ class GenericIE(InfoExtractor):
                'upload_date': '20150126',
            },
            'add_ie': ['Viddler'],
        },
        # jwplayer YouTube
        {
            'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
            'info_dict': {
                'id': 'Mrj4DVp2zeA',
                'ext': 'mp4',
                'upload_date': '20150204',
                'uploader': 'The National Archives UK',
                'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
                'uploader_id': 'NationalArchives08',
                'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
            },
        }
    ]
@@ -1034,7 +1047,12 @@ class GenericIE(InfoExtractor):
        # Look for embedded sbs.com.au player
        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
+            r'''(?x)
            (?:
                <meta\s+property="og:video"\s+content=|
                <iframe[^>]+?src=
            )
            (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'SBS')
@@ -1065,6 +1083,8 @@ class GenericIE(InfoExtractor):
            return self.url_result(mobj.group('url'), 'Livestream')
        def check_video(vurl):
            if YoutubeIE.suitable(vurl):
                return True
            vpath = compat_urlparse.urlparse(vurl).path
            vext = determine_ext(vpath)
            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
@@ -1082,7 +1102,8 @@ class GenericIE(InfoExtractor):
                    JWPlayerOptions|
                    jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
                )
-                .*?file\s*:\s*["\'](.*?)["\']''', webpage))
+                .*?
                ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
        if not found:
            # Broaden the search a little bit
            found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -91,6 +91,15 @@ class RTLnowIE(InfoExtractor):
            },
        },
        {
            'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5',
            'info_dict': {
                'id': '188729',
                'ext': 'flv',
                'upload_date': '20150204',
                'description': 'md5:5e1ce23095e61a79c166d134b683cecc',
                'title': 'Der Bachelor - Folge 4',
            }
        }, {
            'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
            'only_matching': True,
        },
@@ -134,9 +143,18 @@ class RTLnowIE(InfoExtractor):
                    'player_url': video_page_url + 'includes/vodplayer.swf',
                }
            else:
-                fmt = {
+                mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
-                    'url': filename.text,
+                if mobj:
-                }
+                    fmt = {
                        'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
                        'play_path': 'mp4:' + mobj.group('play_path'),
                        'page_url': url,
                        'player_url': video_page_url + 'includes/vodplayer.swf',
                    }
                else:
                    fmt = {
                        'url': filename.text,
                    }
            fmt.update({
                'width': int_or_none(filename.get('width')),
                'height': int_or_none(filename.get('height')),
--- a/youtube_dl/extractor/svtplay.py
+++ b/youtube_dl/extractor/svtplay.py
@@ -0,0 +1,56 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
 )
 class SVTPlayIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
        'md5': 'f4a184968bc9c802a9b41316657aaa80',
        'info_dict': {
            'id': '2609989',
            'ext': 'mp4',
            'title': 'SM veckan vinter, Örebro - Rally, final',
            'duration': 4500,
            'thumbnail': 're:^https?://.*[\.-]jpg$',
        },
    }
    def _real_extract(self, url):
        video_id = self._match_id(url)
        info = self._download_json(
            'http://www.svtplay.se/video/%s?output=json' % video_id, video_id)
        title = info['context']['title']
        thumbnail = info['context'].get('thumbnailImage')
        video_info = info['video']
        formats = []
        for vr in video_info['videoReferences']:
            vurl = vr['url']
            if determine_ext(vurl) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    vurl, video_id,
                    ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id=vr.get('playerType')))
            else:
                formats.append({
                    'format_id': vr.get('playerType'),
                    'url': vurl,
                })
        self._sort_formats(formats)
        duration = video_info.get('materialLength')
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': duration,
        }
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -1,40 +1,55 @@
 # coding: utf-8
 from __future__ import unicode_literals
-import json
+import re
 from .common import InfoExtractor
 from ..utils import ExtractorError
 class TriluliluIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)'
    _TEST = {
        'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
        'md5': 'c1450a00da251e2769b74b9005601cac',
        'info_dict': {
-            'id': 'big-buck-bunny-1',
+            'id': 'ae2899e124140b',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'description': ':) pentru copilul din noi',
        },
        # Server ignores Range headers (--test)
        'params': {
            'skip_download': True
        }
    }
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, display_id)
        if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage):
            raise ExtractorError(
                'This video is not available in your country.', expected=True)
        elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage):
            raise ExtractorError('This video is private.', expected=True)
        flashvars_str = self._search_regex(
            r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None)
        if flashvars_str:
            flashvars = self._parse_json(flashvars_str, display_id)
        else:
            raise ExtractorError(
                'This page does not contain videos', expected=True)
        if flashvars['isMP3'] == 'true':
            raise ExtractorError(
                'Audio downloads are currently not supported', expected=True)
        video_id = flashvars['hash']
        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
-        description = self._og_search_description(webpage)
+        description = self._og_search_description(webpage, default=None)
        log_str = self._search_regex(
            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info')
        log = json.loads(log_str)
        format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
-                      'video-formats2' % log)
+                      'video-formats2' % flashvars)
        format_doc = self._download_xml(
            format_url, video_id,
            note='Downloading formats',
@@ -44,10 +59,10 @@ class TriluliluIE(InfoExtractor):
            'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
            '&source=site&hash=%(hash)s&username=%(userid)s&'
            'key=ministhebest&format=%%s&sig=&exp=' %
-            log)
+            flashvars)
        formats = [
            {
-                'format': fnode.text,
+                'format_id': fnode.text.partition('-')[2],
                'url': video_url_template % fnode.text,
                'ext': fnode.text.partition('-')[0]
            }
@@ -56,8 +71,8 @@ class TriluliluIE(InfoExtractor):
        ]
        return {
            '_type': 'video',
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'description': description,
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -780,8 +780,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                    fo for fo in formats
                    if fo['format_id'] == format_id)
            except StopIteration:
-                f.update(self._formats.get(format_id, {}).items())
+                full_info = self._formats.get(format_id, {}).copy()
-                formats.append(f)
+                full_info.update(f)
                formats.append(full_info)
            else:
                existing_format.update(f)
        return formats
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
-__version__ = '2015.02.09'
+__version__ = '2015.02.10'
Author	SHA1	Message	Date
Philipp Hagemeister	34814eb66e	release 2015.02.10	2015-02-10 01:19:52 +01:00
Philipp Hagemeister	3a5bcd0326	[extractor/common] Wrap extractor errors (Fixes #1194 ) For now, we just wrap some common errors. More may follow. We do not want to catch actual programming errors in the extractors, such as 1 // 0.	2015-02-10 01:17:23 +01:00
Philipp Hagemeister	99c2398bc6	[bandcamp] Use our API to get more stable error messages (#1194 )	2015-02-09 19:08:51 +01:00
Philipp Hagemeister	28f1272870	[svtplay] Correct test case	2015-02-09 16:05:01 +01:00
Philipp Hagemeister	f18e3a2fc0	release 2015.02.09.3	2015-02-09 15:59:19 +01:00
Philipp Hagemeister	c4c5dc27cb	Merge branch 'master' of github.com:rg3/youtube-dl	2015-02-09 15:59:14 +01:00
Naglis Jonaitis	2caf182f37	[trilulilu] Add support for videos without category in the URL (Closes #4067 ) Also, update the testcase, detect private/country restricted videos and modernize a bit.	2015-02-09 17:00:05 +02:00
Philipp Hagemeister	43f244b6d5	[YoutubeDL] Do not show worst in --list-formats output Nobody wants to know what the worst possible format is. And if they do, they can still provide -f worst.	2015-02-09 15:57:42 +01:00
Philipp Hagemeister	1309b396d0	[svtplay] Add new extractor (Fixes #4914 )	2015-02-09 15:56:59 +01:00
Jaime Marquínez Ferrándiz	ba61796458	[youtube] Don't override format info from the dash manifest (fixes #4911 )	2015-02-09 15:04:22 +01:00
Philipp Hagemeister	3255fe7141	release 2015.02.09.2	2015-02-09 14:46:30 +01:00
Philipp Hagemeister	e98b8e79ea	[generic] Improve SBS detection (Fixes #4899 )	2015-02-09 14:46:10 +01:00
Philipp Hagemeister	196121c51b	release 2015.02.09.1	2015-02-09 10:49:10 +01:00
Philipp Hagemeister	5269028951	[rtlnow] Add test for @mmue's extension (#4908 )	2015-02-09 10:47:19 +01:00
Philipp Hagemeister	f7bc056b5a	Merge remote-tracking branch 'mmue/fix-rtlnow'	2015-02-09 10:44:55 +01:00
Philipp Hagemeister	a0f7198544	[generic] Add support for jwPlayer YouTube videos This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)	2015-02-09 10:43:01 +01:00
Markus Müller	bdb186f3b0	fix rtlnow for newer series like "Der Bachelor" season 5	2015-02-08 21:55:39 +01:00
`@@ -1,3 +1,3 @@`
	`from __future__ import unicode_literals`	`from __future__ import unicode_literals`

	`__version__ = '2015.02.09'`	`__version__ = '2015.02.10'`