release 2015.02.24

[bloomberg] Modernize
[rtve] Extract subtitles
2015-02-24 11:24:26 +01:00 · 2015-02-24 11:08:00 +01:00 · 2015-02-23 23:04:07 +01:00 · 2015-02-24 01:35:15 +06:00 · 2015-02-23 20:51:30 +02:00 · 2015-02-23 20:45:52 +02:00
7 changed files with 78 additions and 30 deletions
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
    RaiIE,
    VikiIE,
    ThePlatformIE,
+    RTVEALaCartaIE,
 )


@@ -305,5 +306,19 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
        self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')


+class TestRtveSubtitles(BaseTestSubtitles):
+    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+    IE = RTVEALaCartaIE
+
+    def test_allsubtitles(self):
+        print('Skipping, only available from Spain')
+        return
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['es']))
+        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor


 class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
+    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'

    _TEST = {
        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
@@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
+        name = self._match_id(url)
        webpage = self._download_webpage(url, name)
+
        f4m_url = self._search_regex(
            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
            'f4m url')
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
        params_raw = compat_urllib_parse.unquote(data['params'])
        params = json.loads(params_raw)
        video_data = params['video_data'][0]
-        video_url = video_data.get('hd_src')
-        if not video_url:
-            video_url = video_data['sd_src']
-        if not video_url:
-            raise ExtractorError('Cannot find video URL')
+
+        formats = []
+        for quality in ['sd', 'hd']:
+            src = video_data.get('%s_src' % quality)
+            if src is not None:
+                formats.append({
+                    'format_id': quality,
+                    'url': src,
+                })
+        if not formats:
+            raise ExtractorError('Cannot find video formats')

        video_title = self._html_search_regex(
            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
@@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
        return {
            'id': video_id,
            'title': video_title,
-            'url': video_url,
+            'formats': formats,
            'duration': int_or_none(video_data.get('video_duration')),
            'thumbnail': video_data.get('thumbnail_src'),
        }
--- a/youtube_dl/extractor/laola1tv.py
+++ b/youtube_dl/extractor/laola1tv.py
@@ -1,23 +1,26 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals

 import random
 import re

 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    xpath_text,
+)


 class Laola1TvIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
    _TEST = {
-        'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html',
+        'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
        'info_dict': {
-            'id': '250019',
+            'id': '227883',
            'ext': 'mp4',
-            'title': 'Bitburger Open Grand Prix Gold - Court 1',
-            'categories': ['Badminton'],
-            'uploader': 'BWF - Badminton World Federation',
-            'is_live': True,
+            'title': 'Straubing Tigers - Kölner Haie',
+            'categories': ['Eishockey'],
+            'is_live': False,
        },
        'params': {
            'skip_download': True,
@@ -43,15 +46,26 @@ class Laola1TvIE(InfoExtractor):
            r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
        flashvars = dict((m[0], m[1]) for m in flashvars_m)

+        partner_id = self._search_regex(
+            r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
+
        xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
-                   'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % (
-                       video_id, portal, lang))
+                   'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
+                       video_id, partner_id, portal, lang))
        hd_doc = self._download_xml(xml_url, video_id)

-        title = hd_doc.find('.//video/title').text
-        flash_url = hd_doc.find('.//video/url').text
-        categories = hd_doc.find('.//video/meta_sports').text.split(',')
-        uploader = hd_doc.find('.//video/meta_organistation').text
+        title = xpath_text(hd_doc, './/video/title', fatal=True)
+        flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
+        uploader = xpath_text(hd_doc, './/video/meta_organistation')
+
+        is_live = xpath_text(hd_doc, './/video/islive') == 'true'
+        if is_live:
+            raise ExtractorError(
+                'Live streams are not supported by the f4m downloader.')
+
+        categories = xpath_text(hd_doc, './/video/meta_sports')
+        if categories:
+            categories = categories.split(',')

        ident = random.randint(10000000, 99999999)
        token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
@@ -60,15 +74,16 @@ class Laola1TvIE(InfoExtractor):
        token_doc = self._download_xml(
            token_url, video_id, note='Downloading token')
        token_attrib = token_doc.find('.//token').attrib
-        if token_attrib.get('auth') == 'blocked':
-            raise ExtractorError('Token error: ' % token_attrib.get('comment'))
+        if token_attrib.get('auth') in ('blocked', 'restricted'):
+            raise ExtractorError(
+                'Token error: %s' % token_attrib.get('comment'), expected=True)

        video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
            token_attrib['url'], token_attrib['auth'])

        return {
            'id': video_id,
-            'is_live': True,
+            'is_live': is_live,
            'title': title,
            'url': video_url,
            'uploader': uploader,
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -102,14 +102,26 @@ class RTVEALaCartaIE(InfoExtractor):
            video_url = compat_urlparse.urljoin(
                'http://mvod1.akcdn.rtve.es/', video_path)

+        subtitles = None
+        if info.get('sbtFile') is not None:
+            subtitles = self.extract_subtitles(video_id, info['sbtFile'])
+
        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            'thumbnail': info.get('image'),
            'page_url': url,
+            'subtitles': subtitles,
        }

+    def _get_subtitles(self, video_id, sub_file):
+        subs = self._download_json(
+            sub_file + '.json', video_id,
+            'Downloading subtitles info')['page']['items']
+        return dict((s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+            for s in subs)
+

 class RTVELiveIE(InfoExtractor):
    IE_NAME = 'rtve.es:live'
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1153,13 +1153,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                        |  p/
                        )
                        (
-                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
-                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
@@ -1244,7 +1244,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            for vid_id in ids]

    def _extract_mix(self, playlist_id):
-        # The mixes are generated from a a single video
+        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
@@ -1280,7 +1280,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

-        if playlist_id.startswith('RD'):
+        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.02.23.1'
+__version__ = '2015.02.24'
Author	SHA1	Message	Date
Philipp Hagemeister	b665ba6aa6	release 2015.02.24	2015-02-24 11:24:26 +01:00
Philipp Hagemeister	ec5913b5cd	[bloomberg] Modernize	2015-02-24 11:08:00 +01:00
Jaime Marquínez Ferrándiz	25ac63ed71	[rtve] Extract subtitles	2015-02-23 23:04:07 +01:00
Sergey M․	99209c2916	[youtube] Extract UL playlists as mixes (Closes #5040)	2015-02-24 01:35:15 +06:00
Naglis Jonaitis	1fbaa0a521	[laola1tv] Use raw strings for regular expressions Oops	2015-02-23 20:51:30 +02:00
Naglis Jonaitis	3037b91e05	[laola1tv] Improve extraction and update test case (#3742)	2015-02-23 20:45:52 +02:00
Jaime Marquínez Ferrándiz	ffdf972b91	[facebook] Extract all the formats (closes #5037)	2015-02-23 18:54:15 +01:00