release 2015.02.26

[utils] Bump our user agent
[escapist] Fix config URL matching
2015-02-26 00:42:02 +01:00 · 2015-02-26 00:40:12 +01:00 · 2015-02-26 00:24:54 +01:00 · 2015-02-25 23:52:54 +01:00 · 2015-02-25 17:56:51 +01:00 · 2015-02-25 01:08:54 +06:00
17 changed files with 116 additions and 46 deletions
--- a/README.md
+++ b/README.md
@@ -139,6 +139,8 @@ which means you can modify it, redistribute it or use it however you like.
                                     dislike_count <? 50 & description" .
    --no-playlist                    If the URL refers to a video and a
                                     playlist, download only the video.
+    --yes-playlist                   If the URL refers to a video and a
+                                     playlist, download the playlist.
    --age-limit YEARS                download only videos suitable for the given
                                     age
    --download-archive FILE          Download only videos not listed in the
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
    RaiIE,
    VikiIE,
    ThePlatformIE,
+    RTVEALaCartaIE,
 )


@@ -305,5 +306,19 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
        self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')


+class TestRtveSubtitles(BaseTestSubtitles):
+    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+    IE = RTVEALaCartaIE
+
+    def test_allsubtitles(self):
+        print('Skipping, only available from Spain')
+        return
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['es']))
+        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(
            sanitize_filename('New World record at 0:12:34'),
            'New World record at 0_12_34')
+        self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+        self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')

        forbidden = '"\0\\/'
        for fc in forbidden:
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor


 class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
+    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'

    _TEST = {
        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
@@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
+        name = self._match_id(url)
        webpage = self._download_webpage(url, name)
+
        f4m_url = self._search_regex(
            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
            'f4m url')
--- a/youtube_dl/extractor/eporner.py
+++ b/youtube_dl/extractor/eporner.py
@@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
        title = self._html_search_regex(
            r'<title>(.*?) - EPORNER', webpage, 'title')

-        redirect_code = self._html_search_regex(
-            r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
-            webpage, 'redirect_code')
-        redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
+        redirect_url = 'http://www.eporner.com/config5/%s' % video_id
        player_code = self._download_webpage(
            redirect_url, display_id, note='Downloading player config')

@@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
-            'age_limit': self._rta_search(webpage),
+            'age_limit': 18,
        }
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dl/extractor/escapist.py
@@ -42,7 +42,14 @@ class EscapistIE(InfoExtractor):
        title = raw_title.partition(' : ')[2]

        config_url = compat_urllib_parse.unquote(self._html_search_regex(
-            r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL'))
+            r'''(?x)
+            (?:
+                <param\s+name="flashvars".*?\s+value="config=|
+                flashvars=&quot;config=
+            )
+            (https?://[^"&]+)
+            ''',
+            webpage, 'config URL'))

        formats = []

--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
        params_raw = compat_urllib_parse.unquote(data['params'])
        params = json.loads(params_raw)
        video_data = params['video_data'][0]
-        video_url = video_data.get('hd_src')
-        if not video_url:
-            video_url = video_data['sd_src']
-        if not video_url:
-            raise ExtractorError('Cannot find video URL')
+
+        formats = []
+        for quality in ['sd', 'hd']:
+            src = video_data.get('%s_src' % quality)
+            if src is not None:
+                formats.append({
+                    'format_id': quality,
+                    'url': src,
+                })
+        if not formats:
+            raise ExtractorError('Cannot find video formats')

        video_title = self._html_search_regex(
            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
@@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
        return {
            'id': video_id,
            'title': video_title,
-            'url': video_url,
+            'formats': formats,
            'duration': int_or_none(video_data.get('video_duration')),
            'thumbnail': video_data.get('thumbnail_src'),
        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1208,7 +1208,9 @@ class GenericIE(InfoExtractor):
            return entries[0]
        else:
            for num, e in enumerate(entries, start=1):
-                e['title'] = '%s (%d)' % (e['title'], num)
+                # 'url' results don't have a title
+                if e.get('title') is not None:
+                    e['title'] = '%s (%d)' % (e['title'], num)
            return {
                '_type': 'playlist',
                'entries': entries,
--- a/youtube_dl/extractor/laola1tv.py
+++ b/youtube_dl/extractor/laola1tv.py
@@ -1,23 +1,26 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals

 import random
 import re

 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    xpath_text,
+)


 class Laola1TvIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
    _TEST = {
-        'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html',
+        'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
        'info_dict': {
-            'id': '250019',
+            'id': '227883',
            'ext': 'mp4',
-            'title': 'Bitburger Open Grand Prix Gold - Court 1',
-            'categories': ['Badminton'],
-            'uploader': 'BWF - Badminton World Federation',
-            'is_live': True,
+            'title': 'Straubing Tigers - Kölner Haie',
+            'categories': ['Eishockey'],
+            'is_live': False,
        },
        'params': {
            'skip_download': True,
@@ -43,15 +46,26 @@ class Laola1TvIE(InfoExtractor):
            r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
        flashvars = dict((m[0], m[1]) for m in flashvars_m)

+        partner_id = self._search_regex(
+            r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
+
        xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
-                   'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % (
-                       video_id, portal, lang))
+                   'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
+                       video_id, partner_id, portal, lang))
        hd_doc = self._download_xml(xml_url, video_id)

-        title = hd_doc.find('.//video/title').text
-        flash_url = hd_doc.find('.//video/url').text
-        categories = hd_doc.find('.//video/meta_sports').text.split(',')
-        uploader = hd_doc.find('.//video/meta_organistation').text
+        title = xpath_text(hd_doc, './/video/title', fatal=True)
+        flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
+        uploader = xpath_text(hd_doc, './/video/meta_organistation')
+
+        is_live = xpath_text(hd_doc, './/video/islive') == 'true'
+        if is_live:
+            raise ExtractorError(
+                'Live streams are not supported by the f4m downloader.')
+
+        categories = xpath_text(hd_doc, './/video/meta_sports')
+        if categories:
+            categories = categories.split(',')

        ident = random.randint(10000000, 99999999)
        token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
@@ -60,15 +74,16 @@ class Laola1TvIE(InfoExtractor):
        token_doc = self._download_xml(
            token_url, video_id, note='Downloading token')
        token_attrib = token_doc.find('.//token').attrib
-        if token_attrib.get('auth') == 'blocked':
-            raise ExtractorError('Token error: ' % token_attrib.get('comment'))
+        if token_attrib.get('auth') in ('blocked', 'restricted'):
+            raise ExtractorError(
+                'Token error: %s' % token_attrib.get('comment'), expected=True)

        video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
            token_attrib['url'], token_attrib['auth'])

        return {
            'id': video_id,
-            'is_live': True,
+            'is_live': is_live,
            'title': title,
            'url': video_url,
            'uploader': uploader,
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -18,7 +18,7 @@ class MiTeleIE(InfoExtractor):
    IE_NAME = 'mitele.es'
    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
        'info_dict': {
@@ -29,7 +29,7 @@ class MiTeleIE(InfoExtractor):
            'display_id': 'programa-144',
            'duration': 2913,
        },
-    }
+    }]

    def _real_extract(self, url):
        episode = self._match_id(url)
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
    ExtractorError,
    float_or_none,
@@ -158,7 +159,9 @@ class NRKTVIE(InfoExtractor):
    def _get_subtitles(self, subtitlesurl, video_id, baseurl):
        url = "%s%s" % (baseurl, subtitlesurl)
        self._debug_print('%s: Subtitle url: %s' % (video_id, url))
-        captions = self._download_xml(url, video_id, 'Downloading subtitles')
+        captions = self._download_xml(
+            url, video_id, 'Downloading subtitles',
+            transform_source=lambda s: s.replace(r'<br />', '\r\n'))
        lang = captions.get('lang', 'no')
        ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
        srt = ''
@@ -167,8 +170,7 @@ class NRKTVIE(InfoExtractor):
            duration = parse_duration(p.get('dur'))
            starttime = self._seconds2str(begin)
            endtime = self._seconds2str(begin + duration)
-            text = '\n'.join(p.itertext())
-            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
+            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
        return {lang: [
            {'ext': 'ttml', 'url': url},
            {'ext': 'srt', 'data': srt},
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -102,14 +102,27 @@ class RTVEALaCartaIE(InfoExtractor):
            video_url = compat_urlparse.urljoin(
                'http://mvod1.akcdn.rtve.es/', video_path)

+        subtitles = None
+        if info.get('sbtFile') is not None:
+            subtitles = self.extract_subtitles(video_id, info['sbtFile'])
+
        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            'thumbnail': info.get('image'),
            'page_url': url,
+            'subtitles': subtitles,
        }

+    def _get_subtitles(self, video_id, sub_file):
+        subs = self._download_json(
+            sub_file + '.json', video_id,
+            'Downloading subtitles info')['page']['items']
+        return dict(
+            (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+            for s in subs)
+

 class RTVELiveIE(InfoExtractor):
    IE_NAME = 'rtve.es:live'
--- a/youtube_dl/extractor/telecinco.py
+++ b/youtube_dl/extractor/telecinco.py
@@ -6,9 +6,9 @@ from .mitele import MiTeleIE

 class TelecincoIE(MiTeleIE):
    IE_NAME = 'telecinco.es'
-    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
        'info_dict': {
            'id': 'MDSVID20141015_0058',
@@ -16,4 +16,7 @@ class TelecincoIE(MiTeleIE):
            'title': 'Con Martín Berasategui, hacer un bacalao al ...',
            'duration': 662,
        },
-    }
+    }, {
+        'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
+        'only_matching': True,
+    }]
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1153,13 +1153,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                        |  p/
                        )
                        (
-                            (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+                            (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
-                        ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+                        ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
                     )"""
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
@@ -1244,7 +1244,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            for vid_id in ids]

    def _extract_mix(self, playlist_id):
-        # The mixes are generated from a a single video
+        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
        webpage = self._download_webpage(
@@ -1280,7 +1280,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

-        if playlist_id.startswith('RD'):
+        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -272,6 +272,10 @@ def parseOpts(overrideArguments=None):
        '--no-playlist',
        action='store_true', dest='noplaylist', default=False,
        help='If the URL refers to a video and a playlist, download only the video.')
+    selection.add_option(
+        '--yes-playlist',
+        action='store_false', dest='noplaylist', default=False,
+        help='If the URL refers to a video and a playlist, download the playlist.')
    selection.add_option(
        '--age-limit',
        metavar='YEARS', dest='age_limit', default=None, type=int,
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -54,7 +54,7 @@ from .compat import (
 compiled_regex_type = type(re.compile(''))

 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
@@ -304,6 +304,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
+        if result.startswith('-'):
+            result = '_' + result[len('-'):]
        if not result:
            result = '_'
    return result
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.02.23.1'
+__version__ = '2015.02.26'
Author	SHA1	Message	Date
Philipp Hagemeister	13cd97f3df	release 2015.02.26	2015-02-26 00:42:02 +01:00
Philipp Hagemeister	183139340b	[utils] Bump our user agent	2015-02-26 00:40:12 +01:00
Philipp Hagemeister	1c69bca258	[escapist] Fix config URL matching	2015-02-26 00:24:54 +01:00
Jaime Marquínez Ferrándiz	c10ea454dc	[telecinco] Recognize more urls (closes #5065)	2015-02-25 23:52:54 +01:00
Jaime Marquínez Ferrándiz	13d8fbef30	[generic] Don't set the 'title' if it's not defined in the entry (closes #5061). Some of the entries may be 'url' results, which in general don't have the 'title' field.	2015-02-25 17:56:51 +01:00
Sergey M․	37f885650c	[eporner] Simplify and hardcode age limit	2015-02-25 01:08:54 +06:00
Sergey M.	c8c34ccb20	Merge pull request #5056 from logon84/master Eporner Fix (Closes #5050)	2015-02-25 01:05:35 +06:00
logon84	e765ed3a9c	[eporner] Fix redirect_code error	2015-02-24 19:41:46 +01:00
logon84	59c7cbd482	Update eporner.py: updated to work again. The old version shows an error about being unable to extract "redirect_code".	2015-02-24 18:58:32 +01:00
Sergey M․	41b264e77c	[nrktv] Workaround subtitles conversion issues on python 2.6 (Closes #5036)	2015-02-24 23:06:44 +06:00
Philipp Hagemeister	df4bd0d53f	[options] Add --yes-playlist as inverse of --no-playlist (Fixes #5051)	2015-02-24 17:25:02 +01:00
Philipp Hagemeister	4f3b21e1c7	release 2015.02.24.2	2015-02-24 16:34:42 +01:00
Philipp Hagemeister	54233c9080	[escapist] Support JavaScript player (Fixes #5034)	2015-02-24 16:33:07 +01:00
Philipp Hagemeister	db8e13ef71	release 2015.02.24.1	2015-02-24 11:38:21 +01:00
Philipp Hagemeister	5a42414b9c	[utils] Prevent hyphen at beginning of filename (Fixes #5035)	2015-02-24 11:38:01 +01:00
Philipp Hagemeister	9c665ab72e	[rtve] PEP8	2015-02-24 11:37:27 +01:00
Philipp Hagemeister	b665ba6aa6	release 2015.02.24	2015-02-24 11:24:26 +01:00
Philipp Hagemeister	ec5913b5cd	[bloomberg] Modernize	2015-02-24 11:08:00 +01:00
Jaime Marquínez Ferrándiz	25ac63ed71	[rtve] Extract subtitles	2015-02-23 23:04:07 +01:00
Sergey M․	99209c2916	[youtube] Extract UL playlists as mixes (Closes #5040)	2015-02-24 01:35:15 +06:00
Naglis Jonaitis	1fbaa0a521	[laola1tv] Use raw strings for regular expressions Oops	2015-02-23 20:51:30 +02:00
Naglis Jonaitis	3037b91e05	[laola1tv] Improve extraction and update test case (#3742)	2015-02-23 20:45:52 +02:00
Jaime Marquínez Ferrándiz	ffdf972b91	[facebook] Extract all the formats (closes #5037)	2015-02-23 18:54:15 +01:00