release 2015.02.26.2

[escapist] Detect IP blocking and use another UA (Fixes #5069)
release 2015.02.26.1
2015-02-26 09:45:11 +01:00 · 2015-02-26 09:19:26 +01:00 · 2015-02-26 01:47:16 +01:00 · 2015-02-26 01:47:12 +01:00 · 2015-02-26 01:44:20 +01:00 · 2015-02-26 01:35:43 +01:00
23 changed files with 527 additions and 84 deletions
--- a/1
+++ b/1
@ -112,3 +112,4 @@ Frans de Jonge
 Robin de Rooij
 Ryan Schmidt
 Leslie P. Polzer
+Duncan Keall
--- a/1
+++ b/1
@ -2,6 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas

 clean:
 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+	find -name "*.pyc" -delete

 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
--- a/README.md
+++ b/README.md
@ -139,6 +139,8 @@ which means you can modify it, redistribute it or use it however you like.
                                     dislike_count <? 50 & description" .
    --no-playlist                    If the URL refers to a video and a
                                     playlist, download only the video.
+    --yes-playlist                   If the URL refers to a video and a
+                                     playlist, download the playlist.
    --age-limit YEARS                download only videos suitable for the given
                                     age
    --download-archive FILE          Download only videos not listed in the
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -17,6 +17,7 @@
 - **AdultSwim**
 - **Aftenposten**
 - **Aftonbladet**
+ - **AirMozilla**
 - **AlJazeera**
 - **Allocine**
 - **AlphaPorno**
@ -220,6 +221,9 @@
 - **Ku6**
 - **la7.tv**
 - **Laola1Tv**
+ - **Letv**
+ - **LetvPlaylist**
+ - **LetvTv**
 - **lifenews**: LIFE | NEWS
 - **LiveLeak**
 - **livestream**
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(
            sanitize_filename('New World record at 0:12:34'),
            'New World record at 0_12_34')
+        self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+        self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')

        forbidden = '"\0\\/'
        for fc in forbidden:
@ -244,6 +246,7 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_duration('2.5 hours'), 9000)
        self.assertEqual(parse_duration('02:03:04'), 7384)
        self.assertEqual(parse_duration('01:02:03:04'), 93784)
+        self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)

    def test_fix_xml_ampersands(self):
        self.assertEqual(
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@ -11,6 +11,7 @@ from .common import FileDownloader
 from .http import HttpFD
 from ..compat import (
    compat_urlparse,
+    compat_urllib_error,
 )
 from ..utils import (
    struct_pack,
@ -121,7 +122,8 @@ class FlvReader(io.BytesIO):

        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
-        self.read(1)
+        flags = self.read_unsigned_char()
+        live = flags & 0x20 != 0
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
@ -160,6 +162,7 @@ class FlvReader(io.BytesIO):
        return {
            'segments': segments,
            'fragments': fragments,
+            'live': live,
        }

    def read_bootstrap_info(self):
@ -182,6 +185,10 @@ def build_fragments_list(boot_info):
    for segment, fragments_count in segment_run_table['segment_run']:
        for _ in range(fragments_count):
            res.append((segment, next(fragments_counter)))
+
+    if boot_info['live']:
+        res = res[-2:]
+
    return res


@ -246,6 +253,38 @@ class F4mFD(FileDownloader):
            self.report_error('Unsupported DRM')
        return media

+    def _get_bootstrap_from_url(self, bootstrap_url):
+        bootstrap = self.ydl.urlopen(bootstrap_url).read()
+        return read_bootstrap_info(bootstrap)
+
+    def _update_live_fragments(self, bootstrap_url, latest_fragment):
+        fragments_list = []
+        retries = 30
+        while (not fragments_list) and (retries > 0):
+            boot_info = self._get_bootstrap_from_url(bootstrap_url)
+            fragments_list = build_fragments_list(boot_info)
+            fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
+            if not fragments_list:
+                # Retry after a while
+                time.sleep(5.0)
+                retries -= 1
+
+        if not fragments_list:
+            self.report_error('Failed to update fragments')
+
+        return fragments_list
+
+    def _parse_bootstrap_node(self, node, base_url):
+        if node.text is None:
+            bootstrap_url = compat_urlparse.urljoin(
+                base_url, node.attrib['url'])
+            boot_info = self._get_bootstrap_from_url(bootstrap_url)
+        else:
+            bootstrap_url = None
+            bootstrap = base64.b64decode(node.text)
+            boot_info = read_bootstrap_info(bootstrap)
+        return (boot_info, bootstrap_url)
+
    def real_download(self, filename, info_dict):
        man_url = info_dict['url']
        requested_bitrate = info_dict.get('tbr')
@ -265,18 +304,13 @@ class F4mFD(FileDownloader):

        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        if bootstrap_node.text is None:
-            bootstrap_url = compat_urlparse.urljoin(
-                base_url, bootstrap_node.attrib['url'])
-            bootstrap = self.ydl.urlopen(bootstrap_url).read()
-        else:
-            bootstrap = base64.b64decode(bootstrap_node.text)
+        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
+        live = boot_info['live']
        metadata_node = media.find(_add_ns('metadata'))
        if metadata_node is not None:
            metadata = base64.b64decode(metadata_node.text)
        else:
            metadata = None
-        boot_info = read_bootstrap_info(bootstrap)

        fragments_list = build_fragments_list(boot_info)
        if self.params.get('test', False):
@ -301,6 +335,7 @@ class F4mFD(FileDownloader):
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')

        write_flv_header(dest_stream)
+        if not live:
            write_metadata_tag(dest_stream, metadata)

        # This dict stores the download progress, it's updated by the progress
@ -348,12 +383,14 @@ class F4mFD(FileDownloader):
        http_dl.add_progress_hook(frag_progress_hook)

        frags_filenames = []
-        for (seg_i, frag_i) in fragments_list:
+        while fragments_list:
+            seg_i, frag_i = fragments_list.pop(0)
            name = 'Seg%d-Frag%d' % (seg_i, frag_i)
            url = base_url + name
            if akamai_pv:
                url += '?' + akamai_pv.strip(';')
            frag_filename = '%s-%s' % (tmpfilename, name)
+            try:
                success = http_dl.download(frag_filename, {'url': url})
                if not success:
                    return False
@ -365,7 +402,26 @@ class F4mFD(FileDownloader):
                        if box_type == b'mdat':
                            dest_stream.write(box_data)
                            break
+                if live:
+                    os.remove(frag_filename)
+                else:
                    frags_filenames.append(frag_filename)
+            except (compat_urllib_error.HTTPError, ) as err:
+                if live and (err.code == 404 or err.code == 410):
+                    # We didn't keep up with the live window. Continue
+                    # with the next available fragment.
+                    msg = 'Fragment %d unavailable' % frag_i
+                    self.report_warning(msg)
+                    fragments_list = []
+                else:
+                    raise
+
+            if not fragments_list and live and bootstrap_url:
+                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+                total_frags += len(fragments_list)
+                if fragments_list and (fragments_list[0][1] > frag_i + 1):
+                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+                    self.report_warning(msg)

        dest_stream.close()

--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@ -119,7 +119,9 @@ class RtmpFD(FileDownloader):
        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrumpted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
+        basic_args = [
+            'rtmpdump', '--verbose', '-r', url,
+            '-o', encodeFilename(tmpfilename, True)]
        if player_url is not None:
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -8,6 +8,7 @@ from .adobetv import AdobeTVIE
 from .adultswim import AdultSwimIE
 from .aftenposten import AftenpostenIE
 from .aftonbladet import AftonbladetIE
+from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
 from .anitube import AnitubeIE
@ -237,6 +238,11 @@ from .krasview import KrasViewIE
 from .ku6 import Ku6IE
 from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
+from .letv import (
+    LetvIE,
+    LetvTvIE,
+    LetvPlaylistIE
+)
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
 from .livestream import (
--- a/youtube_dl/extractor/airmozilla.py
+++ b/youtube_dl/extractor/airmozilla.py
@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class AirMozillaIE(InfoExtractor):
+    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
+    _TEST = {
+        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
+        'md5': '2e3e7486ba5d180e829d453875b9b8bf',
+        'info_dict': {
+            'id': '6x4q2w',
+            'ext': 'mp4',
+            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
+            'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
+            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
+            'timestamp': 1422487800,
+            'upload_date': '20150128',
+            'location': 'SFO Commons',
+            'duration': 3780,
+            'view_count': int,
+            'categories': ['Main'],
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
+
+        embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
+        jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
+        metadata = self._parse_json(jwconfig, video_id)
+
+        formats = [{
+            'url': source['file'],
+            'ext': source['type'],
+            'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
+            'format': source['label'],
+            'height': int(source['label'].rstrip('p')),
+        } for source in metadata['playlist'][0]['sources']]
+        self._sort_formats(formats)
+
+        view_count = int_or_none(self._html_search_regex(
+            r'Views since archived: ([0-9]+)',
+            webpage, 'view count', fatal=False))
+        timestamp = parse_iso8601(self._html_search_regex(
+            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
+        duration = parse_duration(self._search_regex(
+            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
+            webpage, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'formats': formats,
+            'url': self._og_search_url(webpage),
+            'display_id': display_id,
+            'thumbnail': metadata['playlist'][0].get('image'),
+            'description': self._og_search_description(webpage),
+            'timestamp': timestamp,
+            'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
+            'duration': duration,
+            'view_count': view_count,
+            'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
+        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -921,10 +921,23 @@ class InfoExtractor(object):

        formats = []
        rtmp_count = 0
+        if smil.findall('./body/seq/video'):
+            video = smil.findall('./body/seq/video')[0]
+            fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
+            formats.extend(fmts)
+        else:
            for video in smil.findall('./body/switch/video'):
+                fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
+                formats.extend(fmts)
+
+        self._sort_formats(formats)
+
+        return formats
+
+    def _parse_smil_video(self, video, video_id, base, rtmp_count):
        src = video.get('src')
        if not src:
-                continue
+            return ([], rtmp_count)
        bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
        width = int_or_none(video.get('width'))
        height = int_or_none(video.get('height'))
@ -937,11 +950,11 @@ class InfoExtractor(object):
                    proto = 'http'
        ext = video.get('ext')
        if proto == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
+            return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
        elif proto == 'rtmp':
            rtmp_count += 1
            streamer = video.get('streamer') or base
-                formats.append({
+            return ([{
                'url': streamer,
                'play_path': src,
                'ext': 'flv',
@ -949,10 +962,15 @@ class InfoExtractor(object):
                'tbr': bitrate,
                'width': width,
                'height': height,
-                })
-        self._sort_formats(formats)
-
-        return formats
+            }], rtmp_count)
+        elif proto.startswith('http'):
+            return ([{
+                'url': base + src,
+                'ext': ext or 'flv',
+                'tbr': bitrate,
+                'width': width,
+                'height': height,
+            }], rtmp_count)

    def _live_title(self, name):
        """ Generate the title for a live video """
--- a/youtube_dl/extractor/eporner.py
+++ b/youtube_dl/extractor/eporner.py
@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
        title = self._html_search_regex(
            r'<title>(.*?) - EPORNER', webpage, 'title')

-        redirect_code = self._html_search_regex(
-            r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
-            webpage, 'redirect_code')
-        redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
+        redirect_url = 'http://www.eporner.com/config5/%s' % video_id
        player_code = self._download_webpage(
            redirect_url, display_id, note='Downloading player config')

@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
-            'age_limit': self._rta_search(webpage),
+            'age_limit': 18,
        }
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dl/extractor/escapist.py
@ -3,6 +3,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse,
+    compat_urllib_request,
 )
 from ..utils import (
    ExtractorError,
@ -12,6 +13,7 @@ from ..utils import (

 class EscapistIE(InfoExtractor):
    _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
+    _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
    _TEST = {
        'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
        'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@ -28,7 +30,9 @@ class EscapistIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage_req = compat_urllib_request.Request(url)
+        webpage_req.add_header('User-Agent', self._USER_AGENT)
+        webpage = self._download_webpage(webpage_req, video_id)

        uploader_id = self._html_search_regex(
            r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
@ -42,25 +46,43 @@ class EscapistIE(InfoExtractor):
        title = raw_title.partition(' : ')[2]

        config_url = compat_urllib_parse.unquote(self._html_search_regex(
-            r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL'))
+            r'''(?x)
+            (?:
+                <param\s+name="flashvars".*?\s+value="config=|
+                flashvars=&quot;config=
+            )
+            (https?://[^"&]+)
+            ''',
+            webpage, 'config URL'))

        formats = []
+        ad_formats = []

-        def _add_format(name, cfgurl, quality):
+        def _add_format(name, cfg_url, quality):
+            cfg_req = compat_urllib_request.Request(cfg_url)
+            cfg_req.add_header('User-Agent', self._USER_AGENT)
            config = self._download_json(
-                cfgurl, video_id,
+                cfg_req, video_id,
                'Downloading ' + name + ' configuration',
                'Unable to download ' + name + ' configuration',
                transform_source=js_to_json)

            playlist = config['playlist']
-            video_url = next(
-                p['url'] for p in playlist
-                if p.get('eventCategory') == 'Video')
-            formats.append({
-                'url': video_url,
+            for p in playlist:
+                if p.get('eventCategory') == 'Video':
+                    ar = formats
+                elif p.get('eventCategory') == 'Video Postroll':
+                    ar = ad_formats
+                else:
+                    continue
+
+                ar.append({
+                    'url': p['url'],
                    'format_id': name,
                    'quality': quality,
+                    'http_headers': {
+                        'User-Agent': self._USER_AGENT,
+                    },
                })

        _add_format('normal', config_url, quality=0)
@ -70,10 +92,12 @@ class EscapistIE(InfoExtractor):
            _add_format('hq', hq_url, quality=1)
        except ExtractorError:
            pass  # That's fine, we'll just use normal quality
-
        self._sort_formats(formats)

-        return {
+        if '/escapist/sales-marketing/' in formats[-1]['url']:
+            raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
+
+        res = {
            'id': video_id,
            'formats': formats,
            'uploader': uploader,
@ -82,3 +106,19 @@ class EscapistIE(InfoExtractor):
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': description,
        }
+
+        if self._downloader.params.get('include_ads') and ad_formats:
+            self._sort_formats(ad_formats)
+            ad_res = {
+                'id': '%s-ad' % video_id,
+                'title': '%s (Postroll)' % title,
+                'formats': ad_formats,
+            }
+            return {
+                '_type': 'playlist',
+                'entries': [res, ad_res],
+                'title': title,
+                'id': video_id,
+            }
+
+        return res
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -1208,6 +1208,8 @@ class GenericIE(InfoExtractor):
            return entries[0]
        else:
            for num, e in enumerate(entries, start=1):
+                # 'url' results don't have a title
+                if e.get('title') is not None:
                    e['title'] = '%s (%d)' % (e['title'], num)
            return {
                '_type': 'playlist',
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@ -0,0 +1,190 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urlparse,
+    compat_urllib_parse,
+)
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    parse_iso8601,
+)
+
+
+class LetvIE(InfoExtractor):
+    _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
+
+    _TESTS = [{
+        'url': 'http://www.letv.com/ptv/vplay/22005890.html',
+        'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
+        'info_dict': {
+            'id': '22005890',
+            'ext': 'mp4',
+            'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
+            'timestamp': 1424747397,
+            'upload_date': '20150224',
+            'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
+        }
+    }, {
+        'url': 'http://www.letv.com/ptv/vplay/1415246.html',
+        'info_dict': {
+            'id': '1415246',
+            'ext': 'mp4',
+            'title': '美人天下01',
+            'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
+        },
+        'expected_warnings': [
+            'publish time'
+        ]
+    }]
+    # http://www.letv.com/ptv/vplay/1118082.html
+    # This video is available only in Mainland China
+
+    @staticmethod
+    def urshift(val, n):
+        return val >> n if val >= 0 else (val + 0x100000000) >> n
+
+    # ror() and calc_time_key() are reverse-engineered from an embedded swf file in KLetvPlayer.swf
+    def ror(self, param1, param2):
+        _loc3_ = 0
+        while _loc3_ < param2:
+            param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
+            _loc3_ += 1
+        return param1
+
+    def calc_time_key(self, param1):
+        _loc2_ = 773625421
+        _loc3_ = self.ror(param1, _loc2_ % 13)
+        _loc3_ = _loc3_ ^ _loc2_
+        _loc3_ = self.ror(_loc3_, _loc2_ % 17)
+        return _loc3_
+
+    def _real_extract(self, url):
+        media_id = self._match_id(url)
+        page = self._download_webpage(url, media_id)
+        params = {
+            'id': media_id,
+            'platid': 1,
+            'splatid': 101,
+            'format': 1,
+            'tkey': self.calc_time_key(int(time.time())),
+            'domain': 'www.letv.com'
+        }
+        play_json = self._download_json(
+            'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
+            media_id, 'playJson data')
+
+        # Check for errors
+        playstatus = play_json['playstatus']
+        if playstatus['status'] == 0:
+            flag = playstatus['flag']
+            if flag == 1:
+                msg = 'Country %s auth error' % playstatus['country']
+            else:
+                msg = 'Generic error. flag = %d' % flag
+            raise ExtractorError(msg, expected=True)
+
+        playurl = play_json['playurl']
+
+        formats = ['350', '1000', '1300', '720p', '1080p']
+        dispatch = playurl['dispatch']
+
+        urls = []
+        for format_id in formats:
+            if format_id in dispatch:
+                media_url = playurl['domain'][0] + dispatch[format_id][0]
+
+                # Mimic what flvxz.com does
+                url_parts = list(compat_urlparse.urlparse(media_url))
+                qs = dict(compat_urlparse.parse_qs(url_parts[4]))
+                qs.update({
+                    'platid': '14',
+                    'splatid': '1401',
+                    'tss': 'no',
+                    'retry': 1
+                })
+                url_parts[4] = compat_urllib_parse.urlencode(qs)
+                media_url = compat_urlparse.urlunparse(url_parts)
+
+                url_info_dict = {
+                    'url': media_url,
+                    'ext': determine_ext(dispatch[format_id][1])
+                }
+
+                if format_id[-1:] == 'p':
+                    url_info_dict['height'] = format_id[:-1]
+
+                urls.append(url_info_dict)
+
+        publish_time = parse_iso8601(self._html_search_regex(
+            r'发布时间&nbsp;([^<>]+) ', page, 'publish time', fatal=False),
+            delimiter=' ', timezone=datetime.timedelta(hours=8))
+        description = self._html_search_meta('description', page, fatal=False)
+
+        return {
+            'id': media_id,
+            'formats': urls,
+            'title': playurl['title'],
+            'thumbnail': playurl['pic'],
+            'description': description,
+            'timestamp': publish_time,
+        }
+
+
+class LetvTvIE(InfoExtractor):
+    _VALID_URL = r'http://www.letv.com/tv/(?P<id>\d+).html'
+    _TESTS = [{
+        'url': 'http://www.letv.com/tv/46177.html',
+        'info_dict': {
+            'id': '46177',
+            'title': '美人天下',
+            'description': 'md5:395666ff41b44080396e59570dbac01c'
+        },
+        'playlist_count': 35
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        page = self._download_webpage(url, playlist_id)
+
+        media_urls = list(set(re.findall(
+            r'http://www.letv.com/ptv/vplay/\d+.html', page)))
+        entries = [self.url_result(media_url, ie='Letv')
+                   for media_url in media_urls]
+
+        title = self._html_search_meta('keywords', page,
+                                       fatal=False).split('，')[0]
+        description = self._html_search_meta('description', page, fatal=False)
+
+        return self.playlist_result(entries, playlist_id, playlist_title=title,
+                                    playlist_description=description)
+
+
+class LetvPlaylistIE(LetvTvIE):
+    _VALID_URL = r'http://tv.letv.com/[a-z]+/(?P<id>[a-z]+)/index.s?html'
+    _TESTS = [{
+        'url': 'http://tv.letv.com/izt/wuzetian/index.html',
+        'info_dict': {
+            'id': 'wuzetian',
+            'title': '武媚娘传奇',
+            'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
+        },
+        # This playlist contains some extra videos other than the drama itself
+        'playlist_mincount': 96
+    }, {
+        'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
+        'info_dict': {
+            'id': 'lswjzzjc',
+            # The title should be "劲舞青春", but I can't find a simple way to
+            # determine the playlist title
+            'title': '乐视午间自制剧场',
+            'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
+        },
+        'playlist_mincount': 7
+    }]
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@ -18,7 +18,7 @@ class MiTeleIE(InfoExtractor):
    IE_NAME = 'mitele.es'
    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
        'info_dict': {
@ -29,7 +29,7 @@ class MiTeleIE(InfoExtractor):
            'display_id': 'programa-144',
            'duration': 2913,
        },
-    }
+    }]

    def _real_extract(self, url):
        episode = self._match_id(url)
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
    ExtractorError,
    float_or_none,
@ -158,7 +159,9 @@ class NRKTVIE(InfoExtractor):
    def _get_subtitles(self, subtitlesurl, video_id, baseurl):
        url = "%s%s" % (baseurl, subtitlesurl)
        self._debug_print('%s: Subtitle url: %s' % (video_id, url))
-        captions = self._download_xml(url, video_id, 'Downloading subtitles')
+        captions = self._download_xml(
+            url, video_id, 'Downloading subtitles',
+            transform_source=lambda s: s.replace(r'<br />', '\r\n'))
        lang = captions.get('lang', 'no')
        ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
        srt = ''
@ -167,8 +170,7 @@ class NRKTVIE(InfoExtractor):
            duration = parse_duration(p.get('dur'))
            starttime = self._seconds2str(begin)
            endtime = self._seconds2str(begin + duration)
-            text = '\n'.join(p.itertext())
-            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
+            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
        return {lang: [
            {'ext': 'ttml', 'url': url},
            {'ext': 'srt', 'data': srt},
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@ -146,7 +146,7 @@ class RTLnowIE(InfoExtractor):
                mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
                if mobj:
                    fmt = {
-                        'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
+                        'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
                        'play_path': 'mp4:' + mobj.group('play_path'),
                        'page_url': url,
                        'player_url': video_page_url + 'includes/vodplayer.swf',
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@ -119,7 +119,8 @@ class RTVEALaCartaIE(InfoExtractor):
        subs = self._download_json(
            sub_file + '.json', video_id,
            'Downloading subtitles info')['page']['items']
-        return dict((s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+        return dict(
+            (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
            for s in subs)


--- a/youtube_dl/extractor/telecinco.py
+++ b/youtube_dl/extractor/telecinco.py
@ -6,9 +6,9 @@ from .mitele import MiTeleIE

 class TelecincoIE(MiTeleIE):
    IE_NAME = 'telecinco.es'
-    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
        'info_dict': {
            'id': 'MDSVID20141015_0058',
@ -16,4 +16,7 @@ class TelecincoIE(MiTeleIE):
            'title': 'Con Martín Berasategui, hacer un bacalao al ...',
            'duration': 662,
        },
-    }
+    }, {
+        'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
+        'only_matching': True,
+    }]
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@ -28,6 +28,7 @@ class WDRIE(InfoExtractor):
                'title': 'Servicezeit',
                'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
                'upload_date': '20140310',
+                'is_live': False
            },
            'params': {
                'skip_download': True,
@ -41,6 +42,7 @@ class WDRIE(InfoExtractor):
                'title': 'Marga Spiegel ist tot',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20140311',
+                'is_live': False
            },
            'params': {
                'skip_download': True,
@ -55,6 +57,7 @@ class WDRIE(InfoExtractor):
                'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20091129',
+                'is_live': False
            },
        },
        {
@ -66,6 +69,7 @@ class WDRIE(InfoExtractor):
                'title': 'Flavia Coelho: Amar é Amar',
                'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
                'upload_date': '20140717',
+                'is_live': False
            },
        },
        {
@ -74,6 +78,20 @@ class WDRIE(InfoExtractor):
            'info_dict': {
                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
            }
+        },
+        {
+            'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
+            'info_dict': {
+                'id': 'mdb-103364',
+                'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
+                'ext': 'flv',
+                'upload_date': '20150212',
+                'is_live': True
+            },
+            'params': {
+                'skip_download': True,
+            },
        }
    ]

@ -119,6 +137,10 @@ class WDRIE(InfoExtractor):
        video_url = flashvars['dslSrc'][0]
        title = flashvars['trackerClipTitle'][0]
        thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
+        is_live = flashvars.get('isLive', ['0'])[0] == '1'
+
+        if is_live:
+            title = self._live_title(title)

        if 'trackerClipAirTime' in flashvars:
            upload_date = flashvars['trackerClipAirTime'][0]
@ -131,6 +153,13 @@ class WDRIE(InfoExtractor):
        if video_url.endswith('.f4m'):
            video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
            ext = 'flv'
+        elif video_url.endswith('.smil'):
+            fmt = self._extract_smil_formats(video_url, page_id)[0]
+            video_url = fmt['url']
+            sep = '&' if '?' in video_url else '?'
+            video_url += sep
+            video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
+            ext = fmt['ext']
        else:
            ext = determine_ext(video_url)

@ -144,6 +173,7 @@ class WDRIE(InfoExtractor):
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
+            'is_live': is_live
        }


--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@ -272,6 +272,10 @@ def parseOpts(overrideArguments=None):
        '--no-playlist',
        action='store_true', dest='noplaylist', default=False,
        help='If the URL refers to a video and a playlist, download only the video.')
+    selection.add_option(
+        '--yes-playlist',
+        action='store_false', dest='noplaylist', default=False,
+        help='If the URL refers to a video and a playlist, download the playlist.')
    selection.add_option(
        '--age-limit',
        metavar='YEARS', dest='age_limit', default=None, type=int,
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -54,7 +54,7 @@ from .compat import (
 compiled_regex_type = type(re.compile(''))

 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
@ -304,6 +304,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
+        if result.startswith('-'):
+            result = '_' + result[len('-'):]
        if not result:
            result = '_'
    return result
@ -1288,6 +1290,7 @@ def parse_duration(s):
            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

+            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
@ -1306,10 +1309,14 @@ def parse_duration(s):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)
    if m.group('secs'):
        res += int(m.group('secs'))
+    if m.group('mins_reversed'):
+        res += int(m.group('mins_reversed')) * 60
    if m.group('mins'):
        res += int(m.group('mins')) * 60
    if m.group('hours'):
        res += int(m.group('hours')) * 60 * 60
+    if m.group('hours_reversed'):
+        res += int(m.group('hours_reversed')) * 60 * 60
    if m.group('days'):
        res += int(m.group('days')) * 24 * 60 * 60
    if m.group('ms'):
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.02.24'
+__version__ = '2015.02.26.2'
Author	SHA1	Message	Date
Philipp Hagemeister	9724e5d336	release 2015.02.26.2	2015-02-26 09:45:11 +01:00
Philipp Hagemeister	63a562f95e	[escapist] Detect IP blocking and use another UA (Fixes #5069 )	2015-02-26 09:19:26 +01:00
Philipp Hagemeister	5c340b0387	release 2015.02.26.1	2015-02-26 01:47:16 +01:00
Philipp Hagemeister	1c6510f57a	[Makefile] clean pyc files in clean target	2015-02-26 01:47:12 +01:00
Philipp Hagemeister	2a15a98a6a	[rmtp] Encode filename before invoking subprocess This fixes #5066. Reproducible with LC_ALL=C youtube-dl "http://www.prosieben.de/tv/germanys-next-topmodel/video/playlist/ganze-folge-episode-2-das-casting-in-muenchen"	2015-02-26 01:44:20 +01:00
Philipp Hagemeister	72a406e7aa	[extractor/common] Pass in video_id (#5057 )	2015-02-26 01:35:43 +01:00
Philipp Hagemeister	feccc3ff37	Merge remote-tracking branch 'aajanki/wdr_live'	2015-02-26 01:34:01 +01:00
Philipp Hagemeister	265bfa2c79	[letv] Simplify	2015-02-26 01:30:18 +01:00
Philipp Hagemeister	8faf9b9b41	Merge remote-tracking branch 'yan12125/IE_Letv'	2015-02-26 01:26:55 +01:00
Philipp Hagemeister	84be7c230c	Cred @duncankl for airmozilla	2015-02-26 01:25:54 +01:00
Philipp Hagemeister	3e675fabe0	[airmozilla] Be more tolerant when nonessential items are missing (#5030 )	2015-02-26 01:25:00 +01:00
Philipp Hagemeister	cd5b4b0bc2	Merge remote-tracking branch 'duncankl/airmozilla'	2015-02-26 01:15:08 +01:00
Philipp Hagemeister	7ef822021b	Merge remote-tracking branch 'mmue/fix-rtlnow'	2015-02-26 01:13:03 +01:00
Philipp Hagemeister	9a48926a57	[escapist] Add support for advertisements	2015-02-26 00:59:53 +01:00
Philipp Hagemeister	13cd97f3df	release 2015.02.26	2015-02-26 00:42:02 +01:00
Philipp Hagemeister	183139340b	[utils] Bump our user agent	2015-02-26 00:40:12 +01:00
Philipp Hagemeister	1c69bca258	[escapist] Fix config URL matching	2015-02-26 00:24:54 +01:00
Jaime Marquínez Ferrándiz	c10ea454dc	[telecinco] Recognize more urls (closes #5065 )	2015-02-25 23:52:54 +01:00
Markus Müller	9504fc21b5	Fix the RTL extractor for new episodes by using a different hostname	2015-02-25 23:27:19 +01:00
Jaime Marquínez Ferrándiz	13d8fbef30	[generic] Don't set the 'title' if it's not defined in the entry (closes #5061 ) Some of them may be an 'url' result, which in general don't have the 'title' field.	2015-02-25 17:56:51 +01:00
Antti Ajanki	b8988b63a6	[wdr] Download a live stream	2015-02-24 21:23:59 +02:00
Antti Ajanki	5eaaeb7c31	[f4m] Tolerate missed fragments on live streams	2015-02-24 21:22:59 +02:00
Antti Ajanki	c4f8c453ae	[f4m] Refresh fragment list periodically on live streams	2015-02-24 21:22:59 +02:00
Antti Ajanki	6f4ba54079	[extractor/common] Extract HTTP (possibly f4m) URLs from a .smil file	2015-02-24 21:22:59 +02:00
Antti Ajanki	637570326b	[extractor/common] Extract the first of a seq of videos in a .smil file	2015-02-24 21:22:59 +02:00
Sergey M․	37f885650c	[eporner] Simplify and hardcode age limit	2015-02-25 01:08:54 +06:00
Sergey M.	c8c34ccb20	Merge pull request #5056 from logon84/master Eporner Fix (Closes #5050)	2015-02-25 01:05:35 +06:00
logon84	e765ed3a9c	[eporner] Fix redirect_code error	2015-02-24 19:41:46 +01:00
Yen Chi Hsuan	677063594e	[Letv] Update testcases	2015-02-25 02:10:55 +08:00
logon84	59c7cbd482	Update eporner.py Updated to work. Old version shows an error about being unable to extract "redirect_code"	2015-02-24 18:58:32 +01:00
Yen Chi Hsuan	570311610e	[Letv] Add playlist support	2015-02-25 01:26:44 +08:00
Sergey M․	41b264e77c	[nrktv] Workaround subtitles conversion issues on python 2.6 (Closes #5036 )	2015-02-24 23:06:44 +06:00
Philipp Hagemeister	df4bd0d53f	[options] Add --yes-playlist as inverse of --no-playlist (Fixes #5051 )	2015-02-24 17:25:02 +01:00
Yen Chi Hsuan	7f09a662a0	[Letv] Add new extractor. Single video only	2015-02-24 23:58:21 +08:00
Philipp Hagemeister	4f3b21e1c7	release 2015.02.24.2	2015-02-24 16:34:42 +01:00
Philipp Hagemeister	54233c9080	[escapist] Support JavaScript player (Fixes #5034 )	2015-02-24 16:33:07 +01:00
Philipp Hagemeister	db8e13ef71	release 2015.02.24.1	2015-02-24 11:38:21 +01:00
Philipp Hagemeister	5a42414b9c	[utils] Prevent hyphen at beginning of filename (Fixes #5035 )	2015-02-24 11:38:01 +01:00
Philipp Hagemeister	9c665ab72e	[rtve] PEP8	2015-02-24 11:37:27 +01:00
Duncan Keall	1b40dc92eb	[airmozilla] Add new extractor	2015-02-23 16:10:08 +13:00