Compare commits
40 Commits
2015.02.24
...
2015.02.26
Author | SHA1 | Date | |
---|---|---|---|
9724e5d336 | |||
63a562f95e | |||
5c340b0387 | |||
1c6510f57a | |||
2a15a98a6a | |||
72a406e7aa | |||
feccc3ff37 | |||
265bfa2c79 | |||
8faf9b9b41 | |||
84be7c230c | |||
3e675fabe0 | |||
cd5b4b0bc2 | |||
7ef822021b | |||
9a48926a57 | |||
13cd97f3df | |||
183139340b | |||
1c69bca258 | |||
c10ea454dc | |||
9504fc21b5 | |||
13d8fbef30 | |||
b8988b63a6 | |||
5eaaeb7c31 | |||
c4f8c453ae | |||
6f4ba54079 | |||
637570326b | |||
37f885650c | |||
c8c34ccb20 | |||
e765ed3a9c | |||
677063594e | |||
59c7cbd482 | |||
570311610e | |||
41b264e77c | |||
df4bd0d53f | |||
7f09a662a0 | |||
4f3b21e1c7 | |||
54233c9080 | |||
db8e13ef71 | |||
5a42414b9c | |||
9c665ab72e | |||
1b40dc92eb |
1
AUTHORS
1
AUTHORS
@ -112,3 +112,4 @@ Frans de Jonge
|
|||||||
Robin de Rooij
|
Robin de Rooij
|
||||||
Ryan Schmidt
|
Ryan Schmidt
|
||||||
Leslie P. Polzer
|
Leslie P. Polzer
|
||||||
|
Duncan Keall
|
||||||
|
1
Makefile
1
Makefile
@ -2,6 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
|
|||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||||
|
find -name "*.pyc" -delete
|
||||||
|
|
||||||
PREFIX ?= /usr/local
|
PREFIX ?= /usr/local
|
||||||
BINDIR ?= $(PREFIX)/bin
|
BINDIR ?= $(PREFIX)/bin
|
||||||
|
@ -139,6 +139,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
dislike_count <? 50 & description" .
|
dislike_count <? 50 & description" .
|
||||||
--no-playlist If the URL refers to a video and a
|
--no-playlist If the URL refers to a video and a
|
||||||
playlist, download only the video.
|
playlist, download only the video.
|
||||||
|
--yes-playlist If the URL refers to a video and a
|
||||||
|
playlist, download the playlist.
|
||||||
--age-limit YEARS download only videos suitable for the given
|
--age-limit YEARS download only videos suitable for the given
|
||||||
age
|
age
|
||||||
--download-archive FILE Download only videos not listed in the
|
--download-archive FILE Download only videos not listed in the
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
- **AdultSwim**
|
- **AdultSwim**
|
||||||
- **Aftenposten**
|
- **Aftenposten**
|
||||||
- **Aftonbladet**
|
- **Aftonbladet**
|
||||||
|
- **AirMozilla**
|
||||||
- **AlJazeera**
|
- **AlJazeera**
|
||||||
- **Allocine**
|
- **Allocine**
|
||||||
- **AlphaPorno**
|
- **AlphaPorno**
|
||||||
@ -220,6 +221,9 @@
|
|||||||
- **Ku6**
|
- **Ku6**
|
||||||
- **la7.tv**
|
- **la7.tv**
|
||||||
- **Laola1Tv**
|
- **Laola1Tv**
|
||||||
|
- **Letv**
|
||||||
|
- **LetvPlaylist**
|
||||||
|
- **LetvTv**
|
||||||
- **lifenews**: LIFE | NEWS
|
- **lifenews**: LIFE | NEWS
|
||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
- **livestream**
|
- **livestream**
|
||||||
|
@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
sanitize_filename('New World record at 0:12:34'),
|
sanitize_filename('New World record at 0:12:34'),
|
||||||
'New World record at 0_12_34')
|
'New World record at 0_12_34')
|
||||||
|
self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
|
||||||
|
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
|
||||||
|
|
||||||
forbidden = '"\0\\/'
|
forbidden = '"\0\\/'
|
||||||
for fc in forbidden:
|
for fc in forbidden:
|
||||||
@ -244,6 +246,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
||||||
self.assertEqual(parse_duration('02:03:04'), 7384)
|
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||||
|
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||||
|
|
||||||
def test_fix_xml_ampersands(self):
|
def test_fix_xml_ampersands(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
@ -11,6 +11,7 @@ from .common import FileDownloader
|
|||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
compat_urllib_error,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_pack,
|
struct_pack,
|
||||||
@ -121,7 +122,8 @@ class FlvReader(io.BytesIO):
|
|||||||
|
|
||||||
self.read_unsigned_int() # BootstrapinfoVersion
|
self.read_unsigned_int() # BootstrapinfoVersion
|
||||||
# Profile,Live,Update,Reserved
|
# Profile,Live,Update,Reserved
|
||||||
self.read(1)
|
flags = self.read_unsigned_char()
|
||||||
|
live = flags & 0x20 != 0
|
||||||
# time scale
|
# time scale
|
||||||
self.read_unsigned_int()
|
self.read_unsigned_int()
|
||||||
# CurrentMediaTime
|
# CurrentMediaTime
|
||||||
@ -160,6 +162,7 @@ class FlvReader(io.BytesIO):
|
|||||||
return {
|
return {
|
||||||
'segments': segments,
|
'segments': segments,
|
||||||
'fragments': fragments,
|
'fragments': fragments,
|
||||||
|
'live': live,
|
||||||
}
|
}
|
||||||
|
|
||||||
def read_bootstrap_info(self):
|
def read_bootstrap_info(self):
|
||||||
@ -182,6 +185,10 @@ def build_fragments_list(boot_info):
|
|||||||
for segment, fragments_count in segment_run_table['segment_run']:
|
for segment, fragments_count in segment_run_table['segment_run']:
|
||||||
for _ in range(fragments_count):
|
for _ in range(fragments_count):
|
||||||
res.append((segment, next(fragments_counter)))
|
res.append((segment, next(fragments_counter)))
|
||||||
|
|
||||||
|
if boot_info['live']:
|
||||||
|
res = res[-2:]
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
@ -246,6 +253,38 @@ class F4mFD(FileDownloader):
|
|||||||
self.report_error('Unsupported DRM')
|
self.report_error('Unsupported DRM')
|
||||||
return media
|
return media
|
||||||
|
|
||||||
|
def _get_bootstrap_from_url(self, bootstrap_url):
|
||||||
|
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
||||||
|
return read_bootstrap_info(bootstrap)
|
||||||
|
|
||||||
|
def _update_live_fragments(self, bootstrap_url, latest_fragment):
|
||||||
|
fragments_list = []
|
||||||
|
retries = 30
|
||||||
|
while (not fragments_list) and (retries > 0):
|
||||||
|
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||||
|
fragments_list = build_fragments_list(boot_info)
|
||||||
|
fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
|
||||||
|
if not fragments_list:
|
||||||
|
# Retry after a while
|
||||||
|
time.sleep(5.0)
|
||||||
|
retries -= 1
|
||||||
|
|
||||||
|
if not fragments_list:
|
||||||
|
self.report_error('Failed to update fragments')
|
||||||
|
|
||||||
|
return fragments_list
|
||||||
|
|
||||||
|
def _parse_bootstrap_node(self, node, base_url):
|
||||||
|
if node.text is None:
|
||||||
|
bootstrap_url = compat_urlparse.urljoin(
|
||||||
|
base_url, node.attrib['url'])
|
||||||
|
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||||
|
else:
|
||||||
|
bootstrap_url = None
|
||||||
|
bootstrap = base64.b64decode(node.text)
|
||||||
|
boot_info = read_bootstrap_info(bootstrap)
|
||||||
|
return (boot_info, bootstrap_url)
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
man_url = info_dict['url']
|
man_url = info_dict['url']
|
||||||
requested_bitrate = info_dict.get('tbr')
|
requested_bitrate = info_dict.get('tbr')
|
||||||
@ -265,18 +304,13 @@ class F4mFD(FileDownloader):
|
|||||||
|
|
||||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||||
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
||||||
if bootstrap_node.text is None:
|
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
|
||||||
bootstrap_url = compat_urlparse.urljoin(
|
live = boot_info['live']
|
||||||
base_url, bootstrap_node.attrib['url'])
|
|
||||||
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
|
||||||
else:
|
|
||||||
bootstrap = base64.b64decode(bootstrap_node.text)
|
|
||||||
metadata_node = media.find(_add_ns('metadata'))
|
metadata_node = media.find(_add_ns('metadata'))
|
||||||
if metadata_node is not None:
|
if metadata_node is not None:
|
||||||
metadata = base64.b64decode(metadata_node.text)
|
metadata = base64.b64decode(metadata_node.text)
|
||||||
else:
|
else:
|
||||||
metadata = None
|
metadata = None
|
||||||
boot_info = read_bootstrap_info(bootstrap)
|
|
||||||
|
|
||||||
fragments_list = build_fragments_list(boot_info)
|
fragments_list = build_fragments_list(boot_info)
|
||||||
if self.params.get('test', False):
|
if self.params.get('test', False):
|
||||||
@ -301,7 +335,8 @@ class F4mFD(FileDownloader):
|
|||||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||||
|
|
||||||
write_flv_header(dest_stream)
|
write_flv_header(dest_stream)
|
||||||
write_metadata_tag(dest_stream, metadata)
|
if not live:
|
||||||
|
write_metadata_tag(dest_stream, metadata)
|
||||||
|
|
||||||
# This dict stores the download progress, it's updated by the progress
|
# This dict stores the download progress, it's updated by the progress
|
||||||
# hook
|
# hook
|
||||||
@ -348,24 +383,45 @@ class F4mFD(FileDownloader):
|
|||||||
http_dl.add_progress_hook(frag_progress_hook)
|
http_dl.add_progress_hook(frag_progress_hook)
|
||||||
|
|
||||||
frags_filenames = []
|
frags_filenames = []
|
||||||
for (seg_i, frag_i) in fragments_list:
|
while fragments_list:
|
||||||
|
seg_i, frag_i = fragments_list.pop(0)
|
||||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||||
url = base_url + name
|
url = base_url + name
|
||||||
if akamai_pv:
|
if akamai_pv:
|
||||||
url += '?' + akamai_pv.strip(';')
|
url += '?' + akamai_pv.strip(';')
|
||||||
frag_filename = '%s-%s' % (tmpfilename, name)
|
frag_filename = '%s-%s' % (tmpfilename, name)
|
||||||
success = http_dl.download(frag_filename, {'url': url})
|
try:
|
||||||
if not success:
|
success = http_dl.download(frag_filename, {'url': url})
|
||||||
return False
|
if not success:
|
||||||
with open(frag_filename, 'rb') as down:
|
return False
|
||||||
down_data = down.read()
|
with open(frag_filename, 'rb') as down:
|
||||||
reader = FlvReader(down_data)
|
down_data = down.read()
|
||||||
while True:
|
reader = FlvReader(down_data)
|
||||||
_, box_type, box_data = reader.read_box_info()
|
while True:
|
||||||
if box_type == b'mdat':
|
_, box_type, box_data = reader.read_box_info()
|
||||||
dest_stream.write(box_data)
|
if box_type == b'mdat':
|
||||||
break
|
dest_stream.write(box_data)
|
||||||
frags_filenames.append(frag_filename)
|
break
|
||||||
|
if live:
|
||||||
|
os.remove(frag_filename)
|
||||||
|
else:
|
||||||
|
frags_filenames.append(frag_filename)
|
||||||
|
except (compat_urllib_error.HTTPError, ) as err:
|
||||||
|
if live and (err.code == 404 or err.code == 410):
|
||||||
|
# We didn't keep up with the live window. Continue
|
||||||
|
# with the next available fragment.
|
||||||
|
msg = 'Fragment %d unavailable' % frag_i
|
||||||
|
self.report_warning(msg)
|
||||||
|
fragments_list = []
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
|
if not fragments_list and live and bootstrap_url:
|
||||||
|
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
||||||
|
total_frags += len(fragments_list)
|
||||||
|
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
||||||
|
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
|
||||||
|
self.report_warning(msg)
|
||||||
|
|
||||||
dest_stream.close()
|
dest_stream.close()
|
||||||
|
|
||||||
|
@ -119,7 +119,9 @@ class RtmpFD(FileDownloader):
|
|||||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||||
# the connection was interrumpted and resuming appears to be
|
# the connection was interrumpted and resuming appears to be
|
||||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||||
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
basic_args = [
|
||||||
|
'rtmpdump', '--verbose', '-r', url,
|
||||||
|
'-o', encodeFilename(tmpfilename, True)]
|
||||||
if player_url is not None:
|
if player_url is not None:
|
||||||
basic_args += ['--swfVfy', player_url]
|
basic_args += ['--swfVfy', player_url]
|
||||||
if page_url is not None:
|
if page_url is not None:
|
||||||
|
@ -8,6 +8,7 @@ from .adobetv import AdobeTVIE
|
|||||||
from .adultswim import AdultSwimIE
|
from .adultswim import AdultSwimIE
|
||||||
from .aftenposten import AftenpostenIE
|
from .aftenposten import AftenpostenIE
|
||||||
from .aftonbladet import AftonbladetIE
|
from .aftonbladet import AftonbladetIE
|
||||||
|
from .airmozilla import AirMozillaIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
from .anitube import AnitubeIE
|
from .anitube import AnitubeIE
|
||||||
@ -237,6 +238,11 @@ from .krasview import KrasViewIE
|
|||||||
from .ku6 import Ku6IE
|
from .ku6 import Ku6IE
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
from .laola1tv import Laola1TvIE
|
from .laola1tv import Laola1TvIE
|
||||||
|
from .letv import (
|
||||||
|
LetvIE,
|
||||||
|
LetvTvIE,
|
||||||
|
LetvPlaylistIE
|
||||||
|
)
|
||||||
from .lifenews import LifeNewsIE
|
from .lifenews import LifeNewsIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import (
|
from .livestream import (
|
||||||
|
74
youtube_dl/extractor/airmozilla.py
Normal file
74
youtube_dl/extractor/airmozilla.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AirMozillaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
|
||||||
|
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6x4q2w',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
||||||
|
'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
|
||||||
|
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
||||||
|
'timestamp': 1422487800,
|
||||||
|
'upload_date': '20150128',
|
||||||
|
'location': 'SFO Commons',
|
||||||
|
'duration': 3780,
|
||||||
|
'view_count': int,
|
||||||
|
'categories': ['Main'],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
|
||||||
|
|
||||||
|
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
|
||||||
|
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
|
||||||
|
metadata = self._parse_json(jwconfig, video_id)
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': source['file'],
|
||||||
|
'ext': source['type'],
|
||||||
|
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
|
||||||
|
'format': source['label'],
|
||||||
|
'height': int(source['label'].rstrip('p')),
|
||||||
|
} for source in metadata['playlist'][0]['sources']]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
view_count = int_or_none(self._html_search_regex(
|
||||||
|
r'Views since archived: ([0-9]+)',
|
||||||
|
webpage, 'view count', fatal=False))
|
||||||
|
timestamp = parse_iso8601(self._html_search_regex(
|
||||||
|
r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
|
||||||
|
duration = parse_duration(self._search_regex(
|
||||||
|
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'formats': formats,
|
||||||
|
'url': self._og_search_url(webpage),
|
||||||
|
'display_id': display_id,
|
||||||
|
'thumbnail': metadata['playlist'][0].get('image'),
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
|
||||||
|
}
|
@ -921,39 +921,57 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
rtmp_count = 0
|
rtmp_count = 0
|
||||||
for video in smil.findall('./body/switch/video'):
|
if smil.findall('./body/seq/video'):
|
||||||
src = video.get('src')
|
video = smil.findall('./body/seq/video')[0]
|
||||||
if not src:
|
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||||
continue
|
formats.extend(fmts)
|
||||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
else:
|
||||||
width = int_or_none(video.get('width'))
|
for video in smil.findall('./body/switch/video'):
|
||||||
height = int_or_none(video.get('height'))
|
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||||
proto = video.get('proto')
|
formats.extend(fmts)
|
||||||
if not proto:
|
|
||||||
if base:
|
|
||||||
if base.startswith('rtmp'):
|
|
||||||
proto = 'rtmp'
|
|
||||||
elif base.startswith('http'):
|
|
||||||
proto = 'http'
|
|
||||||
ext = video.get('ext')
|
|
||||||
if proto == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
|
|
||||||
elif proto == 'rtmp':
|
|
||||||
rtmp_count += 1
|
|
||||||
streamer = video.get('streamer') or base
|
|
||||||
formats.append({
|
|
||||||
'url': streamer,
|
|
||||||
'play_path': src,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
|
||||||
'tbr': bitrate,
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _parse_smil_video(self, video, video_id, base, rtmp_count):
|
||||||
|
src = video.get('src')
|
||||||
|
if not src:
|
||||||
|
return ([], rtmp_count)
|
||||||
|
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||||
|
width = int_or_none(video.get('width'))
|
||||||
|
height = int_or_none(video.get('height'))
|
||||||
|
proto = video.get('proto')
|
||||||
|
if not proto:
|
||||||
|
if base:
|
||||||
|
if base.startswith('rtmp'):
|
||||||
|
proto = 'rtmp'
|
||||||
|
elif base.startswith('http'):
|
||||||
|
proto = 'http'
|
||||||
|
ext = video.get('ext')
|
||||||
|
if proto == 'm3u8':
|
||||||
|
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
|
||||||
|
elif proto == 'rtmp':
|
||||||
|
rtmp_count += 1
|
||||||
|
streamer = video.get('streamer') or base
|
||||||
|
return ([{
|
||||||
|
'url': streamer,
|
||||||
|
'play_path': src,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||||
|
'tbr': bitrate,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
}], rtmp_count)
|
||||||
|
elif proto.startswith('http'):
|
||||||
|
return ([{
|
||||||
|
'url': base + src,
|
||||||
|
'ext': ext or 'flv',
|
||||||
|
'tbr': bitrate,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
}], rtmp_count)
|
||||||
|
|
||||||
def _live_title(self, name):
|
def _live_title(self, name):
|
||||||
""" Generate the title for a live video """
|
""" Generate the title for a live video """
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
|
@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
|
|||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>(.*?) - EPORNER', webpage, 'title')
|
r'<title>(.*?) - EPORNER', webpage, 'title')
|
||||||
|
|
||||||
redirect_code = self._html_search_regex(
|
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
|
||||||
r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
|
|
||||||
webpage, 'redirect_code')
|
|
||||||
redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
|
|
||||||
player_code = self._download_webpage(
|
player_code = self._download_webpage(
|
||||||
redirect_url, display_id, note='Downloading player config')
|
redirect_url, display_id, note='Downloading player config')
|
||||||
|
|
||||||
@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': self._rta_search(webpage),
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -12,6 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class EscapistIE(InfoExtractor):
|
class EscapistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
||||||
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||||
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
||||||
@ -28,7 +30,9 @@ class EscapistIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage_req = compat_urllib_request.Request(url)
|
||||||
|
webpage_req.add_header('User-Agent', self._USER_AGENT)
|
||||||
|
webpage = self._download_webpage(webpage_req, video_id)
|
||||||
|
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
|
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
|
||||||
@ -42,26 +46,44 @@ class EscapistIE(InfoExtractor):
|
|||||||
title = raw_title.partition(' : ')[2]
|
title = raw_title.partition(' : ')[2]
|
||||||
|
|
||||||
config_url = compat_urllib_parse.unquote(self._html_search_regex(
|
config_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||||
r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL'))
|
r'''(?x)
|
||||||
|
(?:
|
||||||
|
<param\s+name="flashvars".*?\s+value="config=|
|
||||||
|
flashvars="config=
|
||||||
|
)
|
||||||
|
(https?://[^"&]+)
|
||||||
|
''',
|
||||||
|
webpage, 'config URL'))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
ad_formats = []
|
||||||
|
|
||||||
def _add_format(name, cfgurl, quality):
|
def _add_format(name, cfg_url, quality):
|
||||||
|
cfg_req = compat_urllib_request.Request(cfg_url)
|
||||||
|
cfg_req.add_header('User-Agent', self._USER_AGENT)
|
||||||
config = self._download_json(
|
config = self._download_json(
|
||||||
cfgurl, video_id,
|
cfg_req, video_id,
|
||||||
'Downloading ' + name + ' configuration',
|
'Downloading ' + name + ' configuration',
|
||||||
'Unable to download ' + name + ' configuration',
|
'Unable to download ' + name + ' configuration',
|
||||||
transform_source=js_to_json)
|
transform_source=js_to_json)
|
||||||
|
|
||||||
playlist = config['playlist']
|
playlist = config['playlist']
|
||||||
video_url = next(
|
for p in playlist:
|
||||||
p['url'] for p in playlist
|
if p.get('eventCategory') == 'Video':
|
||||||
if p.get('eventCategory') == 'Video')
|
ar = formats
|
||||||
formats.append({
|
elif p.get('eventCategory') == 'Video Postroll':
|
||||||
'url': video_url,
|
ar = ad_formats
|
||||||
'format_id': name,
|
else:
|
||||||
'quality': quality,
|
continue
|
||||||
})
|
|
||||||
|
ar.append({
|
||||||
|
'url': p['url'],
|
||||||
|
'format_id': name,
|
||||||
|
'quality': quality,
|
||||||
|
'http_headers': {
|
||||||
|
'User-Agent': self._USER_AGENT,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
_add_format('normal', config_url, quality=0)
|
_add_format('normal', config_url, quality=0)
|
||||||
hq_url = (config_url +
|
hq_url = (config_url +
|
||||||
@ -70,10 +92,12 @@ class EscapistIE(InfoExtractor):
|
|||||||
_add_format('hq', hq_url, quality=1)
|
_add_format('hq', hq_url, quality=1)
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
pass # That's fine, we'll just use normal quality
|
pass # That's fine, we'll just use normal quality
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
if '/escapist/sales-marketing/' in formats[-1]['url']:
|
||||||
|
raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
|
||||||
|
|
||||||
|
res = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
@ -82,3 +106,19 @@ class EscapistIE(InfoExtractor):
|
|||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': description,
|
'description': description,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if self._downloader.params.get('include_ads') and ad_formats:
|
||||||
|
self._sort_formats(ad_formats)
|
||||||
|
ad_res = {
|
||||||
|
'id': '%s-ad' % video_id,
|
||||||
|
'title': '%s (Postroll)' % title,
|
||||||
|
'formats': ad_formats,
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': [res, ad_res],
|
||||||
|
'title': title,
|
||||||
|
'id': video_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
return res
|
||||||
|
@ -1208,7 +1208,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return entries[0]
|
return entries[0]
|
||||||
else:
|
else:
|
||||||
for num, e in enumerate(entries, start=1):
|
for num, e in enumerate(entries, start=1):
|
||||||
e['title'] = '%s (%d)' % (e['title'], num)
|
# 'url' results don't have a title
|
||||||
|
if e.get('title') is not None:
|
||||||
|
e['title'] = '%s (%d)' % (e['title'], num)
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
|
190
youtube_dl/extractor/letv.py
Normal file
190
youtube_dl/extractor/letv.py
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urlparse,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LetvIE(InfoExtractor):
    """Extractor for single videos on www.letv.com (乐视网).

    Queries Letv's playJson API with a time-based token (``tkey``) computed
    by re-implementing the obfuscation found in the site's player SWF, then
    rewrites the returned media URLs the same way flvxz.com does to obtain
    directly downloadable links.
    """
    # Fixed: escape the '.' before 'html' so it cannot match arbitrary chars.
    _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+)\.html'

    _TESTS = [{
        'url': 'http://www.letv.com/ptv/vplay/22005890.html',
        'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
        'info_dict': {
            'id': '22005890',
            'ext': 'mp4',
            'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
            'timestamp': 1424747397,
            'upload_date': '20150224',
            'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
        }
    }, {
        'url': 'http://www.letv.com/ptv/vplay/1415246.html',
        'info_dict': {
            'id': '1415246',
            'ext': 'mp4',
            'title': '美人天下01',
            'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
        },
        'expected_warnings': [
            'publish time'
        ]
    }]
    # http://www.letv.com/ptv/vplay/1118082.html
    # This video is available only in Mainland China

    @staticmethod
    def urshift(val, n):
        """Unsigned 32-bit right shift (Python's >> is arithmetic for ints)."""
        return val >> n if val >= 0 else (val + 0x100000000) >> n

    # ror() and calc_time_key() are reversed from an embedded swf file in
    # KLetvPlayer.swf; variable names mirror the decompiled ActionScript.
    def ror(self, param1, param2):
        """Rotate the 32-bit value param1 right by param2 bits."""
        _loc3_ = 0
        while _loc3_ < param2:
            param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
            _loc3_ += 1
        return param1

    def calc_time_key(self, param1):
        """Derive the API 'tkey' token from a Unix timestamp (param1)."""
        _loc2_ = 773625421
        _loc3_ = self.ror(param1, _loc2_ % 13)
        _loc3_ = _loc3_ ^ _loc2_
        _loc3_ = self.ror(_loc3_, _loc2_ % 17)
        return _loc3_

    def _real_extract(self, url):
        media_id = self._match_id(url)
        page = self._download_webpage(url, media_id)
        params = {
            'id': media_id,
            'platid': 1,
            'splatid': 101,
            'format': 1,
            # Token is valid only around the current time, hence time.time().
            'tkey': self.calc_time_key(int(time.time())),
            'domain': 'www.letv.com'
        }
        play_json = self._download_json(
            'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
            media_id, 'playJson data')

        # Check for errors reported by the API before touching the payload.
        playstatus = play_json['playstatus']
        if playstatus['status'] == 0:
            flag = playstatus['flag']
            if flag == 1:
                msg = 'Country %s auth error' % playstatus['country']
            else:
                msg = 'Generic error. flag = %d' % flag
            raise ExtractorError(msg, expected=True)

        playurl = play_json['playurl']

        # Known quality keys, ordered worst to best.
        formats = ['350', '1000', '1300', '720p', '1080p']
        dispatch = playurl['dispatch']

        urls = []
        for format_id in formats:
            if format_id in dispatch:
                media_url = playurl['domain'][0] + dispatch[format_id][0]

                # Mimic what flvxz.com do: rewrite the query string so the
                # CDN serves a plain downloadable stream.
                url_parts = list(compat_urlparse.urlparse(media_url))
                qs = dict(compat_urlparse.parse_qs(url_parts[4]))
                qs.update({
                    'platid': '14',
                    'splatid': '1401',
                    'tss': 'no',
                    'retry': 1
                })
                url_parts[4] = compat_urllib_parse.urlencode(qs)
                media_url = compat_urlparse.urlunparse(url_parts)

                url_info_dict = {
                    'url': media_url,
                    # Expose the quality key so format selection can use it.
                    'format_id': format_id,
                    'ext': determine_ext(dispatch[format_id][1])
                }

                if format_id[-1:] == 'p':
                    # Fixed: height must be an int, not a string like '720'.
                    url_info_dict['height'] = int(format_id[:-1])

                urls.append(url_info_dict)

        # Page timestamps are in China Standard Time (UTC+8).
        publish_time = parse_iso8601(self._html_search_regex(
            r'发布时间 ([^<>]+) ', page, 'publish time', fatal=False),
            delimiter=' ', timezone=datetime.timedelta(hours=8))
        description = self._html_search_meta('description', page, fatal=False)

        return {
            'id': media_id,
            'formats': urls,
            'title': playurl['title'],
            'thumbnail': playurl['pic'],
            'description': description,
            'timestamp': publish_time,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class LetvTvIE(InfoExtractor):
    """Playlist extractor for www.letv.com/tv/<id>.html series pages.

    Scrapes all single-video (ptv/vplay) links out of the page and defers
    each one to the Letv extractor.
    """
    # Fixed: escape literal dots so '.' cannot match arbitrary characters.
    _VALID_URL = r'http://www\.letv\.com/tv/(?P<id>\d+)\.html'
    _TESTS = [{
        'url': 'http://www.letv.com/tv/46177.html',
        'info_dict': {
            'id': '46177',
            'title': '美人天下',
            'description': 'md5:395666ff41b44080396e59570dbac01c'
        },
        'playlist_count': 35
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        # De-duplicate episode links; escape dots so the pattern matches
        # only real letv.com vplay URLs (the original pattern's bare dots
        # matched any character).
        media_urls = list(set(re.findall(
            r'http://www\.letv\.com/ptv/vplay/\d+\.html', page)))
        entries = [self.url_result(media_url, ie='Letv')
                   for media_url in media_urls]

        # The first keyword doubles as the series title; description comes
        # from the standard meta tag. Both are best-effort (fatal=False).
        title = self._html_search_meta('keywords', page,
                                       fatal=False).split(',')[0]
        description = self._html_search_meta('description', page, fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_title=title,
                                    playlist_description=description)
|
||||||
|
|
||||||
|
|
||||||
|
class LetvPlaylistIE(LetvTvIE):
    """Playlist extractor for tv.letv.com index pages.

    Reuses LetvTvIE's extraction logic; only the URL pattern differs.
    """
    # Fixed: escape literal dots ('tv.letv.com', 'index.s?html') so '.'
    # cannot match arbitrary characters.
    _VALID_URL = r'http://tv\.letv\.com/[a-z]+/(?P<id>[a-z]+)/index\.s?html'
    _TESTS = [{
        'url': 'http://tv.letv.com/izt/wuzetian/index.html',
        'info_dict': {
            'id': 'wuzetian',
            'title': '武媚娘传奇',
            'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
        },
        # This playlist contains some extra videos other than the drama itself
        'playlist_mincount': 96
    }, {
        'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
        'info_dict': {
            'id': 'lswjzzjc',
            # The title should be "劲舞青春", but I can't find a simple way to
            # determine the playlist title
            'title': '乐视午间自制剧场',
            'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
        },
        'playlist_mincount': 7
    }]
|
@ -18,7 +18,7 @@ class MiTeleIE(InfoExtractor):
|
|||||||
IE_NAME = 'mitele.es'
|
IE_NAME = 'mitele.es'
|
||||||
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||||
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
|
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -29,7 +29,7 @@ class MiTeleIE(InfoExtractor):
|
|||||||
'display_id': 'programa-144',
|
'display_id': 'programa-144',
|
||||||
'duration': 2913,
|
'duration': 2913,
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
episode = self._match_id(url)
|
episode = self._match_id(url)
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
@ -158,7 +159,9 @@ class NRKTVIE(InfoExtractor):
|
|||||||
def _get_subtitles(self, subtitlesurl, video_id, baseurl):
|
def _get_subtitles(self, subtitlesurl, video_id, baseurl):
|
||||||
url = "%s%s" % (baseurl, subtitlesurl)
|
url = "%s%s" % (baseurl, subtitlesurl)
|
||||||
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
|
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
|
||||||
captions = self._download_xml(url, video_id, 'Downloading subtitles')
|
captions = self._download_xml(
|
||||||
|
url, video_id, 'Downloading subtitles',
|
||||||
|
transform_source=lambda s: s.replace(r'<br />', '\r\n'))
|
||||||
lang = captions.get('lang', 'no')
|
lang = captions.get('lang', 'no')
|
||||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
|
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
|
||||||
srt = ''
|
srt = ''
|
||||||
@ -167,8 +170,7 @@ class NRKTVIE(InfoExtractor):
|
|||||||
duration = parse_duration(p.get('dur'))
|
duration = parse_duration(p.get('dur'))
|
||||||
starttime = self._seconds2str(begin)
|
starttime = self._seconds2str(begin)
|
||||||
endtime = self._seconds2str(begin + duration)
|
endtime = self._seconds2str(begin + duration)
|
||||||
text = '\n'.join(p.itertext())
|
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
|
||||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
|
|
||||||
return {lang: [
|
return {lang: [
|
||||||
{'ext': 'ttml', 'url': url},
|
{'ext': 'ttml', 'url': url},
|
||||||
{'ext': 'srt', 'data': srt},
|
{'ext': 'srt', 'data': srt},
|
||||||
|
@ -146,7 +146,7 @@ class RTLnowIE(InfoExtractor):
|
|||||||
mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
|
mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
|
||||||
if mobj:
|
if mobj:
|
||||||
fmt = {
|
fmt = {
|
||||||
'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
|
'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
|
||||||
'play_path': 'mp4:' + mobj.group('play_path'),
|
'play_path': 'mp4:' + mobj.group('play_path'),
|
||||||
'page_url': url,
|
'page_url': url,
|
||||||
'player_url': video_page_url + 'includes/vodplayer.swf',
|
'player_url': video_page_url + 'includes/vodplayer.swf',
|
||||||
|
@ -119,7 +119,8 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
subs = self._download_json(
|
subs = self._download_json(
|
||||||
sub_file + '.json', video_id,
|
sub_file + '.json', video_id,
|
||||||
'Downloading subtitles info')['page']['items']
|
'Downloading subtitles info')['page']['items']
|
||||||
return dict((s['lang'], [{'ext': 'vtt', 'url': s['src']}])
|
return dict(
|
||||||
|
(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
|
||||||
for s in subs)
|
for s in subs)
|
||||||
|
|
||||||
|
|
||||||
|
@ -6,9 +6,9 @@ from .mitele import MiTeleIE
|
|||||||
|
|
||||||
class TelecincoIE(MiTeleIE):
|
class TelecincoIE(MiTeleIE):
|
||||||
IE_NAME = 'telecinco.es'
|
IE_NAME = 'telecinco.es'
|
||||||
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
|
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MDSVID20141015_0058',
|
'id': 'MDSVID20141015_0058',
|
||||||
@ -16,4 +16,7 @@ class TelecincoIE(MiTeleIE):
|
|||||||
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
|
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
|
||||||
'duration': 662,
|
'duration': 662,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
@ -28,6 +28,7 @@ class WDRIE(InfoExtractor):
|
|||||||
'title': 'Servicezeit',
|
'title': 'Servicezeit',
|
||||||
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
|
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
|
||||||
'upload_date': '20140310',
|
'upload_date': '20140310',
|
||||||
|
'is_live': False
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -41,6 +42,7 @@ class WDRIE(InfoExtractor):
|
|||||||
'title': 'Marga Spiegel ist tot',
|
'title': 'Marga Spiegel ist tot',
|
||||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||||
'upload_date': '20140311',
|
'upload_date': '20140311',
|
||||||
|
'is_live': False
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -55,6 +57,7 @@ class WDRIE(InfoExtractor):
|
|||||||
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
|
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
|
||||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||||
'upload_date': '20091129',
|
'upload_date': '20091129',
|
||||||
|
'is_live': False
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -66,6 +69,7 @@ class WDRIE(InfoExtractor):
|
|||||||
'title': 'Flavia Coelho: Amar é Amar',
|
'title': 'Flavia Coelho: Amar é Amar',
|
||||||
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
|
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
|
||||||
'upload_date': '20140717',
|
'upload_date': '20140717',
|
||||||
|
'is_live': False
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -74,6 +78,20 @@ class WDRIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
|
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mdb-103364',
|
||||||
|
'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
|
||||||
|
'ext': 'flv',
|
||||||
|
'upload_date': '20150212',
|
||||||
|
'is_live': True
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -119,6 +137,10 @@ class WDRIE(InfoExtractor):
|
|||||||
video_url = flashvars['dslSrc'][0]
|
video_url = flashvars['dslSrc'][0]
|
||||||
title = flashvars['trackerClipTitle'][0]
|
title = flashvars['trackerClipTitle'][0]
|
||||||
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
|
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
|
||||||
|
is_live = flashvars.get('isLive', ['0'])[0] == '1'
|
||||||
|
|
||||||
|
if is_live:
|
||||||
|
title = self._live_title(title)
|
||||||
|
|
||||||
if 'trackerClipAirTime' in flashvars:
|
if 'trackerClipAirTime' in flashvars:
|
||||||
upload_date = flashvars['trackerClipAirTime'][0]
|
upload_date = flashvars['trackerClipAirTime'][0]
|
||||||
@ -131,6 +153,13 @@ class WDRIE(InfoExtractor):
|
|||||||
if video_url.endswith('.f4m'):
|
if video_url.endswith('.f4m'):
|
||||||
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
|
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
|
||||||
ext = 'flv'
|
ext = 'flv'
|
||||||
|
elif video_url.endswith('.smil'):
|
||||||
|
fmt = self._extract_smil_formats(video_url, page_id)[0]
|
||||||
|
video_url = fmt['url']
|
||||||
|
sep = '&' if '?' in video_url else '?'
|
||||||
|
video_url += sep
|
||||||
|
video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
|
||||||
|
ext = fmt['ext']
|
||||||
else:
|
else:
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
|
|
||||||
@ -144,6 +173,7 @@ class WDRIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
|
'is_live': is_live
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -272,6 +272,10 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--no-playlist',
|
'--no-playlist',
|
||||||
action='store_true', dest='noplaylist', default=False,
|
action='store_true', dest='noplaylist', default=False,
|
||||||
help='If the URL refers to a video and a playlist, download only the video.')
|
help='If the URL refers to a video and a playlist, download only the video.')
|
||||||
|
selection.add_option(
|
||||||
|
'--yes-playlist',
|
||||||
|
action='store_false', dest='noplaylist', default=False,
|
||||||
|
help='If the URL refers to a video and a playlist, download the playlist.')
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--age-limit',
|
'--age-limit',
|
||||||
metavar='YEARS', dest='age_limit', default=None, type=int,
|
metavar='YEARS', dest='age_limit', default=None, type=int,
|
||||||
|
@ -54,7 +54,7 @@ from .compat import (
|
|||||||
compiled_regex_type = type(re.compile(''))
|
compiled_regex_type = type(re.compile(''))
|
||||||
|
|
||||||
std_headers = {
|
std_headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
|
||||||
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
'Accept-Encoding': 'gzip, deflate',
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
@ -304,6 +304,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
|
|||||||
# Common case of "Foreign band name - English song title"
|
# Common case of "Foreign band name - English song title"
|
||||||
if restricted and result.startswith('-_'):
|
if restricted and result.startswith('-_'):
|
||||||
result = result[2:]
|
result = result[2:]
|
||||||
|
if result.startswith('-'):
|
||||||
|
result = '_' + result[len('-'):]
|
||||||
if not result:
|
if not result:
|
||||||
result = '_'
|
result = '_'
|
||||||
return result
|
return result
|
||||||
@ -1288,6 +1290,7 @@ def parse_duration(s):
|
|||||||
(?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
|
(?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
|
||||||
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
|
||||||
|
|
||||||
|
\s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
|
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
|
||||||
@ -1306,10 +1309,14 @@ def parse_duration(s):
|
|||||||
return float_or_none(m.group('only_hours'), invscale=60 * 60)
|
return float_or_none(m.group('only_hours'), invscale=60 * 60)
|
||||||
if m.group('secs'):
|
if m.group('secs'):
|
||||||
res += int(m.group('secs'))
|
res += int(m.group('secs'))
|
||||||
|
if m.group('mins_reversed'):
|
||||||
|
res += int(m.group('mins_reversed')) * 60
|
||||||
if m.group('mins'):
|
if m.group('mins'):
|
||||||
res += int(m.group('mins')) * 60
|
res += int(m.group('mins')) * 60
|
||||||
if m.group('hours'):
|
if m.group('hours'):
|
||||||
res += int(m.group('hours')) * 60 * 60
|
res += int(m.group('hours')) * 60 * 60
|
||||||
|
if m.group('hours_reversed'):
|
||||||
|
res += int(m.group('hours_reversed')) * 60 * 60
|
||||||
if m.group('days'):
|
if m.group('days'):
|
||||||
res += int(m.group('days')) * 24 * 60 * 60
|
res += int(m.group('days')) * 24 * 60 * 60
|
||||||
if m.group('ms'):
|
if m.group('ms'):
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2015.02.24'
|
__version__ = '2015.02.26.2'
|
||||||
|
Reference in New Issue
Block a user