release 2016.02.13

[nbc] Correct test
[youtube] Add test for #8536
2016-02-13 08:25:05 +01:00 · 2016-02-13 07:45:32 +06:00 · 2016-02-13 05:18:58 +06:00 · 2016-02-13 05:01:20 +06:00 · 2016-02-13 00:36:47 +06:00 · 2016-02-13 00:23:14 +06:00
18 changed files with 380 additions and 54 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -89,6 +89,8 @@
 - **canalc2.tv**
 - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
 - **Canvas**
+ - **CBC**
+ - **CBCPlayer**
 - **CBS**
 - **CBSNews**: CBS News
 - **CBSNewsLiveVideo**: CBS News Live Videos
@@ -120,6 +122,7 @@
 - **ComedyCentralShows**: The Daily Show / The Colbert Report
 - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
 - **Cracked**
+ - **Crackle**
 - **Criterion**
 - **CrooksAndLiars**
 - **Crunchyroll**
@@ -445,6 +448,7 @@
 - **PlanetaPlay**
 - **play.fm**
 - **played.to**
+ - **PlaysTV**
 - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
 - **Playvid**
 - **Playwire**
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1288,6 +1288,9 @@ class YoutubeDL(object):

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
+            else:
+                # Sanitize format_id from characters used in format selector expression
+                format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
@@ -1338,7 +1341,6 @@ class YoutubeDL(object):
        if req_format is None:
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
-                    info_dict['extractor'] in ['youtube', 'ted'] and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
@@ -1795,7 +1797,7 @@ class YoutubeDL(object):
            else:
                res = '%sp' % format['height']
        elif format.get('width') is not None:
-            res = '?x%d' % format['width']
+            res = '%dx?' % format['width']
        else:
            res = default
        return res
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -89,6 +89,10 @@ from .camdemy import (
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .canvas import CanvasIE
+from .cbc import (
+    CBCIE,
+    CBCPlayerIE,
+)
 from .cbs import CBSIE
 from .cbsnews import (
    CBSNewsIE,
@@ -126,6 +130,7 @@ from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
+from .crackle import CrackleIE
 from .criterion import CriterionIE
 from .crooksandliars import CrooksAndLiarsIE
 from .crunchyroll import (
@@ -533,6 +538,7 @@ from .planetaplay import PlanetaPlayIE
 from .pladform import PladformIE
 from .played import PlayedIE
 from .playfm import PlayFMIE
+from .plays import PlaysTVIE
 from .playtvak import PlaytvakIE
 from .playvid import PlayvidIE
 from .playwire import PlaywireIE
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -0,0 +1,113 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+# Extractor for ordinary cbc.ca pages that embed the CBC player. It only
+# locates the media id(s) on the page and hands them over to CBCPlayerIE
+# through the internal "cbcplayer:<id>" URL scheme.
+class CBCIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
+    _TESTS = [{
+        # with mediaId
+        'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
+        'info_dict': {
+            'id': '2682904050',
+            'ext': 'flv',
+            'title': 'Don Cherry – All-Stars',
+            'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
+            'timestamp': 1454475540,
+            'upload_date': '20160203',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }, {
+        # with clipId
+        'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
+        'info_dict': {
+            'id': '2487345465',
+            'ext': 'flv',
+            'title': 'Robin Williams freestyles on 90 Minutes Live',
+            'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
+            'upload_date': '19700101',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }, {
+        # multiple iframes
+        'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
+        'playlist': [{
+            'info_dict': {
+                'id': '2680832926',
+                'ext': 'flv',
+                'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
+                'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
+                'upload_date': '19700101',
+            },
+        }, {
+            'info_dict': {
+                'id': '2658915080',
+                'ext': 'flv',
+                'title': 'Fly like an eagle!',
+                'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
+                'upload_date': '19700101',
+            },
+        }],
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }]
+
+    # _VALID_URL above is broad enough to also match the cbc.ca player URLs
+    # that CBCPlayerIE handles, so this extractor steps aside whenever
+    # CBCPlayerIE accepts the URL.
+    @classmethod
+    def suitable(cls, url):
+        return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        # Single-player pages pass an inline object literal to
+        # CBC.APP.Caffeine.initInstance(...); its absence is not an error
+        # (default=None), it just selects the iframe fallback below.
+        player_init = self._search_regex(
+            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
+            default=None)
+        if player_init:
+            # The captured literal is run through js_to_json before parsing
+            player_info = self._parse_json(player_init, display_id, js_to_json)
+            media_id = player_info.get('mediaId')
+            if not media_id:
+                # No mediaId on the page: look the clipId up in the feed and
+                # use the last path component of the first entry's id
+                clip_id = player_info['clipId']
+                media_id = self._download_json(
+                    'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
+                    clip_id)['entries'][0]['id'].split('/')[-1]
+            return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+        else:
+            # No initInstance call: collect every iframe whose src ends in
+            # mediaId=<digits> and return the lot as a playlist
+            entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
+            return self.playlist_result(entries)
+
+
+# Resolves a numeric CBC media id, given either as the internal
+# "cbcplayer:<id>" URL or as a cbc.ca player/syndicate URL, by delegating to
+# the ThePlatformFeed extractor.
+class CBCPlayerIE(InfoExtractor):
+    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.cbc.ca/player/play/2683190193',
+        'info_dict': {
+            'id': '2683190193',
+            'ext': 'flv',
+            'title': 'Gerry Runs a Sweat Shop',
+            'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
+            'timestamp': 1455067800,
+            'upload_date': '20160210',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        # The media id is passed as the byGuid filter of the feed URL
+        return self.url_result(
+            'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
+            'ThePlatformFeed', video_id)
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
    int_or_none,
    parse_duration,
@@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
    _TESTS = [{
        'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
        'info_dict': {
-            'id': 'miranda-sings-happy-thanksgiving-miranda',
+            'id': '2494164',
            'ext': 'mp4',
            'upload_date': '20141127',
            'timestamp': 1417107600,
            'duration': 1232,
            'title': 'Happy Thanksgiving Miranda',
            'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
-            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
        },
        'params': {
            'skip_download': 'requires ffmpeg',
@@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):
                r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
            display_id)['videoData']

-        video_id = full_data['activeVideo']['video']
-        video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
+        display_id = full_data['activeVideo']['video']
+        video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
+        video_id = compat_str(video_data['mediaId'])
        thumbnails = [{
            'url': video_data['images']['thumb'],
        }, {
            'url': video_data['images']['poster'],
        }]
-        formats = self._extract_m3u8_formats(
-            video_data['mediaUrl'], video_id, ext='mp4')

        timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
            video_data.get('pubDate'))
@@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):
            video_data.get('duration'))

        return {
+            '_type': 'url_transparent',
+            'url': 'crackle:%s' % video_id,
            'id': video_id,
            'display_id': display_id,
            'title': video_data['title'],
@@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):
            'timestamp': timestamp,
            'duration': duration,
            'thumbnails': thumbnails,
-            'formats': formats,
+            'season_number': int_or_none(video_data.get('season')),
+            'episode_number': int_or_none(video_data.get('episode')),
            'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
        }
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1186,12 +1186,13 @@ class InfoExtractor(object):
        http_count = 0
        m3u8_count = 0

-        src_urls = []
+        srcs = []
        videos = smil.findall(self._xpath_ns('.//video', namespace))
        for video in videos:
            src = video.get('src')
-            if not src:
+            if not src or src in srcs:
                continue
+            srcs.append(src)

            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
            filesize = int_or_none(video.get('size') or video.get('fileSize'))
@@ -1223,9 +1224,7 @@ class InfoExtractor(object):
                continue

            src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
-            if src_url in src_urls:
-                continue
-            src_urls.append(src_url)
+            src_url = src_url.strip()

            if proto == 'm3u8' or src_ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
@@ -1436,14 +1435,16 @@ class InfoExtractor(object):
                                base_url = base_url_e.text + base_url
                                if re.match(r'^https?://', base_url):
                                    break
-                        if not re.match(r'^https?://', base_url):
+                        if mpd_base_url and not re.match(r'^https?://', base_url):
+                            if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
+                                mpd_base_url += '/'
                            base_url = mpd_base_url + base_url
                        representation_id = representation_attrib.get('id')
                        lang = representation_attrib.get('lang')
                        url_el = representation.find(_add_ns('BaseURL'))
                        filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
                        f = {
-                            'format_id': mpd_id or representation_id,
+                            'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                            'url': base_url,
                            'width': int_or_none(representation_attrib.get('width')),
                            'height': int_or_none(representation_attrib.get('height')),
--- a/youtube_dl/extractor/crackle.py
+++ b/youtube_dl/extractor/crackle.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+# Extractor for crackle.com videos; also reachable through the internal
+# "crackle:<id>" URL scheme accepted by _VALID_URL.
+class CrackleIE(InfoExtractor):
+    _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.crackle.com/the-art-of-more/2496419',
+        'info_dict': {
+            'id': '2496419',
+            'ext': 'mp4',
+            'title': 'Heavy Lies the Head',
+            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }
+
+    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
+    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
+    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
+    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
+
+    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
+    # Maps the file name suffix of each progressive HTTP rendition to its
+    # frame size.
+    _MEDIA_FILE_SLOTS = {
+        'c544.flv': {
+            'width': 544,
+            'height': 306,
+        },
+        '360p.mp4': {
+            'width': 640,
+            'height': 360,
+        },
+        '480p.mp4': {
+            'width': 852,
+            'height': 478,
+        },
+        '480p_1mbps.mp4': {
+            'width': 852,
+            'height': 478,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        # All metadata is read from the attributes of the first <i> element
+        # of the vidwallcache XML response
+        item = self._download_xml(
+            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
+            video_id).find('i')
+        title = item.attrib['t']
+
+        thumbnail = None
+        subtitles = {}
+        # HLS is optional here: pass an explicit boolean (the original passed
+        # None) so a failed manifest download does not abort the extraction
+        formats = self._extract_m3u8_formats(
+            'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
+            video_id, 'mp4', m3u8_id='hls', fatal=False)
+        # The "p" attribute is the media path prefix shared by the thumbnail,
+        # the progressive HTTP files and the subtitle files
+        path = item.attrib.get('p')
+        if path:
+            thumbnail = self._THUMBNAIL_TEMPLATE % path
+            http_base_url = 'http://ahttp.crackle.com/' + path
+            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
+                formats.append({
+                    'url': http_base_url + mfs_path,
+                    'format_id': 'http-' + mfs_path.split('.')[0],
+                    'width': mfs_info['width'],
+                    'height': mfs_info['height'],
+                })
+            for cc in item.findall('cc'):
+                locale = cc.attrib.get('l')
+                v = cc.attrib.get('v')
+                if locale and v:
+                    # Accumulate per locale. The original created the list and
+                    # then overwrote it, keeping only the last <cc> per locale.
+                    subtitles.setdefault(locale, []).append({
+                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
+                        'ext': 'ttml',
+                    })
+        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': item.attrib.get('d'),
+            # the "r" attribute is parsed as a hexadecimal integer
+            'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
+            'series': item.attrib.get('sn'),
+            'season_number': int_or_none(item.attrib.get('se')),
+            'episode_number': int_or_none(item.attrib.get('ep')),
+            'thumbnail': thumbnail,
+            'subtitles': subtitles,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -224,6 +224,20 @@ class GenericIE(InfoExtractor):
                'skip_download': True,
            },
        },
+        # MPD from http://dash-mse-test.appspot.com/media.html
+        {
+            'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
+            'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
+            'info_dict': {
+                'id': 'car-20120827-manifest',
+                'ext': 'mp4',
+                'title': 'car-20120827-manifest',
+                'formats': 'mincount:9',
+            },
+            'params': {
+                'format': 'bestvideo',
+            },
+        },
        # google redirect
        {
            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -1302,7 +1316,8 @@ class GenericIE(InfoExtractor):
                return {
                    'id': video_id,
                    'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
-                    'formats': self._parse_mpd_formats(doc, video_id),
+                    'formats': self._parse_mpd_formats(
+                        doc, video_id, mpd_base_url=url.rpartition('/')[0]),
                }
        except compat_xml_parse_error:
            pass
@@ -1413,7 +1428,7 @@ class GenericIE(InfoExtractor):

        # Look for embedded Dailymotion player
        matches = re.findall(
-            r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
+            r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
        if matches:
            return _playlist_from_matches(
                matches, lambda m: unescapeHTML(m[1]))
--- a/youtube_dl/extractor/hotstar.py
+++ b/youtube_dl/extractor/hotstar.py
@@ -10,8 +10,8 @@ from ..utils import (


 class HotStarIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
+    _TESTS = [{
        'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
        'info_dict': {
            'id': '1000076273',
@@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):
            # m3u8 download
            'skip_download': True,
        }
-    }
+    }, {
+        'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.hotstar.com/1000000515',
+        'only_matching': True,
+    }]

    _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
    _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -57,7 +57,7 @@ class NBCIE(InfoExtractor):
        {
            # This video has expired but with an escaped embedURL
            'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
-            'skip': 'Expired'
+            'only_matching': True,
        }
    ]

--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
    ExtractorError,
    determine_ext,
    int_or_none,
+    js_to_json,
    strip_jsonp,
    unified_strdate,
    US_RATINGS,
@@ -199,7 +201,7 @@ class PBSIE(InfoExtractor):
                'id': '2365006249',
                'ext': 'mp4',
                'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
-                'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
+                'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
                'duration': 3190,
            },
            'params': {
@@ -213,7 +215,7 @@ class PBSIE(InfoExtractor):
                'id': '2365297690',
                'ext': 'mp4',
                'title': 'FRONTLINE - Losing Iraq',
-                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
+                'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
                'duration': 5050,
            },
            'params': {
@@ -227,7 +229,7 @@ class PBSIE(InfoExtractor):
                'id': '2201174722',
                'ext': 'mp4',
                'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
-                'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
+                'description': 'md5:95a19f568689d09a166dff9edada3301',
                'duration': 801,
            },
        },
@@ -237,8 +239,8 @@ class PBSIE(InfoExtractor):
            'info_dict': {
                'id': '2365297708',
                'ext': 'mp4',
-                'description': 'md5:68d87ef760660eb564455eb30ca464fe',
                'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
+                'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
                'duration': 6559,
                'thumbnail': 're:^https?://.*\.jpg$',
            },
@@ -278,7 +280,7 @@ class PBSIE(InfoExtractor):
                'display_id': 'player',
                'ext': 'mp4',
                'title': 'American Experience - Death and the Civil War, Chapter 1',
-                'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
+                'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',
                'duration': 682,
                'thumbnail': 're:^https?://.*\.jpg$',
            },
@@ -287,20 +289,19 @@ class PBSIE(InfoExtractor):
            },
        },
        {
-            'url': 'http://video.pbs.org/video/2365367186/',
+            'url': 'http://www.pbs.org/video/2365245528/',
            'info_dict': {
-                'id': '2365367186',
-                'display_id': '2365367186',
+                'id': '2365245528',
+                'display_id': '2365245528',
                'ext': 'mp4',
-                'title': 'To Catch A Comet - Full Episode',
-                'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
-                'duration': 3342,
+                'title': 'FRONTLINE - United States of Secrets (Part One)',
+                'description': 'md5:55756bd5c551519cc4b7703e373e217e',
+                'duration': 6851,
                'thumbnail': 're:^https?://.*\.jpg$',
            },
            'params': {
                'skip_download': True,  # requires ffmpeg
            },
-            'skip': 'Expired',
        },
        {
            # Video embedded in iframe containing angle brackets as attribute's value (e.g.
@@ -312,7 +313,7 @@ class PBSIE(InfoExtractor):
                'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
                'ext': 'mp4',
                'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
-                'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
+                'description': 'md5:54033c6baa1f9623607c6e2ed245888b',
                'duration': 1480,
                'thumbnail': 're:^https?://.*\.jpg$',
            },
@@ -328,7 +329,7 @@ class PBSIE(InfoExtractor):
                'display_id': 'the-atomic-artists',
                'ext': 'mp4',
                'title': 'FRONTLINE - The Atomic Artists',
-                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
+                'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',
                'duration': 723,
                'thumbnail': 're:^https?://.*\.jpg$',
            },
@@ -365,8 +366,12 @@ class PBSIE(InfoExtractor):
                webpage, 'upload date', default=None))

            # tabbed frontline videos
-            tabbed_videos = re.findall(
-                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
+            MULTI_PART_REGEXES = (
+                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
+                r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
+            )
+            for p in MULTI_PART_REGEXES:
+                tabbed_videos = re.findall(p, webpage)
                if tabbed_videos:
                    return tabbed_videos, presumptive_id, upload_date

@@ -432,9 +437,21 @@ class PBSIE(InfoExtractor):
                for vid_id in video_id]
            return self.playlist_result(entries, display_id)

+        try:
            info = self._download_json(
                'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
-            display_id)
+                display_id, 'Downloading video info JSON')
+        except ExtractorError as e:
+            if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
+                raise
+            # videoInfo API may not work for some videos, fallback to portalplayer API
+            player = self._download_webpage(
+                'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
+            info = self._parse_json(
+                self._search_regex(
+                    r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
+                    player, 'video data', default='{}'),
+                display_id, transform_source=js_to_json, fatal=False)

        formats = []
        for encoding_name in ('recommended_encoding', 'alternate_encoding'):
@@ -493,7 +510,7 @@ class PBSIE(InfoExtractor):
            'id': video_id,
            'display_id': display_id,
            'title': info['title'],
-            'description': info['program'].get('description'),
+            'description': info.get('description') or info.get('program', {}).get('description'),
            'thumbnail': info.get('image_url'),
            'duration': int_or_none(info.get('duration')),
            'age_limit': age_limit,
--- a/youtube_dl/extractor/plays.py
+++ b/youtube_dl/extractor/plays.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+# Extractor for plays.tv video pages; combines the DASH manifest referenced
+# by the player markup with the progressive <source> renditions.
+class PlaysTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
+    _TEST = {
+        'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
+        'md5': 'dfeac1198506652b5257a62762cec7bc',
+        'info_dict': {
+            'id': '56af17f56c95335490',
+            'ext': 'mp4',
+            'title': 'When you outplay the Azir wall',
+            'description': 'Posted by Bjergsen',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        # The player markup is the "content" value of the JSON object passed
+        # to R.bindContent(...)
+        content = self._parse_json(
+            self._search_regex(
+                r'R\.bindContent\(({.+?})\);', webpage,
+                'content'), video_id)['content']
+        # Use _search_regex instead of re.search(...).groups(): when the
+        # expected <video> element is missing, the latter dies with an
+        # AttributeError on None rather than a proper extraction error.
+        video_re = r'(?s)<video[^>]+data-mpd="(?P<mpd>[^"]+)"[^>]*>(?P<sources>.+?)</video>'
+        mpd_url = self._search_regex(
+            video_re, content, 'mpd url', group='mpd')
+        sources = self._search_regex(
+            video_re, content, 'sources', group='sources')
+        formats = self._extract_mpd_formats(
+            self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
+        # Each <source> carries its height in "res", optionally suffixed "h"
+        for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
+            formats.append({
+                'url': self._proto_relative_url(format_url),
+                'format_id': 'http-' + format_id,
+                'height': int_or_none(height),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -20,7 +20,6 @@ from ..utils import (
    int_or_none,
    sanitized_Request,
    unsmuggle_url,
-    url_basename,
    xpath_with_ns,
 )

@@ -283,8 +282,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
        first_video_id = None
        duration = None
        for item in entry['media$content']:
-            smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
-            cur_video_id = url_basename(smil_url)
+            smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
+            cur_video_id = ThePlatformIE._match_id(smil_url)
            if first_video_id is None:
                first_video_id = cur_video_id
                duration = float_or_none(item.get('plfile$duration'))
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -57,7 +57,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):

    def _extract_xsrft_and_vuid(self, webpage):
        xsrft = self._search_regex(
-            r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
+            r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
            webpage, 'login token', group='xsrft')
        vuid = self._search_regex(
            r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -229,6 +229,9 @@ class YoukuIE(InfoExtractor):
            if error_note is not None and '因版权原因无法观看此视频' in error_note:
                raise ExtractorError(
                    'Youku said: Sorry, this video is available in China only', expected=True)
+            elif error_note and '该视频被设为私密' in error_note:
+                raise ExtractorError(
+                    'Youku said: Sorry, this video is private', expected=True)
            else:
                msg = 'Youku server reported error %i' % error.get('code')
                if error_note is not None:
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -375,7 +375,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube'
    _TESTS = [
        {
-            'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
+            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
@ -441,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            }
        },
        {
-            'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
+            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
            'note': 'Use the first video ID in the URL',
            'info_dict': {
                'id': 'BaW_jenozKc',
@ -704,6 +704,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'skip_download': True,
            },
        },
+        {
+            # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
+            'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
+            'info_dict': {
+                'id': 'gVfLd0zydlo',
+                'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
+            },
+            'playlist_count': 2,
+        },
        {
            'url': 'http://vid.plus/FlRa-iH7PGw',
            'only_matching': True,
@ -1196,9 +1205,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            if not self._downloader.params.get('noplaylist'):
                entries = []
                feed_ids = []
-                multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
+                multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
                for feed in multifeed_metadata_list.split(','):
-                    feed_data = compat_parse_qs(feed)
+                    # Unquote should take place after split on comma (,) since unquoted
+                    # textual fields may contain comma as well (see
+                    # https://github.com/rg3/youtube-dl/issues/8536)
+                    feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -56,7 +56,7 @@ from .compat import (
 compiled_regex_type = type(re.compile(''))

 std_headers = {
-    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.02.09.1'
+__version__ = '2016.02.13'
Author	SHA1	Message	Date
Philipp Hagemeister	35ced3985a	release 2016.02.13	2016-02-13 08:25:05 +01:00
Sergey M․	3e18700d45	[nbc] Correct test	2016-02-13 07:45:32 +06:00
Sergey M․	f9f49d87c2	[youtube] Add test for #8536	2016-02-13 05:18:58 +06:00
Sergey M․	6863631c26	[youtube] Improve multifeed videos extraction (Closes #8536 )	2016-02-13 05:01:20 +06:00
Sergey M․	9d939cec48	[extractor/generic] Add direct mpd url test	2016-02-13 00:36:47 +06:00
Sergey M․	4c77d3f52a	[YoutubeDL] Allow bestvideo+bestaudio for any extractor	2016-02-13 00:23:14 +06:00
Sergey M․	7be747b921	[extractor/generic] Pass mpd base url to _parse_mpd_formats	2016-02-13 00:15:59 +06:00
Sergey M․	bb20526b64	[extractor/common] Improve base url construction	2016-02-13 00:13:56 +06:00
remitamine	bcbb1b08b2	Revert "[aenetworks] extract http formats". This reverts commit `3d98f97c64`.	2016-02-12 17:56:06 +01:00
remitamine	3d98f97c64	[aenetworks] extract http formats	2016-02-12 17:39:32 +01:00
remitamine	c349456ef6	[extractor/common] strip http urls in smil manifest	2016-02-12 17:38:48 +01:00
Sergey M․	5a4905924d	[extractor/generic] Improve dailymotion embed detection (Closes #8521 , closes #8325 )	2016-02-12 22:03:10 +06:00
Sergey M․	b826035dd5	[vimeo] Fix authentication (Closes #8520 )	2016-02-12 03:16:26 +06:00
remitamine	a7cab4d039	[theplatform] remove unused import and change smil url for ThePlatformFeedIE	2016-02-11 18:50:14 +01:00
remitamine	fc3810f6d1	Merge branch 'master' of github.com:rg3/youtube-dl	2016-02-11 18:13:56 +01:00
remitamine	3dc71d82ce	[theplatform] fix pid extraction in the platform feed	2016-02-11 18:13:03 +01:00
Sergey M․	9c7b38981c	[utils] Bump Firefox version in User-Agent. Old version number causes Youtube not to serve some formats in ytplayer.config	2016-02-11 23:12:30 +06:00
remitamine	8b85ac3fd9	[cbc] Add new extractor(closes #3803 )(closes #4731 )(closes #5309 )	2016-02-11 18:10:32 +01:00
remitamine	81e1c4e2fc	[extractor/common] remove duplicate rtmp formats in smil manifest	2016-02-11 17:58:48 +01:00
Sergey M․	388ae76b52	[YoutubeDL] Fix format resolution when height is missing	2016-02-11 22:46:13 +06:00
Sergey M․	b67d63149d	[youtube] Fix typos	2016-02-11 22:33:08 +06:00
Sergey M․	28280e8ded	[plays] PEP 8	2016-02-11 22:02:57 +06:00
Sergey M․	6b3fbd3425	[pbs] Fix multi part videos extraction	2016-02-11 22:02:37 +06:00
Sergey M․	a7ab46375b	[pbs] Update some tests	2016-02-11 21:43:01 +06:00
Sergey M․	b14d5e26f6	[pbs] Improve description extraction	2016-02-11 21:28:09 +06:00
Sergey M․	9a61dfba0c	[pbs] Revert prefer portalplayer	2016-02-11 21:22:57 +06:00
remitamine	154c209e2d	[extractor/common] improve dash format ids	2016-02-11 10:33:26 +01:00
remitamine	d1ea5e171f	[plays] Add new extractor(#8458 )	2016-02-11 10:30:31 +01:00
remitamine	a1188d0ed0	[crackle] add prefix to format ids	2016-02-10 22:39:33 +01:00
remitamine	47d205a646	[crackle] improve format sorting	2016-02-10 22:23:56 +01:00
remitamine	80f772c28a	[crackle] Add new extractor	2016-02-10 22:16:21 +01:00
Philipp Hagemeister	f817d9bec1	release 2016.02.10	2016-02-10 16:17:38 +01:00
Sergey M․	e2effb08a4	[YoutubeDL] Sanitize format_id (Closes #8494 )	2016-02-10 21:16:58 +06:00
Sergey M․	7fcea295c5	[pbs] Switch to portal player by default (Closes #8491 )	2016-02-10 20:46:38 +06:00
Sergey M․	cc799437ea	[youku] Report private videos (Closes #8498 )	2016-02-10 20:05:17 +06:00
Sergey M․	89d23f37f2	[hotstar] Relax _VALID_URL (Closes #8487 )	2016-02-10 04:43:00 +06:00