Compare commits
31 Commits
2016.02.10
...
2016.02.13
Author | SHA1 | Date | |
---|---|---|---|
35ced3985a | |||
3e18700d45 | |||
f9f49d87c2 | |||
6863631c26 | |||
9d939cec48 | |||
4c77d3f52a | |||
7be747b921 | |||
bb20526b64 | |||
bcbb1b08b2 | |||
3d98f97c64 | |||
c349456ef6 | |||
5a4905924d | |||
b826035dd5 | |||
a7cab4d039 | |||
fc3810f6d1 | |||
3dc71d82ce | |||
9c7b38981c | |||
8b85ac3fd9 | |||
81e1c4e2fc | |||
388ae76b52 | |||
b67d63149d | |||
28280e8ded | |||
6b3fbd3425 | |||
a7ab46375b | |||
b14d5e26f6 | |||
9a61dfba0c | |||
154c209e2d | |||
d1ea5e171f | |||
a1188d0ed0 | |||
47d205a646 | |||
80f772c28a |
@ -89,6 +89,8 @@
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canvas**
|
||||
- **CBC**
|
||||
- **CBCPlayer**
|
||||
- **CBS**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
@ -120,6 +122,7 @@
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
- **Crunchyroll**
|
||||
@ -445,6 +448,7 @@
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
- **Playwire**
|
||||
|
@ -1341,7 +1341,6 @@ class YoutubeDL(object):
|
||||
if req_format is None:
|
||||
req_format_list = []
|
||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||
info_dict['extractor'] in ['youtube', 'ted'] and
|
||||
not info_dict.get('is_live')):
|
||||
merger = FFmpegMergerPP(self)
|
||||
if merger.available and merger.can_merge():
|
||||
@ -1798,7 +1797,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
res = '%sp' % format['height']
|
||||
elif format.get('width') is not None:
|
||||
res = '?x%d' % format['width']
|
||||
res = '%dx?' % format['width']
|
||||
else:
|
||||
res = default
|
||||
return res
|
||||
|
@ -89,6 +89,10 @@ from .camdemy import (
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import CanvasIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
@ -126,6 +130,7 @@ from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .criterion import CriterionIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
from .crunchyroll import (
|
||||
@ -533,6 +538,7 @@ from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
|
113
youtube_dl/extractor/cbc.py
Normal file
113
youtube_dl/extractor/cbc.py
Normal file
@ -0,0 +1,113 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# with mediaId
|
||||
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
|
||||
'info_dict': {
|
||||
'id': '2682904050',
|
||||
'ext': 'flv',
|
||||
'title': 'Don Cherry – All-Stars',
|
||||
'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
|
||||
'timestamp': 1454475540,
|
||||
'upload_date': '20160203',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# with clipId
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
'info_dict': {
|
||||
'id': '2487345465',
|
||||
'ext': 'flv',
|
||||
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
||||
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
||||
'upload_date': '19700101',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# multiple iframes
|
||||
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '2680832926',
|
||||
'ext': 'flv',
|
||||
'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
|
||||
'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
|
||||
'upload_date': '19700101',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '2658915080',
|
||||
'ext': 'flv',
|
||||
'title': 'Fly like an eagle!',
|
||||
'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
|
||||
'upload_date': '19700101',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_init = self._search_regex(
|
||||
r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
|
||||
default=None)
|
||||
if player_init:
|
||||
player_info = self._parse_json(player_init, display_id, js_to_json)
|
||||
media_id = player_info.get('mediaId')
|
||||
if not media_id:
|
||||
clip_id = player_info['clipId']
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
else:
|
||||
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'info_dict': {
|
||||
'id': '2683190193',
|
||||
'ext': 'flv',
|
||||
'title': 'Gerry Runs a Sweat Shop',
|
||||
'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
|
||||
'timestamp': 1455067800,
|
||||
'upload_date': '20160210',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
|
||||
'ThePlatformFeed', video_id)
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
||||
'info_dict': {
|
||||
'id': 'miranda-sings-happy-thanksgiving-miranda',
|
||||
'id': '2494164',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141127',
|
||||
'timestamp': 1417107600,
|
||||
'duration': 1232,
|
||||
'title': 'Happy Thanksgiving Miranda',
|
||||
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
||||
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires ffmpeg',
|
||||
@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):
|
||||
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
|
||||
display_id)['videoData']
|
||||
|
||||
video_id = full_data['activeVideo']['video']
|
||||
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
|
||||
display_id = full_data['activeVideo']['video']
|
||||
video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
|
||||
video_id = compat_str(video_data['mediaId'])
|
||||
thumbnails = [{
|
||||
'url': video_data['images']['thumb'],
|
||||
}, {
|
||||
'url': video_data['images']['poster'],
|
||||
}]
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['mediaUrl'], video_id, ext='mp4')
|
||||
|
||||
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
|
||||
video_data.get('pubDate'))
|
||||
@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):
|
||||
video_data.get('duration'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'crackle:%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_data['title'],
|
||||
@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'season_number': int_or_none(video_data.get('season')),
|
||||
'episode_number': int_or_none(video_data.get('episode')),
|
||||
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
||||
}
|
||||
|
@ -1186,12 +1186,13 @@ class InfoExtractor(object):
|
||||
http_count = 0
|
||||
m3u8_count = 0
|
||||
|
||||
src_urls = []
|
||||
srcs = []
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
if not src or src in srcs:
|
||||
continue
|
||||
srcs.append(src)
|
||||
|
||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||
@ -1223,9 +1224,7 @@ class InfoExtractor(object):
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||
if src_url in src_urls:
|
||||
continue
|
||||
src_urls.append(src_url)
|
||||
src_url = src_url.strip()
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
@ -1436,14 +1435,16 @@ class InfoExtractor(object):
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
break
|
||||
if not re.match(r'^https?://', base_url):
|
||||
if mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
representation_id = representation_attrib.get('id')
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
f = {
|
||||
'format_id': mpd_id or representation_id,
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
|
95
youtube_dl/extractor/crackle.py
Normal file
95
youtube_dl/extractor/crackle.py
Normal file
@ -0,0 +1,95 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crackle.com/the-art-of-more/2496419',
|
||||
'info_dict': {
|
||||
'id': '2496419',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heavy Lies the Head',
|
||||
'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
|
||||
_SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
|
||||
_UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
|
||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'c544.flv': {
|
||||
'width': 544,
|
||||
'height': 306,
|
||||
},
|
||||
'360p.mp4': {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
'480p.mp4': {
|
||||
'width': 852,
|
||||
'height': 478,
|
||||
},
|
||||
'480p_1mbps.mp4': {
|
||||
'width': 852,
|
||||
'height': 478,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
item = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||
video_id).find('i')
|
||||
title = item.attrib['t']
|
||||
|
||||
thumbnail = None
|
||||
subtitles = {}
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||
path = item.attrib.get('p')
|
||||
if path:
|
||||
thumbnail = self._THUMBNAIL_TEMPLATE % path
|
||||
http_base_url = 'http://ahttp.crackle.com/' + path
|
||||
for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
|
||||
formats.append({
|
||||
'url': http_base_url + mfs_path,
|
||||
'format_id': 'http-' + mfs_path.split('.')[0],
|
||||
'width': mfs_info['width'],
|
||||
'height': mfs_info['height'],
|
||||
})
|
||||
for cc in item.findall('cc'):
|
||||
locale = cc.attrib.get('l')
|
||||
v = cc.attrib.get('v')
|
||||
if locale and v:
|
||||
if locale not in subtitles:
|
||||
subtitles[locale] = []
|
||||
subtitles[locale] = [{
|
||||
'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': item.attrib.get('d'),
|
||||
'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
|
||||
'series': item.attrib.get('sn'),
|
||||
'season_number': int_or_none(item.attrib.get('se')),
|
||||
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||
'thumbnail': thumbnail,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
@ -224,6 +224,20 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# MPD from http://dash-mse-test.appspot.com/media.html
|
||||
{
|
||||
'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
|
||||
'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
|
||||
'info_dict': {
|
||||
'id': 'car-20120827-manifest',
|
||||
'ext': 'mp4',
|
||||
'title': 'car-20120827-manifest',
|
||||
'formats': 'mincount:9',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
@ -1302,7 +1316,8 @@ class GenericIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'formats': self._parse_mpd_formats(doc, video_id),
|
||||
'formats': self._parse_mpd_formats(
|
||||
doc, video_id, mpd_base_url=url.rpartition('/')[0]),
|
||||
}
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
@ -1413,7 +1428,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
|
@ -57,7 +57,7 @@ class NBCIE(InfoExtractor):
|
||||
{
|
||||
# This video has expired but with an escaped embedURL
|
||||
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
||||
'skip': 'Expired'
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
@ -200,7 +201,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
|
||||
'duration': 3190,
|
||||
},
|
||||
'params': {
|
||||
@ -214,7 +215,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - Losing Iraq',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
|
||||
'duration': 5050,
|
||||
},
|
||||
'params': {
|
||||
@ -228,7 +229,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2201174722',
|
||||
'ext': 'mp4',
|
||||
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
|
||||
'description': 'md5:95a19f568689d09a166dff9edada3301',
|
||||
'duration': 801,
|
||||
},
|
||||
},
|
||||
@ -238,8 +239,8 @@ class PBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2365297708',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
||||
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||
'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@ -279,7 +280,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'player',
|
||||
'ext': 'mp4',
|
||||
'title': 'American Experience - Death and the Civil War, Chapter 1',
|
||||
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
|
||||
'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',
|
||||
'duration': 682,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@ -288,20 +289,19 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://video.pbs.org/video/2365367186/',
|
||||
'url': 'http://www.pbs.org/video/2365245528/',
|
||||
'info_dict': {
|
||||
'id': '2365367186',
|
||||
'display_id': '2365367186',
|
||||
'id': '2365245528',
|
||||
'display_id': '2365245528',
|
||||
'ext': 'mp4',
|
||||
'title': 'To Catch A Comet - Full Episode',
|
||||
'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
|
||||
'duration': 3342,
|
||||
'title': 'FRONTLINE - United States of Secrets (Part One)',
|
||||
'description': 'md5:55756bd5c551519cc4b7703e373e217e',
|
||||
'duration': 6851,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
'skip': 'Expired',
|
||||
},
|
||||
{
|
||||
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
||||
@ -313,7 +313,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
||||
'ext': 'mp4',
|
||||
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
|
||||
'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
|
||||
'description': 'md5:54033c6baa1f9623607c6e2ed245888b',
|
||||
'duration': 1480,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@ -329,7 +329,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'the-atomic-artists',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - The Atomic Artists',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',
|
||||
'duration': 723,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@ -366,10 +366,14 @@ class PBSIE(InfoExtractor):
|
||||
webpage, 'upload date', default=None))
|
||||
|
||||
# tabbed frontline videos
|
||||
tabbed_videos = re.findall(
|
||||
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
|
||||
if tabbed_videos:
|
||||
return tabbed_videos, presumptive_id, upload_date
|
||||
MULTI_PART_REGEXES = (
|
||||
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
|
||||
r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
|
||||
)
|
||||
for p in MULTI_PART_REGEXES:
|
||||
tabbed_videos = re.findall(p, webpage)
|
||||
if tabbed_videos:
|
||||
return tabbed_videos, presumptive_id, upload_date
|
||||
|
||||
MEDIA_ID_REGEXES = [
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
@ -433,20 +437,21 @@ class PBSIE(InfoExtractor):
|
||||
for vid_id in video_id]
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
player = self._download_webpage(
|
||||
'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
|
||||
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||
player, 'video data', default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
# Fallback to old videoInfo API
|
||||
if not info:
|
||||
try:
|
||||
info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id, 'Downloading video info JSON')
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
|
||||
raise
|
||||
# videoInfo API may not work for some videos, fallback to portalplayer API
|
||||
player = self._download_webpage(
|
||||
'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||
player, 'video data', default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
formats = []
|
||||
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
||||
@ -505,7 +510,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'description': info['program'].get('description'),
|
||||
'description': info.get('description') or info.get('program', {}).get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'age_limit': age_limit,
|
||||
|
51
youtube_dl/extractor/plays.py
Normal file
51
youtube_dl/extractor/plays.py
Normal file
@ -0,0 +1,51 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PlaysTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
|
||||
_TEST = {
|
||||
'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
|
||||
'md5': 'dfeac1198506652b5257a62762cec7bc',
|
||||
'info_dict': {
|
||||
'id': '56af17f56c95335490',
|
||||
'ext': 'mp4',
|
||||
'title': 'When you outplay the Azir wall',
|
||||
'description': 'Posted by Bjergsen',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
content = self._parse_json(
|
||||
self._search_regex(
|
||||
r'R\.bindContent\(({.+?})\);', webpage,
|
||||
'content'), video_id)['content']
|
||||
mpd_url, sources = re.search(
|
||||
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
|
||||
content).groups()
|
||||
formats = self._extract_mpd_formats(
|
||||
self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
|
||||
for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(format_url),
|
||||
'format_id': 'http-' + format_id,
|
||||
'height': int_or_none(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
}
|
@ -20,7 +20,6 @@ from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
unsmuggle_url,
|
||||
url_basename,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
@ -283,8 +282,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
first_video_id = None
|
||||
duration = None
|
||||
for item in entry['media$content']:
|
||||
smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
|
||||
cur_video_id = url_basename(smil_url)
|
||||
smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
|
||||
cur_video_id = ThePlatformIE._match_id(smil_url)
|
||||
if first_video_id is None:
|
||||
first_video_id = cur_video_id
|
||||
duration = float_or_none(item.get('plfile$duration'))
|
||||
|
@ -57,7 +57,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
def _extract_xsrft_and_vuid(self, webpage):
|
||||
xsrft = self._search_regex(
|
||||
r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
||||
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
||||
webpage, 'login token', group='xsrft')
|
||||
vuid = self._search_regex(
|
||||
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
|
||||
|
@ -375,7 +375,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_NAME = 'youtube'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
|
||||
'info_dict': {
|
||||
'id': 'BaW_jenozKc',
|
||||
'ext': 'mp4',
|
||||
@ -441,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
|
||||
'note': 'Use the first video ID in the URL',
|
||||
'info_dict': {
|
||||
'id': 'BaW_jenozKc',
|
||||
@ -704,6 +704,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
|
||||
'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
|
||||
'info_dict': {
|
||||
'id': 'gVfLd0zydlo',
|
||||
'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
},
|
||||
{
|
||||
'url': 'http://vid.plus/FlRa-iH7PGw',
|
||||
'only_matching': True,
|
||||
@ -1196,9 +1205,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not self._downloader.params.get('noplaylist'):
|
||||
entries = []
|
||||
feed_ids = []
|
||||
multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
|
||||
multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
|
||||
for feed in multifeed_metadata_list.split(','):
|
||||
feed_data = compat_parse_qs(feed)
|
||||
# Unquote should take place before split on comma (,) since textual
|
||||
# fields may contain comma as well (see
|
||||
# https://github.com/rg3/youtube-dl/issues/8536)
|
||||
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Youtube',
|
||||
|
@ -56,7 +56,7 @@ from .compat import (
|
||||
compiled_regex_type = type(re.compile(''))
|
||||
|
||||
std_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',
|
||||
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.02.10'
|
||||
__version__ = '2016.02.13'
|
||||
|
Reference in New Issue
Block a user