Compare commits
46 Commits
2016.02.09
...
2016.02.13
Author | SHA1 | Date | |
---|---|---|---|
35ced3985a | |||
3e18700d45 | |||
f9f49d87c2 | |||
6863631c26 | |||
9d939cec48 | |||
4c77d3f52a | |||
7be747b921 | |||
bb20526b64 | |||
bcbb1b08b2 | |||
3d98f97c64 | |||
c349456ef6 | |||
5a4905924d | |||
b826035dd5 | |||
a7cab4d039 | |||
fc3810f6d1 | |||
3dc71d82ce | |||
9c7b38981c | |||
8b85ac3fd9 | |||
81e1c4e2fc | |||
388ae76b52 | |||
b67d63149d | |||
28280e8ded | |||
6b3fbd3425 | |||
a7ab46375b | |||
b14d5e26f6 | |||
9a61dfba0c | |||
154c209e2d | |||
d1ea5e171f | |||
a1188d0ed0 | |||
47d205a646 | |||
80f772c28a | |||
f817d9bec1 | |||
e2effb08a4 | |||
7fcea295c5 | |||
cc799437ea | |||
89d23f37f2 | |||
b92071ef00 | |||
47246ae26c | |||
9c15869c28 | |||
51e9094f4a | |||
5e3a6fec33 | |||
d413095f7e | |||
1bedf4de06 | |||
3967a761f4 | |||
b081350bd9 | |||
16f1430ba6 |
@ -89,6 +89,8 @@
|
|||||||
- **canalc2.tv**
|
- **canalc2.tv**
|
||||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||||
- **Canvas**
|
- **Canvas**
|
||||||
|
- **CBC**
|
||||||
|
- **CBCPlayer**
|
||||||
- **CBS**
|
- **CBS**
|
||||||
- **CBSNews**: CBS News
|
- **CBSNews**: CBS News
|
||||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||||
@ -120,6 +122,7 @@
|
|||||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
- **Cracked**
|
- **Cracked**
|
||||||
|
- **Crackle**
|
||||||
- **Criterion**
|
- **Criterion**
|
||||||
- **CrooksAndLiars**
|
- **CrooksAndLiars**
|
||||||
- **Crunchyroll**
|
- **Crunchyroll**
|
||||||
@ -445,6 +448,7 @@
|
|||||||
- **PlanetaPlay**
|
- **PlanetaPlay**
|
||||||
- **play.fm**
|
- **play.fm**
|
||||||
- **played.to**
|
- **played.to**
|
||||||
|
- **PlaysTV**
|
||||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||||
- **Playvid**
|
- **Playvid**
|
||||||
- **Playwire**
|
- **Playwire**
|
||||||
|
@ -1288,6 +1288,9 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
if format.get('format_id') is None:
|
if format.get('format_id') is None:
|
||||||
format['format_id'] = compat_str(i)
|
format['format_id'] = compat_str(i)
|
||||||
|
else:
|
||||||
|
# Sanitize format_id from characters used in format selector expression
|
||||||
|
format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
|
||||||
format_id = format['format_id']
|
format_id = format['format_id']
|
||||||
if format_id not in formats_dict:
|
if format_id not in formats_dict:
|
||||||
formats_dict[format_id] = []
|
formats_dict[format_id] = []
|
||||||
@ -1338,7 +1341,6 @@ class YoutubeDL(object):
|
|||||||
if req_format is None:
|
if req_format is None:
|
||||||
req_format_list = []
|
req_format_list = []
|
||||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||||
info_dict['extractor'] in ['youtube', 'ted'] and
|
|
||||||
not info_dict.get('is_live')):
|
not info_dict.get('is_live')):
|
||||||
merger = FFmpegMergerPP(self)
|
merger = FFmpegMergerPP(self)
|
||||||
if merger.available and merger.can_merge():
|
if merger.available and merger.can_merge():
|
||||||
@ -1795,7 +1797,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
res = '%sp' % format['height']
|
res = '%sp' % format['height']
|
||||||
elif format.get('width') is not None:
|
elif format.get('width') is not None:
|
||||||
res = '?x%d' % format['width']
|
res = '%dx?' % format['width']
|
||||||
else:
|
else:
|
||||||
res = default
|
res = default
|
||||||
return res
|
return res
|
||||||
|
@ -89,6 +89,10 @@ from .camdemy import (
|
|||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .canvas import CanvasIE
|
from .canvas import CanvasIE
|
||||||
|
from .cbc import (
|
||||||
|
CBCIE,
|
||||||
|
CBCPlayerIE,
|
||||||
|
)
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbsnews import (
|
from .cbsnews import (
|
||||||
CBSNewsIE,
|
CBSNewsIE,
|
||||||
@ -126,6 +130,7 @@ from .comcarcoff import ComCarCoffIE
|
|||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
|
from .crackle import CrackleIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
from .crooksandliars import CrooksAndLiarsIE
|
from .crooksandliars import CrooksAndLiarsIE
|
||||||
from .crunchyroll import (
|
from .crunchyroll import (
|
||||||
@ -533,6 +538,7 @@ from .planetaplay import PlanetaPlayIE
|
|||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .played import PlayedIE
|
from .played import PlayedIE
|
||||||
from .playfm import PlayFMIE
|
from .playfm import PlayFMIE
|
||||||
|
from .plays import PlaysTVIE
|
||||||
from .playtvak import PlaytvakIE
|
from .playtvak import PlaytvakIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .playwire import PlaywireIE
|
from .playwire import PlaywireIE
|
||||||
|
113
youtube_dl/extractor/cbc.py
Normal file
113
youtube_dl/extractor/cbc.py
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import js_to_json
|
||||||
|
|
||||||
|
|
||||||
|
class CBCIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# with mediaId
|
||||||
|
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2682904050',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Don Cherry – All-Stars',
|
||||||
|
'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
|
||||||
|
'timestamp': 1454475540,
|
||||||
|
'upload_date': '20160203',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# with clipId
|
||||||
|
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2487345465',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
||||||
|
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
||||||
|
'upload_date': '19700101',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# multiple iframes
|
||||||
|
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2680832926',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
|
||||||
|
'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
|
||||||
|
'upload_date': '19700101',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2658915080',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Fly like an eagle!',
|
||||||
|
'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
|
||||||
|
'upload_date': '19700101',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
player_init = self._search_regex(
|
||||||
|
r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
|
||||||
|
default=None)
|
||||||
|
if player_init:
|
||||||
|
player_info = self._parse_json(player_init, display_id, js_to_json)
|
||||||
|
media_id = player_info.get('mediaId')
|
||||||
|
if not media_id:
|
||||||
|
clip_id = player_info['clipId']
|
||||||
|
media_id = self._download_json(
|
||||||
|
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||||
|
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||||
|
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||||
|
else:
|
||||||
|
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||||
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
|
||||||
|
class CBCPlayerIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2683190193',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Gerry Runs a Sweat Shop',
|
||||||
|
'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
|
||||||
|
'timestamp': 1455067800,
|
||||||
|
'upload_date': '20160210',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
|
||||||
|
'ThePlatformFeed', video_id)
|
@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'miranda-sings-happy-thanksgiving-miranda',
|
'id': '2494164',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20141127',
|
'upload_date': '20141127',
|
||||||
'timestamp': 1417107600,
|
'timestamp': 1417107600,
|
||||||
'duration': 1232,
|
'duration': 1232,
|
||||||
'title': 'Happy Thanksgiving Miranda',
|
'title': 'Happy Thanksgiving Miranda',
|
||||||
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
||||||
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'requires ffmpeg',
|
'skip_download': 'requires ffmpeg',
|
||||||
@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):
|
|||||||
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
|
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
|
||||||
display_id)['videoData']
|
display_id)['videoData']
|
||||||
|
|
||||||
video_id = full_data['activeVideo']['video']
|
display_id = full_data['activeVideo']['video']
|
||||||
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
|
video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
|
||||||
|
video_id = compat_str(video_data['mediaId'])
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
'url': video_data['images']['thumb'],
|
'url': video_data['images']['thumb'],
|
||||||
}, {
|
}, {
|
||||||
'url': video_data['images']['poster'],
|
'url': video_data['images']['poster'],
|
||||||
}]
|
}]
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
video_data['mediaUrl'], video_id, ext='mp4')
|
|
||||||
|
|
||||||
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
|
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
|
||||||
video_data.get('pubDate'))
|
video_data.get('pubDate'))
|
||||||
@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):
|
|||||||
video_data.get('duration'))
|
video_data.get('duration'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'crackle:%s' % video_id,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': video_data['title'],
|
'title': video_data['title'],
|
||||||
@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):
|
|||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'formats': formats,
|
'season_number': int_or_none(video_data.get('season')),
|
||||||
|
'episode_number': int_or_none(video_data.get('episode')),
|
||||||
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
||||||
}
|
}
|
||||||
|
@ -1186,11 +1186,13 @@ class InfoExtractor(object):
|
|||||||
http_count = 0
|
http_count = 0
|
||||||
m3u8_count = 0
|
m3u8_count = 0
|
||||||
|
|
||||||
|
srcs = []
|
||||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||||
for video in videos:
|
for video in videos:
|
||||||
src = video.get('src')
|
src = video.get('src')
|
||||||
if not src:
|
if not src or src in srcs:
|
||||||
continue
|
continue
|
||||||
|
srcs.append(src)
|
||||||
|
|
||||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||||
@ -1222,6 +1224,7 @@ class InfoExtractor(object):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||||
|
src_url = src_url.strip()
|
||||||
|
|
||||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
@ -1267,11 +1270,13 @@ class InfoExtractor(object):
|
|||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||||
|
urls = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||||
src = textstream.get('src')
|
src = textstream.get('src')
|
||||||
if not src:
|
if not src or src in urls:
|
||||||
continue
|
continue
|
||||||
|
urls.append(src)
|
||||||
ext = textstream.get('ext') or determine_ext(src)
|
ext = textstream.get('ext') or determine_ext(src)
|
||||||
if not ext:
|
if not ext:
|
||||||
type_ = textstream.get('type')
|
type_ = textstream.get('type')
|
||||||
@ -1430,12 +1435,16 @@ class InfoExtractor(object):
|
|||||||
base_url = base_url_e.text + base_url
|
base_url = base_url_e.text + base_url
|
||||||
if re.match(r'^https?://', base_url):
|
if re.match(r'^https?://', base_url):
|
||||||
break
|
break
|
||||||
if not re.match(r'^https?://', base_url):
|
if mpd_base_url and not re.match(r'^https?://', base_url):
|
||||||
|
if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
|
||||||
|
mpd_base_url += '/'
|
||||||
base_url = mpd_base_url + base_url
|
base_url = mpd_base_url + base_url
|
||||||
representation_id = representation_attrib.get('id')
|
representation_id = representation_attrib.get('id')
|
||||||
lang = representation_attrib.get('lang')
|
lang = representation_attrib.get('lang')
|
||||||
|
url_el = representation.find(_add_ns('BaseURL'))
|
||||||
|
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||||
f = {
|
f = {
|
||||||
'format_id': mpd_id or representation_id,
|
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||||
'url': base_url,
|
'url': base_url,
|
||||||
'width': int_or_none(representation_attrib.get('width')),
|
'width': int_or_none(representation_attrib.get('width')),
|
||||||
'height': int_or_none(representation_attrib.get('height')),
|
'height': int_or_none(representation_attrib.get('height')),
|
||||||
@ -1446,6 +1455,7 @@ class InfoExtractor(object):
|
|||||||
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
|
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
|
||||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||||
'format_note': 'DASH %s' % content_type,
|
'format_note': 'DASH %s' % content_type,
|
||||||
|
'filesize': filesize,
|
||||||
}
|
}
|
||||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||||
|
95
youtube_dl/extractor/crackle.py
Normal file
95
youtube_dl/extractor/crackle.py
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class CrackleIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.crackle.com/the-art-of-more/2496419',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2496419',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Heavy Lies the Head',
|
||||||
|
'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
|
||||||
|
_SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
|
||||||
|
_UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
|
||||||
|
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||||
|
|
||||||
|
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||||
|
_MEDIA_FILE_SLOTS = {
|
||||||
|
'c544.flv': {
|
||||||
|
'width': 544,
|
||||||
|
'height': 306,
|
||||||
|
},
|
||||||
|
'360p.mp4': {
|
||||||
|
'width': 640,
|
||||||
|
'height': 360,
|
||||||
|
},
|
||||||
|
'480p.mp4': {
|
||||||
|
'width': 852,
|
||||||
|
'height': 478,
|
||||||
|
},
|
||||||
|
'480p_1mbps.mp4': {
|
||||||
|
'width': 852,
|
||||||
|
'height': 478,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
item = self._download_xml(
|
||||||
|
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||||
|
video_id).find('i')
|
||||||
|
title = item.attrib['t']
|
||||||
|
|
||||||
|
thumbnail = None
|
||||||
|
subtitles = {}
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
|
||||||
|
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||||
|
path = item.attrib.get('p')
|
||||||
|
if path:
|
||||||
|
thumbnail = self._THUMBNAIL_TEMPLATE % path
|
||||||
|
http_base_url = 'http://ahttp.crackle.com/' + path
|
||||||
|
for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
|
||||||
|
formats.append({
|
||||||
|
'url': http_base_url + mfs_path,
|
||||||
|
'format_id': 'http-' + mfs_path.split('.')[0],
|
||||||
|
'width': mfs_info['width'],
|
||||||
|
'height': mfs_info['height'],
|
||||||
|
})
|
||||||
|
for cc in item.findall('cc'):
|
||||||
|
locale = cc.attrib.get('l')
|
||||||
|
v = cc.attrib.get('v')
|
||||||
|
if locale and v:
|
||||||
|
if locale not in subtitles:
|
||||||
|
subtitles[locale] = []
|
||||||
|
subtitles[locale] = [{
|
||||||
|
'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
|
||||||
|
'ext': 'ttml',
|
||||||
|
}]
|
||||||
|
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': item.attrib.get('d'),
|
||||||
|
'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
|
||||||
|
'series': item.attrib.get('sn'),
|
||||||
|
'season_number': int_or_none(item.attrib.get('se')),
|
||||||
|
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -9,6 +9,7 @@ class FOXIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||||
|
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '255180355939',
|
'id': '255180355939',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -17,10 +18,6 @@ class FOXIE(InfoExtractor):
|
|||||||
'duration': 129,
|
'duration': 129,
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -29,7 +26,7 @@ class FOXIE(InfoExtractor):
|
|||||||
|
|
||||||
release_url = self._parse_json(self._search_regex(
|
release_url = self._parse_json(self._search_regex(
|
||||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||||
video_id)['release_url'] + '&manifest=m3u'
|
video_id)['release_url'] + '&switch=http'
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
|
@ -224,6 +224,20 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# MPD from http://dash-mse-test.appspot.com/media.html
|
||||||
|
{
|
||||||
|
'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
|
||||||
|
'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'car-20120827-manifest',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'car-20120827-manifest',
|
||||||
|
'formats': 'mincount:9',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
},
|
||||||
# google redirect
|
# google redirect
|
||||||
{
|
{
|
||||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||||
@ -1302,7 +1316,8 @@ class GenericIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||||
'formats': self._parse_mpd_formats(doc, video_id),
|
'formats': self._parse_mpd_formats(
|
||||||
|
doc, video_id, mpd_base_url=url.rpartition('/')[0]),
|
||||||
}
|
}
|
||||||
except compat_xml_parse_error:
|
except compat_xml_parse_error:
|
||||||
pass
|
pass
|
||||||
@ -1413,7 +1428,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for embedded Dailymotion player
|
# Look for embedded Dailymotion player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(
|
return _playlist_from_matches(
|
||||||
matches, lambda m: unescapeHTML(m[1]))
|
matches, lambda m: unescapeHTML(m[1]))
|
||||||
|
@ -10,8 +10,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class HotStarIE(InfoExtractor):
|
class HotStarIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
|
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1000076273',
|
'id': '1000076273',
|
||||||
@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.hotstar.com/1000000515',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
||||||
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
||||||
|
@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MailRuIE(InfoExtractor):
|
class MailRuIE(InfoExtractor):
|
||||||
@ -34,14 +38,30 @@ class MailRuIE(InfoExtractor):
|
|||||||
'id': '46843144_1263',
|
'id': '46843144_1263',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
||||||
'timestamp': 1397217632,
|
'timestamp': 1397039888,
|
||||||
'upload_date': '20140411',
|
'upload_date': '20140409',
|
||||||
'uploader': 'hitech',
|
'uploader': 'hitech@corp.mail.ru',
|
||||||
'uploader_id': 'hitech@corp.mail.ru',
|
'uploader_id': 'hitech@corp.mail.ru',
|
||||||
'duration': 245,
|
'duration': 245,
|
||||||
},
|
},
|
||||||
'skip': 'Not accessible from Travis CI server',
|
'skip': 'Not accessible from Travis CI server',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# only available via metaUrl API
|
||||||
|
'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
|
||||||
|
'md5': '3b26d2491c6949d031a32b96bd97c096',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '56664382_502',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': ':8336',
|
||||||
|
'timestamp': 1449094163,
|
||||||
|
'upload_date': '20151202',
|
||||||
|
'uploader': '720pizle@mail.ru',
|
||||||
|
'uploader_id': '720pizle@mail.ru',
|
||||||
|
'duration': 6001,
|
||||||
|
},
|
||||||
|
'skip': 'Not accessible from Travis CI server',
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -51,32 +71,55 @@ class MailRuIE(InfoExtractor):
|
|||||||
if not video_id:
|
if not video_id:
|
||||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||||
|
|
||||||
video_data = self._download_json(
|
webpage = self._download_webpage(url, video_id)
|
||||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
|
||||||
|
|
||||||
author = video_data['author']
|
video_data = None
|
||||||
uploader = author['name']
|
|
||||||
uploader_id = author.get('id') or author.get('email')
|
page_config = self._parse_json(self._search_regex(
|
||||||
view_count = video_data.get('views_count')
|
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||||
|
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||||
|
if page_config:
|
||||||
|
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
|
||||||
|
if meta_url:
|
||||||
|
video_data = self._download_json(
|
||||||
|
meta_url, video_id, 'Downloading video meta JSON', fatal=False)
|
||||||
|
|
||||||
|
# Fallback old approach
|
||||||
|
if not video_data:
|
||||||
|
video_data = self._download_json(
|
||||||
|
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
||||||
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for f in video_data['videos']:
|
||||||
|
video_url = f.get('url')
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
format_id = f.get('key')
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
meta_data = video_data['meta']
|
meta_data = video_data['meta']
|
||||||
content_id = '%s_%s' % (
|
title = remove_end(meta_data['title'], '.mp4')
|
||||||
meta_data.get('accId', ''), meta_data['itemId'])
|
|
||||||
title = meta_data['title']
|
|
||||||
if title.endswith('.mp4'):
|
|
||||||
title = title[:-4]
|
|
||||||
thumbnail = meta_data['poster']
|
|
||||||
duration = meta_data['duration']
|
|
||||||
timestamp = meta_data['timestamp']
|
|
||||||
|
|
||||||
formats = [
|
author = video_data.get('author')
|
||||||
{
|
uploader = author.get('name')
|
||||||
'url': video['url'],
|
uploader_id = author.get('id') or author.get('email')
|
||||||
'format_id': video['key'],
|
view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
|
||||||
'height': int(video['key'].rstrip('p'))
|
|
||||||
} for video in video_data['videos']
|
acc_id = meta_data.get('accId')
|
||||||
]
|
item_id = meta_data.get('itemId')
|
||||||
self._sort_formats(formats)
|
content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
|
||||||
|
|
||||||
|
thumbnail = meta_data.get('poster')
|
||||||
|
duration = int_or_none(meta_data.get('duration'))
|
||||||
|
timestamp = int_or_none(meta_data.get('timestamp'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': content_id,
|
'id': content_id,
|
||||||
|
@ -57,7 +57,7 @@ class NBCIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
# This video has expired but with an escaped embedURL
|
# This video has expired but with an escaped embedURL
|
||||||
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
||||||
'skip': 'Expired'
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -4,10 +4,12 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
US_RATINGS,
|
US_RATINGS,
|
||||||
@ -199,7 +201,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'id': '2365006249',
|
'id': '2365006249',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
||||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
|
||||||
'duration': 3190,
|
'duration': 3190,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -213,7 +215,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'id': '2365297690',
|
'id': '2365297690',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'FRONTLINE - Losing Iraq',
|
'title': 'FRONTLINE - Losing Iraq',
|
||||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
|
||||||
'duration': 5050,
|
'duration': 5050,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -227,7 +229,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'id': '2201174722',
|
'id': '2201174722',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||||
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
|
'description': 'md5:95a19f568689d09a166dff9edada3301',
|
||||||
'duration': 801,
|
'duration': 801,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -237,8 +239,8 @@ class PBSIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2365297708',
|
'id': '2365297708',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
|
||||||
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||||
|
'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
|
||||||
'duration': 6559,
|
'duration': 6559,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
@ -278,7 +280,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'display_id': 'player',
|
'display_id': 'player',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'American Experience - Death and the Civil War, Chapter 1',
|
'title': 'American Experience - Death and the Civil War, Chapter 1',
|
||||||
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
|
'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',
|
||||||
'duration': 682,
|
'duration': 682,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
@ -287,20 +289,19 @@ class PBSIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://video.pbs.org/video/2365367186/',
|
'url': 'http://www.pbs.org/video/2365245528/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2365367186',
|
'id': '2365245528',
|
||||||
'display_id': '2365367186',
|
'display_id': '2365245528',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'To Catch A Comet - Full Episode',
|
'title': 'FRONTLINE - United States of Secrets (Part One)',
|
||||||
'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
|
'description': 'md5:55756bd5c551519cc4b7703e373e217e',
|
||||||
'duration': 3342,
|
'duration': 6851,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
},
|
},
|
||||||
'skip': 'Expired',
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
||||||
@ -312,7 +313,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
|
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
|
||||||
'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
|
'description': 'md5:54033c6baa1f9623607c6e2ed245888b',
|
||||||
'duration': 1480,
|
'duration': 1480,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
@ -328,7 +329,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'display_id': 'the-atomic-artists',
|
'display_id': 'the-atomic-artists',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'FRONTLINE - The Atomic Artists',
|
'title': 'FRONTLINE - The Atomic Artists',
|
||||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',
|
||||||
'duration': 723,
|
'duration': 723,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
@ -365,10 +366,14 @@ class PBSIE(InfoExtractor):
|
|||||||
webpage, 'upload date', default=None))
|
webpage, 'upload date', default=None))
|
||||||
|
|
||||||
# tabbed frontline videos
|
# tabbed frontline videos
|
||||||
tabbed_videos = re.findall(
|
MULTI_PART_REGEXES = (
|
||||||
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
|
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
|
||||||
if tabbed_videos:
|
r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
|
||||||
return tabbed_videos, presumptive_id, upload_date
|
)
|
||||||
|
for p in MULTI_PART_REGEXES:
|
||||||
|
tabbed_videos = re.findall(p, webpage)
|
||||||
|
if tabbed_videos:
|
||||||
|
return tabbed_videos, presumptive_id, upload_date
|
||||||
|
|
||||||
MEDIA_ID_REGEXES = [
|
MEDIA_ID_REGEXES = [
|
||||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||||
@ -432,9 +437,21 @@ class PBSIE(InfoExtractor):
|
|||||||
for vid_id in video_id]
|
for vid_id in video_id]
|
||||||
return self.playlist_result(entries, display_id)
|
return self.playlist_result(entries, display_id)
|
||||||
|
|
||||||
info = self._download_json(
|
try:
|
||||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
info = self._download_json(
|
||||||
display_id)
|
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||||
|
display_id, 'Downloading video info JSON')
|
||||||
|
except ExtractorError as e:
|
||||||
|
if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
|
||||||
|
raise
|
||||||
|
# videoInfo API may not work for some videos, fallback to portalplayer API
|
||||||
|
player = self._download_webpage(
|
||||||
|
'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
|
||||||
|
info = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||||
|
player, 'video data', default='{}'),
|
||||||
|
display_id, transform_source=js_to_json, fatal=False)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
||||||
@ -493,7 +510,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'description': info['program'].get('description'),
|
'description': info.get('description') or info.get('program', {}).get('description'),
|
||||||
'thumbnail': info.get('image_url'),
|
'thumbnail': info.get('image_url'),
|
||||||
'duration': int_or_none(info.get('duration')),
|
'duration': int_or_none(info.get('duration')),
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
51
youtube_dl/extractor/plays.py
Normal file
51
youtube_dl/extractor/plays.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class PlaysTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
|
||||||
|
'md5': 'dfeac1198506652b5257a62762cec7bc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '56af17f56c95335490',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'When you outplay the Azir wall',
|
||||||
|
'description': 'Posted by Bjergsen',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
content = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'R\.bindContent\(({.+?})\);', webpage,
|
||||||
|
'content'), video_id)['content']
|
||||||
|
mpd_url, sources = re.search(
|
||||||
|
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
|
||||||
|
content).groups()
|
||||||
|
formats = self._extract_mpd_formats(
|
||||||
|
self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
|
||||||
|
for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
|
||||||
|
formats.append({
|
||||||
|
'url': self._proto_relative_url(format_url),
|
||||||
|
'format_id': 'http-' + format_id,
|
||||||
|
'height': int_or_none(height),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -20,7 +20,6 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
url_basename,
|
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -283,8 +282,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
first_video_id = None
|
first_video_id = None
|
||||||
duration = None
|
duration = None
|
||||||
for item in entry['media$content']:
|
for item in entry['media$content']:
|
||||||
smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
|
smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
|
||||||
cur_video_id = url_basename(smil_url)
|
cur_video_id = ThePlatformIE._match_id(smil_url)
|
||||||
if first_video_id is None:
|
if first_video_id is None:
|
||||||
first_video_id = cur_video_id
|
first_video_id = cur_video_id
|
||||||
duration = float_or_none(item.get('plfile$duration'))
|
duration = float_or_none(item.get('plfile$duration'))
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -12,10 +16,10 @@ class ViddlerIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.viddler.com/v/43903784',
|
'url': 'http://www.viddler.com/v/43903784',
|
||||||
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
'md5': '9eee21161d2c7f5b39690c3e325fab2f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '43903784',
|
'id': '43903784',
|
||||||
'ext': 'mp4',
|
'ext': 'mov',
|
||||||
'title': 'Video Made Easy',
|
'title': 'Video Made Easy',
|
||||||
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
||||||
'uploader': 'viddler',
|
'uploader': 'viddler',
|
||||||
@ -29,10 +33,10 @@ class ViddlerIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viddler.com/v/4d03aad9/',
|
'url': 'http://www.viddler.com/v/4d03aad9/',
|
||||||
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
|
'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4d03aad9',
|
'id': '4d03aad9',
|
||||||
'ext': 'mp4',
|
'ext': 'ts',
|
||||||
'title': 'WALL-TO-GORTAT',
|
'title': 'WALL-TO-GORTAT',
|
||||||
'upload_date': '20150126',
|
'upload_date': '20150126',
|
||||||
'uploader': 'deadspin',
|
'uploader': 'deadspin',
|
||||||
@ -42,10 +46,10 @@ class ViddlerIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
||||||
'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
|
'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '221ebbbd',
|
'id': '221ebbbd',
|
||||||
'ext': 'mp4',
|
'ext': 'mov',
|
||||||
'title': 'LETeens-Grammar-snack-third-conditional',
|
'title': 'LETeens-Grammar-snack-third-conditional',
|
||||||
'description': ' ',
|
'description': ' ',
|
||||||
'upload_date': '20140929',
|
'upload_date': '20140929',
|
||||||
@ -54,16 +58,42 @@ class ViddlerIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# secret protected
|
||||||
|
'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '890c0985',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Complete Property Training - Traineeships',
|
||||||
|
'description': ' ',
|
||||||
|
'upload_date': '20130606',
|
||||||
|
'uploader': 'TiffanyBowtell',
|
||||||
|
'timestamp': 1370496993,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
json_url = (
|
query = {
|
||||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
'video_id': video_id,
|
||||||
video_id)
|
'key': 'v0vhrt7bg2xq1vyxhkct',
|
||||||
|
}
|
||||||
|
|
||||||
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
|
secret = qs.get('secret', [None])[0]
|
||||||
|
if secret:
|
||||||
|
query['secret'] = secret
|
||||||
|
|
||||||
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||||
request = sanitized_Request(json_url, None, headers)
|
request = sanitized_Request(
|
||||||
|
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
|
||||||
|
% compat_urllib_parse.urlencode(query), None, headers)
|
||||||
data = self._download_json(request, video_id)['video']
|
data = self._download_json(request, video_id)['video']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -57,7 +57,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_xsrft_and_vuid(self, webpage):
|
def _extract_xsrft_and_vuid(self, webpage):
|
||||||
xsrft = self._search_regex(
|
xsrft = self._search_regex(
|
||||||
r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
||||||
webpage, 'login token', group='xsrft')
|
webpage, 'login token', group='xsrft')
|
||||||
vuid = self._search_regex(
|
vuid = self._search_regex(
|
||||||
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
|
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
|
||||||
|
@ -229,6 +229,9 @@ class YoukuIE(InfoExtractor):
|
|||||||
if error_note is not None and '因版权原因无法观看此视频' in error_note:
|
if error_note is not None and '因版权原因无法观看此视频' in error_note:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Youku said: Sorry, this video is available in China only', expected=True)
|
'Youku said: Sorry, this video is available in China only', expected=True)
|
||||||
|
elif error_note and '该视频被设为私密' in error_note:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Youku said: Sorry, this video is private', expected=True)
|
||||||
else:
|
else:
|
||||||
msg = 'Youku server reported error %i' % error.get('code')
|
msg = 'Youku server reported error %i' % error.get('code')
|
||||||
if error_note is not None:
|
if error_note is not None:
|
||||||
|
@ -375,7 +375,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
IE_NAME = 'youtube'
|
IE_NAME = 'youtube'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
|
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'BaW_jenozKc',
|
'id': 'BaW_jenozKc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -441,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
|
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
|
||||||
'note': 'Use the first video ID in the URL',
|
'note': 'Use the first video ID in the URL',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'BaW_jenozKc',
|
'id': 'BaW_jenozKc',
|
||||||
@ -704,6 +704,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
|
||||||
|
'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gVfLd0zydlo',
|
||||||
|
'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://vid.plus/FlRa-iH7PGw',
|
'url': 'http://vid.plus/FlRa-iH7PGw',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -1196,9 +1205,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if not self._downloader.params.get('noplaylist'):
|
if not self._downloader.params.get('noplaylist'):
|
||||||
entries = []
|
entries = []
|
||||||
feed_ids = []
|
feed_ids = []
|
||||||
multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
|
multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
|
||||||
for feed in multifeed_metadata_list.split(','):
|
for feed in multifeed_metadata_list.split(','):
|
||||||
feed_data = compat_parse_qs(feed)
|
# Unquote should take place before split on comma (,) since textual
|
||||||
|
# fields may contain comma as well (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/8536)
|
||||||
|
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'Youtube',
|
'ie_key': 'Youtube',
|
||||||
|
@ -56,7 +56,7 @@ from .compat import (
|
|||||||
compiled_regex_type = type(re.compile(''))
|
compiled_regex_type = type(re.compile(''))
|
||||||
|
|
||||||
std_headers = {
|
std_headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',
|
||||||
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
'Accept-Encoding': 'gzip, deflate',
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.02.09'
|
__version__ = '2016.02.13'
|
||||||
|
Reference in New Issue
Block a user