Compare commits
15 Commits
2016.02.09
...
2016.02.10
Author | SHA1 | Date | |
---|---|---|---|
f817d9bec1 | |||
e2effb08a4 | |||
7fcea295c5 | |||
cc799437ea | |||
89d23f37f2 | |||
b92071ef00 | |||
47246ae26c | |||
9c15869c28 | |||
51e9094f4a | |||
5e3a6fec33 | |||
d413095f7e | |||
1bedf4de06 | |||
3967a761f4 | |||
b081350bd9 | |||
16f1430ba6 |
@@ -1288,6 +1288,9 @@ class YoutubeDL(object):
|
||||
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
else:
|
||||
# Sanitize format_id from characters used in format selector expression
|
||||
format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
|
||||
format_id = format['format_id']
|
||||
if format_id not in formats_dict:
|
||||
formats_dict[format_id] = []
|
||||
|
@@ -1186,6 +1186,7 @@ class InfoExtractor(object):
|
||||
http_count = 0
|
||||
m3u8_count = 0
|
||||
|
||||
src_urls = []
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
src = video.get('src')
|
||||
@@ -1222,6 +1223,9 @@ class InfoExtractor(object):
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||
if src_url in src_urls:
|
||||
continue
|
||||
src_urls.append(src_url)
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
@@ -1267,11 +1271,13 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
urls = []
|
||||
subtitles = {}
|
||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||
src = textstream.get('src')
|
||||
if not src:
|
||||
if not src or src in urls:
|
||||
continue
|
||||
urls.append(src)
|
||||
ext = textstream.get('ext') or determine_ext(src)
|
||||
if not ext:
|
||||
type_ = textstream.get('type')
|
||||
@@ -1434,6 +1440,8 @@ class InfoExtractor(object):
|
||||
base_url = mpd_base_url + base_url
|
||||
representation_id = representation_attrib.get('id')
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
f = {
|
||||
'format_id': mpd_id or representation_id,
|
||||
'url': base_url,
|
||||
@@ -1446,6 +1454,7 @@ class InfoExtractor(object):
|
||||
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
|
@@ -9,6 +9,7 @@ class FOXIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
||||
'info_dict': {
|
||||
'id': '255180355939',
|
||||
'ext': 'mp4',
|
||||
@@ -17,10 +18,6 @@ class FOXIE(InfoExtractor):
|
||||
'duration': 129,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -29,7 +26,7 @@ class FOXIE(InfoExtractor):
|
||||
|
||||
release_url = self._parse_json(self._search_regex(
|
||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||
video_id)['release_url'] + '&manifest=m3u'
|
||||
video_id)['release_url'] + '&switch=http'
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
|
@@ -10,8 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class HotStarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
||||
'info_dict': {
|
||||
'id': '1000076273',
|
||||
@@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/1000000515',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
||||
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
||||
|
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class MailRuIE(InfoExtractor):
|
||||
@@ -34,14 +38,30 @@ class MailRuIE(InfoExtractor):
|
||||
'id': '46843144_1263',
|
||||
'ext': 'mp4',
|
||||
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
||||
'timestamp': 1397217632,
|
||||
'upload_date': '20140411',
|
||||
'uploader': 'hitech',
|
||||
'timestamp': 1397039888,
|
||||
'upload_date': '20140409',
|
||||
'uploader': 'hitech@corp.mail.ru',
|
||||
'uploader_id': 'hitech@corp.mail.ru',
|
||||
'duration': 245,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
},
|
||||
{
|
||||
# only available via metaUrl API
|
||||
'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
|
||||
'md5': '3b26d2491c6949d031a32b96bd97c096',
|
||||
'info_dict': {
|
||||
'id': '56664382_502',
|
||||
'ext': 'mp4',
|
||||
'title': ':8336',
|
||||
'timestamp': 1449094163,
|
||||
'upload_date': '20151202',
|
||||
'uploader': '720pizle@mail.ru',
|
||||
'uploader_id': '720pizle@mail.ru',
|
||||
'duration': 6001,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -51,32 +71,55 @@ class MailRuIE(InfoExtractor):
|
||||
if not video_id:
|
||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
author = video_data['author']
|
||||
uploader = author['name']
|
||||
uploader_id = author.get('id') or author.get('email')
|
||||
view_count = video_data.get('views_count')
|
||||
video_data = None
|
||||
|
||||
page_config = self._parse_json(self._search_regex(
|
||||
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||
if page_config:
|
||||
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
|
||||
if meta_url:
|
||||
video_data = self._download_json(
|
||||
meta_url, video_id, 'Downloading video meta JSON', fatal=False)
|
||||
|
||||
# Fallback old approach
|
||||
if not video_data:
|
||||
video_data = self._download_json(
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
formats = []
|
||||
for f in video_data['videos']:
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = f.get('key')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
meta_data = video_data['meta']
|
||||
content_id = '%s_%s' % (
|
||||
meta_data.get('accId', ''), meta_data['itemId'])
|
||||
title = meta_data['title']
|
||||
if title.endswith('.mp4'):
|
||||
title = title[:-4]
|
||||
thumbnail = meta_data['poster']
|
||||
duration = meta_data['duration']
|
||||
timestamp = meta_data['timestamp']
|
||||
title = remove_end(meta_data['title'], '.mp4')
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video['key'],
|
||||
'height': int(video['key'].rstrip('p'))
|
||||
} for video in video_data['videos']
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
author = video_data.get('author')
|
||||
uploader = author.get('name')
|
||||
uploader_id = author.get('id') or author.get('email')
|
||||
view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
|
||||
|
||||
acc_id = meta_data.get('accId')
|
||||
item_id = meta_data.get('itemId')
|
||||
content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
|
||||
|
||||
thumbnail = meta_data.get('poster')
|
||||
duration = int_or_none(meta_data.get('duration'))
|
||||
timestamp = int_or_none(meta_data.get('timestamp'))
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
@@ -432,9 +433,20 @@ class PBSIE(InfoExtractor):
|
||||
for vid_id in video_id]
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
player = self._download_webpage(
|
||||
'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
|
||||
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||
player, 'video data', default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
# Fallback to old videoInfo API
|
||||
if not info:
|
||||
info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id)
|
||||
display_id, 'Downloading video info JSON')
|
||||
|
||||
formats = []
|
||||
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
||||
|
@@ -1,6 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -12,10 +16,10 @@ class ViddlerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.viddler.com/v/43903784',
|
||||
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
||||
'md5': '9eee21161d2c7f5b39690c3e325fab2f',
|
||||
'info_dict': {
|
||||
'id': '43903784',
|
||||
'ext': 'mp4',
|
||||
'ext': 'mov',
|
||||
'title': 'Video Made Easy',
|
||||
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
||||
'uploader': 'viddler',
|
||||
@@ -29,10 +33,10 @@ class ViddlerIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/v/4d03aad9/',
|
||||
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
|
||||
'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
|
||||
'info_dict': {
|
||||
'id': '4d03aad9',
|
||||
'ext': 'mp4',
|
||||
'ext': 'ts',
|
||||
'title': 'WALL-TO-GORTAT',
|
||||
'upload_date': '20150126',
|
||||
'uploader': 'deadspin',
|
||||
@@ -42,10 +46,10 @@ class ViddlerIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
||||
'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
|
||||
'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
|
||||
'info_dict': {
|
||||
'id': '221ebbbd',
|
||||
'ext': 'mp4',
|
||||
'ext': 'mov',
|
||||
'title': 'LETeens-Grammar-snack-third-conditional',
|
||||
'description': ' ',
|
||||
'upload_date': '20140929',
|
||||
@@ -54,16 +58,42 @@ class ViddlerIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
# secret protected
|
||||
'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
|
||||
'info_dict': {
|
||||
'id': '890c0985',
|
||||
'ext': 'mp4',
|
||||
'title': 'Complete Property Training - Traineeships',
|
||||
'description': ' ',
|
||||
'upload_date': '20130606',
|
||||
'uploader': 'TiffanyBowtell',
|
||||
'timestamp': 1370496993,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
json_url = (
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
||||
video_id)
|
||||
query = {
|
||||
'video_id': video_id,
|
||||
'key': 'v0vhrt7bg2xq1vyxhkct',
|
||||
}
|
||||
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
secret = qs.get('secret', [None])[0]
|
||||
if secret:
|
||||
query['secret'] = secret
|
||||
|
||||
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||
request = sanitized_Request(json_url, None, headers)
|
||||
request = sanitized_Request(
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
|
||||
% compat_urllib_parse.urlencode(query), None, headers)
|
||||
data = self._download_json(request, video_id)['video']
|
||||
|
||||
formats = []
|
||||
|
@@ -229,6 +229,9 @@ class YoukuIE(InfoExtractor):
|
||||
if error_note is not None and '因版权原因无法观看此视频' in error_note:
|
||||
raise ExtractorError(
|
||||
'Youku said: Sorry, this video is available in China only', expected=True)
|
||||
elif error_note and '该视频被设为私密' in error_note:
|
||||
raise ExtractorError(
|
||||
'Youku said: Sorry, this video is private', expected=True)
|
||||
else:
|
||||
msg = 'Youku server reported error %i' % error.get('code')
|
||||
if error_note is not None:
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.02.09'
|
||||
__version__ = '2016.02.10'
|
||||
|
Reference in New Issue
Block a user