Compare commits
136 Commits
2016.01.23
...
2016.02.04
Author | SHA1 | Date | |
---|---|---|---|
|
f1ed3acae5 | ||
|
920d21b9d3 | ||
|
2fb35d1c28 | ||
|
09be85b8dd | ||
|
eadc3ccd50 | ||
|
58be922079 | ||
|
c84d3a557d | ||
|
6ad2b01e14 | ||
|
fd3a1f3d60 | ||
|
87de7069b9 | ||
|
6fba62c87a | ||
|
1df4141196 | ||
|
fae45ede08 | ||
|
4e0cff2a50 | ||
|
0436157b95 | ||
|
ae0db349c1 | ||
|
08411970d5 | ||
|
dc724e0c8b | ||
|
0a5d1ec706 | ||
|
58250eff2b | ||
|
11a4efc505 | ||
|
7537b35fb8 | ||
|
33cc74eeeb | ||
|
f021acee49 | ||
|
abe694ca95 | ||
|
b286f201a8 | ||
|
bd93a12e85 | ||
|
92769650fa | ||
|
dc4fe5c6d7 | ||
|
566bda51f2 | ||
|
f63757ec35 | ||
|
7a0ed06909 | ||
|
9934fe76be | ||
|
a8aad21001 | ||
|
d055bf91cc | ||
|
0e1b1a011d | ||
|
eab3c2895c | ||
|
163da6a484 | ||
|
324916d11a | ||
|
3ccb0655c1 | ||
|
e04398e397 | ||
|
231ea2a3bb | ||
|
b99d88c6a1 | ||
|
189d72d5fd | ||
|
a7aab0c23e | ||
|
a69bee4762 | ||
|
9acd33094d | ||
|
8e7aad2075 | ||
|
ce5879fa14 | ||
|
7b7507d6e1 | ||
|
14823decf3 | ||
|
673fb82e65 | ||
|
181cf24bc0 | ||
|
89f2602880 | ||
|
db9b1dbcd9 | ||
|
e881c4bcab | ||
|
670ad51ade | ||
|
eb6fc7d32a | ||
|
ed1a390583 | ||
|
809e1857c5 | ||
|
7c38af48b9 | ||
|
60ad3eb970 | ||
|
a7685b3a6b | ||
|
8f1fddc816 | ||
|
1bf996fa5c | ||
|
248ae880b6 | ||
|
2d2fa82d17 | ||
|
c94678957f | ||
|
16f38a699f | ||
|
a6c2c24479 | ||
|
b8c9926c0a | ||
|
df374b5222 | ||
|
5ea1eb78f5 | ||
|
5d2c0fd9ba | ||
|
0803753fea | ||
|
2c2f1efdcd | ||
|
b323e1707d | ||
|
09104e9930 | ||
|
5fa1702ca6 | ||
|
17b598d30c | ||
|
53be8894e4 | ||
|
c3deacd562 | ||
|
8ab3fe81d8 | ||
|
2f0a33d8a3 | ||
|
05d0d131a7 | ||
|
c140629995 | ||
|
7d106a65ca | ||
|
0179f6a830 | ||
|
830afe85dc | ||
|
8bf39420b4 | ||
|
71d08b3e29 | ||
|
06ffa33485 | ||
|
874e05975b | ||
|
f5d30d521c | ||
|
e047922be0 | ||
|
83ab8a79cc | ||
|
350cf045d8 | ||
|
68a0ea15b4 | ||
|
2b4f5e68d1 | ||
|
055f417278 | ||
|
70029bc348 | ||
|
cf57433bbd | ||
|
1ac6e794cb | ||
|
a853427427 | ||
|
50e989e263 | ||
|
10e6ed9341 | ||
|
38c84acae5 | ||
|
29f46c2bee | ||
|
39c10a2b6e | ||
|
b913348d5f | ||
|
2b14cb566f | ||
|
b0df5223be | ||
|
ed7cd1e859 | ||
|
f125d9115b | ||
|
a9d5f12fec | ||
|
7f32e5dc35 | ||
|
c3111ab34f | ||
|
9339774af2 | ||
|
b0d21deda9 | ||
|
fab6f0e65b | ||
|
b6c33fd544 | ||
|
fb4b345800 | ||
|
af9c2a07ae | ||
|
ab180fc648 | ||
|
682f8c43b5 | ||
|
f693213567 | ||
|
9165d6bab9 | ||
|
2975fe1a7b | ||
|
de691a498d | ||
|
2e6e742c3c | ||
|
e9bd0f772b | ||
|
77f785076f | ||
|
94278f7202 | ||
|
a0d8d704df | ||
|
f6861ec96f | ||
|
e1a0bfdffe |
1
AUTHORS
1
AUTHORS
@@ -155,3 +155,4 @@ Vignesh Venkat
|
||||
Tom Gijselinck
|
||||
Founder Fang
|
||||
Andrew Alexeyew
|
||||
Saso Bezlaj
|
||||
|
@@ -173,6 +173,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
expected filesize (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
ffmpeg (experimental)
|
||||
--hls-use-mpegts Use the mpegts container for HLS videos,
|
||||
allowing to play the video while
|
||||
downloading (some players may not be able
|
||||
to play it)
|
||||
--external-downloader COMMAND Use the specified external downloader.
|
||||
Currently supports
|
||||
aria2c,axel,curl,httpie,wget
|
||||
@@ -451,6 +455,8 @@ The `-o` option allows users to indicate a template for the output file names. T
|
||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||
- `duration`: The sequence will be replaced by the length of the video in seconds.
|
||||
|
||||
Note that some of the aforementioned sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||
|
@@ -55,6 +55,7 @@
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
@@ -90,6 +91,7 @@
|
||||
- **Canvas**
|
||||
- **CBS**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
@@ -133,6 +135,8 @@
|
||||
- **DailymotionCloud**
|
||||
- **daum.net**
|
||||
- **daum.net:clip**
|
||||
- **daum.net:playlist**
|
||||
- **daum.net:user**
|
||||
- **DBTV**
|
||||
- **DCN**
|
||||
- **dcn:live**
|
||||
@@ -180,6 +184,7 @@
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **facebook**
|
||||
- **facebook:post**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **Fczenit**
|
||||
@@ -314,6 +319,7 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **MakerTV**
|
||||
- **Malemotion**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **metacafe**
|
||||
@@ -506,6 +512,7 @@
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
- **schooltv**
|
||||
- **SciVee**
|
||||
- **screen.yahoo:search**: Yahoo screen search
|
||||
- **Screencast**
|
||||
|
@@ -14,6 +14,7 @@ from test.helper import FakeYDL, assertRegexpMatches
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_str, compat_urllib_error
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.postprocessor.common import PostProcessor
|
||||
from youtube_dl.utils import ExtractorError, match_filter_func
|
||||
|
||||
@@ -221,6 +222,16 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
||||
|
||||
formats = [
|
||||
{'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
|
||||
@@ -237,6 +248,17 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def format_info(f_id):
|
||||
info = YoutubeIE._formats[f_id].copy()
|
||||
|
||||
# XXX: In real cases InfoExtractor._parse_mpd() fills up 'acodec'
|
||||
# and 'vcodec', while in tests such information is incomplete since
|
||||
# commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
|
||||
# test_YoutubeDL.test_youtube_format_selection is broken without
|
||||
# this fix
|
||||
if 'acodec' in info and 'vcodec' not in info:
|
||||
info['vcodec'] = 'none'
|
||||
elif 'vcodec' in info and 'acodec' not in info:
|
||||
info['acodec'] = 'none'
|
||||
|
||||
info['format_id'] = f_id
|
||||
info['url'] = 'url:' + f_id
|
||||
return info
|
||||
@@ -636,6 +658,42 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
|
||||
|
||||
def test_do_not_override_ie_key_in_url_transparent(self):
|
||||
ydl = YDL()
|
||||
|
||||
class Foo1IE(InfoExtractor):
|
||||
_VALID_URL = r'foo1:'
|
||||
|
||||
def _real_extract(self, url):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'foo2:',
|
||||
'ie_key': 'Foo2',
|
||||
}
|
||||
|
||||
class Foo2IE(InfoExtractor):
|
||||
_VALID_URL = r'foo2:'
|
||||
|
||||
def _real_extract(self, url):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': 'foo3:',
|
||||
'ie_key': 'Foo3',
|
||||
}
|
||||
|
||||
class Foo3IE(InfoExtractor):
|
||||
_VALID_URL = r'foo3:'
|
||||
|
||||
def _real_extract(self, url):
|
||||
return _make_result([{'url': TEST_URL}])
|
||||
|
||||
ydl.add_info_extractor(Foo1IE(ydl))
|
||||
ydl.add_info_extractor(Foo2IE(ydl))
|
||||
ydl.add_info_extractor(Foo3IE(ydl))
|
||||
ydl.extract_info('foo1:')
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['url'], TEST_URL)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -56,7 +56,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
|
||||
def test_youtube_user_matching(self):
|
||||
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||
|
||||
def test_youtube_feeds(self):
|
||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
||||
|
@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
|
||||
NPOIE,
|
||||
ComedyCentralIE,
|
||||
NRKTVIE,
|
||||
RaiIE,
|
||||
RaiTVIE,
|
||||
VikiIE,
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
@@ -65,16 +65,16 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 13)
|
||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||
for lang in ['it', 'fr', 'de']:
|
||||
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||
self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
|
||||
for lang in ['fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
def test_youtube_subtitles_sbv_format(self):
|
||||
def test_youtube_subtitles_ttml_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitlesformat'] = 'sbv'
|
||||
self.DL.params['subtitlesformat'] = 'ttml'
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||
self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
|
||||
|
||||
def test_youtube_subtitles_vtt_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
@@ -260,7 +260,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
||||
|
||||
class TestRaiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||
IE = RaiIE
|
||||
IE = RaiTVIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
|
@@ -34,7 +34,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
entries = result['entries']
|
||||
entries = list(result['entries'])
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
@@ -263,7 +263,7 @@ class YoutubeDL(object):
|
||||
the downloader (see youtube_dl/downloader/common.py):
|
||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||
xattr_set_filesize, external_downloader_args.
|
||||
xattr_set_filesize, external_downloader_args, hls_use_mpegts.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
@@ -707,7 +707,6 @@ class YoutubeDL(object):
|
||||
It will also download the videos if 'download'.
|
||||
Returns the resolved ie_result.
|
||||
"""
|
||||
|
||||
result_type = ie_result.get('_type', 'video')
|
||||
|
||||
if result_type in ('url', 'url_transparent'):
|
||||
@@ -736,7 +735,7 @@ class YoutubeDL(object):
|
||||
|
||||
force_properties = dict(
|
||||
(k, v) for k, v in ie_result.items() if v is not None)
|
||||
for f in ('_type', 'url'):
|
||||
for f in ('_type', 'url', 'ie_key'):
|
||||
if f in force_properties:
|
||||
del force_properties[f]
|
||||
new_result = info.copy()
|
||||
@@ -906,7 +905,7 @@ class YoutubeDL(object):
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9_-]+)
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||
m = str_operator_rex.search(filter_spec)
|
||||
|
@@ -369,6 +369,7 @@ def _real_main(argv=None):
|
||||
'no_color': opts.no_color,
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
'hls_use_mpegts': opts.hls_use_mpegts,
|
||||
'external_downloader_args': external_downloader_args,
|
||||
'postprocessor_args': postprocessor_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
|
@@ -45,6 +45,7 @@ class FileDownloader(object):
|
||||
(experimental)
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
|
@@ -273,15 +273,21 @@ class F4mFD(FragmentFD):
|
||||
return fragments_list
|
||||
|
||||
def _parse_bootstrap_node(self, node, base_url):
|
||||
if node.text is None:
|
||||
# Sometimes non empty inline bootstrap info can be specified along
|
||||
# with bootstrap url attribute (e.g. dummy inline bootstrap info
|
||||
# contains whitespace characters in [1]). We will prefer bootstrap
|
||||
# url over inline bootstrap info when present.
|
||||
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
|
||||
bootstrap_url = node.get('url')
|
||||
if bootstrap_url:
|
||||
bootstrap_url = compat_urlparse.urljoin(
|
||||
base_url, node.attrib['url'])
|
||||
base_url, bootstrap_url)
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
else:
|
||||
bootstrap_url = None
|
||||
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
return (boot_info, bootstrap_url)
|
||||
return boot_info, bootstrap_url
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
@@ -316,7 +322,8 @@ class F4mFD(FragmentFD):
|
||||
metadata = None
|
||||
|
||||
fragments_list = build_fragments_list(boot_info)
|
||||
if self.params.get('test', False):
|
||||
test = self.params.get('test', False)
|
||||
if test:
|
||||
# We only download the first fragment
|
||||
fragments_list = fragments_list[:1]
|
||||
total_frags = len(fragments_list)
|
||||
@@ -326,6 +333,7 @@ class F4mFD(FragmentFD):
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': total_frags,
|
||||
'live': live,
|
||||
}
|
||||
|
||||
self._prepare_frag_download(ctx)
|
||||
@@ -380,7 +388,7 @@ class F4mFD(FragmentFD):
|
||||
else:
|
||||
raise
|
||||
|
||||
if not fragments_list and live and bootstrap_url:
|
||||
if not fragments_list and not test and live and bootstrap_url:
|
||||
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
||||
total_frags += len(fragments_list)
|
||||
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
||||
|
@@ -26,7 +26,11 @@ class FragmentFD(FileDownloader):
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
|
||||
if 'live' not in ctx:
|
||||
ctx['live'] = False
|
||||
self.to_screen(
|
||||
'[%s] Total fragments: %s'
|
||||
% (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
|
||||
self.report_destination(ctx['filename'])
|
||||
dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
@@ -74,14 +78,14 @@ class FragmentFD(FileDownloader):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
return
|
||||
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
time_now = time.time()
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
state['elapsed'] = time_now - start
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
|
||||
if s['status'] == 'finished':
|
||||
state['frag_index'] += 1
|
||||
@@ -91,9 +95,10 @@ class FragmentFD(FileDownloader):
|
||||
else:
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size,
|
||||
state['downloaded_bytes'])
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size,
|
||||
state['downloaded_bytes'])
|
||||
state['speed'] = s.get('speed')
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
self._hook_progress(state)
|
||||
|
@@ -39,7 +39,11 @@ class HlsFD(FileDownloader):
|
||||
'-headers',
|
||||
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
||||
|
||||
args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
|
||||
args += ['-i', url, '-c', 'copy']
|
||||
if self.params.get('hls_use_mpegts', False):
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
|
@@ -50,7 +50,7 @@ from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .audimedia import AudiMediaIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .azubu import AzubuIE
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
@@ -90,7 +90,10 @@ from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import CanvasIE
|
||||
from .cbs import CBSIE
|
||||
from .cbsnews import CBSNewsIE
|
||||
from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
@@ -142,6 +145,8 @@ from .dailymotion import (
|
||||
from .daum import (
|
||||
DaumIE,
|
||||
DaumClipIE,
|
||||
DaumPlaylistIE,
|
||||
DaumUserIE,
|
||||
)
|
||||
from .dbtv import DBTVIE
|
||||
from .dcn import (
|
||||
@@ -196,7 +201,10 @@ from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPostIE,
|
||||
)
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .fczenit import FczenitIE
|
||||
@@ -369,6 +377,7 @@ from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .makertv import MakerTVIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
@@ -479,6 +488,7 @@ from .npo import (
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
SchoolTVIE,
|
||||
VPROIE,
|
||||
WNLIE
|
||||
)
|
||||
|
@@ -8,11 +8,7 @@ from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class ACastBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||
|
||||
|
||||
class ACastIE(ACastBaseIE):
|
||||
class ACastIE(InfoExtractor):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
||||
_TEST = {
|
||||
@@ -23,14 +19,19 @@ class ACastIE(ACastBaseIE):
|
||||
'ext': 'mp3',
|
||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||
'timestamp': 1196172000000,
|
||||
'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'duration': 211,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id)
|
||||
|
||||
embed_page = self._download_webpage(
|
||||
re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id)
|
||||
cast_data = self._parse_json(self._search_regex(
|
||||
r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'),
|
||||
display_id)['GetAcast/%s/%s' % (channel, display_id)]
|
||||
|
||||
return {
|
||||
'id': compat_str(cast_data['id']),
|
||||
@@ -44,7 +45,7 @@ class ACastIE(ACastBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class ACastChannelIE(ACastBaseIE):
|
||||
class ACastChannelIE(InfoExtractor):
|
||||
IE_NAME = 'acast:channel'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
||||
_TEST = {
|
||||
@@ -56,6 +57,7 @@ class ACastChannelIE(ACastBaseIE):
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}
|
||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
|
@@ -8,6 +8,8 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
xpath_element,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,7 +33,7 @@ class AllocineIE(InfoExtractor):
|
||||
'id': '19540403',
|
||||
'ext': 'mp4',
|
||||
'title': 'Planes 2 Bande-annonce VF',
|
||||
'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e',
|
||||
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
@@ -41,7 +43,7 @@ class AllocineIE(InfoExtractor):
|
||||
'id': '19544709',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||
'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
|
||||
'description': 'md5:601d15393ac40f249648ef000720e7e3',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
@@ -59,14 +61,18 @@ class AllocineIE(InfoExtractor):
|
||||
if typ == 'film':
|
||||
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
|
||||
else:
|
||||
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
|
||||
|
||||
player_data = json.loads(player)
|
||||
video_id = compat_str(player_data['refMedia'])
|
||||
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
|
||||
if player:
|
||||
player_data = json.loads(player)
|
||||
video_id = compat_str(player_data['refMedia'])
|
||||
else:
|
||||
model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
|
||||
model_data = self._parse_json(unescapeHTML(model), display_id)
|
||||
video_id = compat_str(model_data['id'])
|
||||
|
||||
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
|
||||
|
||||
video = xml.find('.//AcVisionVideo').attrib
|
||||
video = xpath_element(xml, './/AcVisionVideo').attrib
|
||||
quality = qualities(['ld', 'md', 'hd'])
|
||||
|
||||
formats = []
|
||||
|
@@ -3,7 +3,11 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class AzubuIE(InfoExtractor):
|
||||
@@ -91,3 +95,37 @@ class AzubuIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class AzubuLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
user = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
|
||||
user)['data']
|
||||
if info['type'] != 'STREAM':
|
||||
raise ExtractorError('{0} is not streaming live'.format(user), expected=True)
|
||||
|
||||
req = sanitized_Request(
|
||||
'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
|
||||
req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
|
||||
bc_info = self._download_json(req, user)
|
||||
m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
|
||||
|
||||
return {
|
||||
'id': info['id'],
|
||||
'title': self._live_title(info['title']),
|
||||
'uploader_id': user,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
'thumbnail': bc_info['poster'],
|
||||
}
|
||||
|
@@ -193,6 +193,19 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# compact player (https://github.com/rg3/youtube-dl/issues/8147)
|
||||
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
||||
'info_dict': {
|
||||
'id': 'p028bfkj',
|
||||
'ext': 'flv',
|
||||
'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||
'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
@@ -482,9 +495,11 @@ class BBCCoUkIE(InfoExtractor):
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', webpage, 'title')
|
||||
(r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
|
||||
r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
|
||||
description = self._search_regex(
|
||||
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||
(r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||
r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
|
||||
webpage, 'description', default=None)
|
||||
if not description:
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
@@ -1,7 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
@@ -10,7 +16,8 @@ class BpbIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
'md5': '0792086e8e2bfbac9cdf27835d5f2093',
|
||||
# md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2
|
||||
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
@@ -25,13 +32,26 @@ class BpbIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
r'(http://film\.bpb\.de/player/dokument_[0-9]+\.mp4)',
|
||||
webpage, 'video URL')
|
||||
video_info_dicts = re.findall(
|
||||
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
|
||||
|
||||
formats = []
|
||||
for video_info in video_info_dicts:
|
||||
video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
|
||||
quality = video_info['quality']
|
||||
video_url = video_info['src']
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'preference': 10 if quality == 'high' else 0,
|
||||
'format_note': quality,
|
||||
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
|
@@ -1,16 +1,14 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_start
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class CBSNewsIE(InfoExtractor):
|
||||
class CBSNewsIE(ThePlatformIE):
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -31,7 +29,7 @@ class CBSNewsIE(InfoExtractor):
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
'info_dict': {
|
||||
'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 205,
|
||||
@@ -42,54 +40,26 @@ class CBSNewsIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_info = json.loads(self._html_search_regex(
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||
webpage, 'video JSON info'))
|
||||
webpage, 'video JSON info'), video_id)
|
||||
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
title = item.get('articleTitle') or item.get('hed')
|
||||
duration = item.get('duration')
|
||||
thumbnail = item.get('mediaImage') or item.get('thumbnail')
|
||||
|
||||
formats = []
|
||||
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
|
||||
uri = item.get('media' + format_id + 'URI')
|
||||
if not uri:
|
||||
continue
|
||||
uri = remove_start(uri, '{manifest:none}')
|
||||
fmt = {
|
||||
'url': uri,
|
||||
'format_id': format_id,
|
||||
}
|
||||
if uri.startswith('rtmp'):
|
||||
play_path = re.sub(
|
||||
r'{slistFilePath}', '',
|
||||
uri.split('<break>')[-1].split('{break}')[-1])
|
||||
play_path = re.sub(
|
||||
r'{manifest:.+}.*$', '', play_path)
|
||||
fmt.update({
|
||||
'app': 'ondemand?auth=cbs',
|
||||
'play_path': 'mp4:' + play_path,
|
||||
'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
|
||||
'page_url': 'http://www.cbsnews.com',
|
||||
'ext': 'flv',
|
||||
})
|
||||
elif uri.endswith('.m3u8'):
|
||||
fmt['ext'] = 'mp4'
|
||||
formats.append(fmt)
|
||||
|
||||
subtitles = {}
|
||||
if 'mpxRefId' in video_info:
|
||||
subtitles['en'] = [{
|
||||
@@ -97,6 +67,17 @@ class CBSNewsIE(InfoExtractor):
|
||||
'url': 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'],
|
||||
}]
|
||||
|
||||
formats = []
|
||||
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
|
||||
pid = item.get('media' + format_id)
|
||||
if not pid:
|
||||
continue
|
||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -105,3 +86,41 @@ class CBSNewsIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
'ext': 'flv',
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
|
||||
|
||||
hdcore_sign = 'hdcore=3.3.1'
|
||||
f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
|
||||
if f4m_formats:
|
||||
for entry in f4m_formats:
|
||||
# URLs without the extra param induce an 404 error
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['headline'],
|
||||
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
|
||||
'duration': parse_duration(video_info.get('segmentDur')),
|
||||
'formats': f4m_formats,
|
||||
}
|
||||
|
@@ -825,6 +825,12 @@ class InfoExtractor(object):
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found')
|
||||
|
||||
for f in formats:
|
||||
# Automatically determine tbr when missing based on abr and vbr (improves
|
||||
# formats sorting in some cases)
|
||||
if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
|
||||
f['tbr'] = f['abr'] + f['vbr']
|
||||
|
||||
def _formats_key(f):
|
||||
# TODO remove the following workaround
|
||||
from ..utils import determine_ext
|
||||
@@ -1014,6 +1020,18 @@ class InfoExtractor(object):
|
||||
return []
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
# A Media Playlist Tag MUST NOT appear in a Master Playlist
|
||||
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
|
||||
# The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists
|
||||
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc:
|
||||
return [{
|
||||
'url': m3u8_url,
|
||||
'format_id': m3u8_id,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}]
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
@@ -1058,9 +1076,9 @@ class InfoExtractor(object):
|
||||
# TODO: looks like video codec is not always necessarily goes first
|
||||
va_codecs = codecs.split(',')
|
||||
if va_codecs[0]:
|
||||
f['vcodec'] = va_codecs[0].partition('.')[0]
|
||||
f['vcodec'] = va_codecs[0]
|
||||
if len(va_codecs) > 1 and va_codecs[1]:
|
||||
f['acodec'] = va_codecs[1].partition('.')[0]
|
||||
f['acodec'] = va_codecs[1]
|
||||
resolution = last_info.get('RESOLUTION')
|
||||
if resolution:
|
||||
width_str, height_str = resolution.split('x')
|
||||
@@ -1164,6 +1182,7 @@ class InfoExtractor(object):
|
||||
formats = []
|
||||
rtmp_count = 0
|
||||
http_count = 0
|
||||
m3u8_count = 0
|
||||
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
@@ -1203,8 +1222,17 @@ class InfoExtractor(object):
|
||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
|
||||
if len(m3u8_formats) == 1:
|
||||
m3u8_count += 1
|
||||
m3u8_formats[0].update({
|
||||
'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
|
||||
if src_ext == 'f4m':
|
||||
@@ -1302,6 +1330,83 @@ class InfoExtractor(object):
|
||||
})
|
||||
return entries
|
||||
|
||||
def _download_dash_manifest(self, dash_manifest_url, video_id, fatal=True):
|
||||
return self._download_xml(
|
||||
dash_manifest_url, video_id,
|
||||
note='Downloading DASH manifest',
|
||||
errnote='Could not download DASH manifest',
|
||||
fatal=fatal)
|
||||
|
||||
def _extract_dash_manifest_formats(self, dash_manifest_url, video_id, fatal=True, namespace=None, formats_dict={}):
|
||||
dash_doc = self._download_dash_manifest(dash_manifest_url, video_id, fatal)
|
||||
if dash_doc is False:
|
||||
return []
|
||||
|
||||
return self._parse_dash_manifest(
|
||||
dash_doc, namespace=namespace, formats_dict=formats_dict)
|
||||
|
||||
def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}):
|
||||
def _add_ns(path):
|
||||
return self._xpath_ns(path, namespace)
|
||||
|
||||
formats = []
|
||||
for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')):
|
||||
mime_type = a.attrib.get('mimeType')
|
||||
for r in a.findall(_add_ns('Representation')):
|
||||
mime_type = r.attrib.get('mimeType') or mime_type
|
||||
url_el = r.find(_add_ns('BaseURL'))
|
||||
if mime_type == 'text/vtt':
|
||||
# TODO implement WebVTT downloading
|
||||
pass
|
||||
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
|
||||
segment_list = r.find(_add_ns('SegmentList'))
|
||||
format_id = r.attrib['id']
|
||||
video_url = url_el.text if url_el is not None else None
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'width': int_or_none(r.attrib.get('width')),
|
||||
'height': int_or_none(r.attrib.get('height')),
|
||||
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
|
||||
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
|
||||
'filesize': filesize,
|
||||
'fps': int_or_none(r.attrib.get('frameRate')),
|
||||
}
|
||||
if segment_list is not None:
|
||||
initialization_url = segment_list.find(_add_ns('Initialization')).attrib['sourceURL']
|
||||
f.update({
|
||||
'initialization_url': initialization_url,
|
||||
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall(_add_ns('SegmentURL'))],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
if fo['format_id'] == format_id)
|
||||
except StopIteration:
|
||||
full_info = formats_dict.get(format_id, {}).copy()
|
||||
full_info.update(f)
|
||||
codecs = r.attrib.get('codecs')
|
||||
if codecs:
|
||||
if mime_type.startswith('video/'):
|
||||
vcodec, acodec = codecs, 'none'
|
||||
else: # mime_type.startswith('audio/')
|
||||
vcodec, acodec = 'none', codecs
|
||||
|
||||
full_info.update({
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
})
|
||||
formats.append(full_info)
|
||||
else:
|
||||
existing_format.update(f)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
|
@@ -68,11 +68,16 @@ class CSpanIE(InfoExtractor):
|
||||
video_type, video_id = matches.groups()
|
||||
video_type = 'clip' if video_type == 'id' else 'program'
|
||||
else:
|
||||
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
||||
if senate_isvp_url:
|
||||
title = self._og_search_title(webpage)
|
||||
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
||||
return self.url_result(surl, 'SenateISVP', video_id, title)
|
||||
m = re.search(r'data-(?P<type>clip|prog)id=["\'](?P<id>\d+)', webpage)
|
||||
if m:
|
||||
video_id = m.group('id')
|
||||
video_type = 'program' if m.group('type') == 'prog' else 'clip'
|
||||
else:
|
||||
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
||||
if senate_isvp_url:
|
||||
title = self._og_search_title(webpage)
|
||||
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
||||
return self.url_result(surl, 'SenateISVP', video_id, title)
|
||||
if video_type is None or video_id is None:
|
||||
raise ExtractorError('unable to find video id and type')
|
||||
|
||||
@@ -107,6 +112,13 @@ class CSpanIE(InfoExtractor):
|
||||
'height': int_or_none(get_text_attr(quality, 'height')),
|
||||
'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
|
||||
})
|
||||
if not formats:
|
||||
path = unescapeHTML(get_text_attr(f, 'path'))
|
||||
if not path:
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': '%s_%d' % (video_id, partnum + 1),
|
||||
|
@@ -2,17 +2,26 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
xpath_text,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DaumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P<id>[^?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
|
||||
IE_NAME = 'daum.net'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -23,25 +32,57 @@ class DaumIE(InfoExtractor):
|
||||
'title': '마크 헌트 vs 안토니오 실바',
|
||||
'description': 'Mark Hunt vs Antonio Silva',
|
||||
'upload_date': '20131217',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 2117,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.tvpot.daum.net/v/65139429',
|
||||
'info_dict': {
|
||||
'id': '65139429',
|
||||
'ext': 'mp4',
|
||||
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
|
||||
'description': 'md5:79794514261164ff27e36a21ad229fc5',
|
||||
'upload_date': '20150604',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 154,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
|
||||
'info_dict': {
|
||||
'id': 'vwIpVpCQsT8$',
|
||||
'ext': 'flv',
|
||||
'title': '01-Korean War ( Trouble on the horizon )',
|
||||
'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
|
||||
'upload_date': '20080223',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 249,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||
query = compat_urllib_parse.urlencode({'vid': video_id})
|
||||
info = self._download_xml(
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||
'Downloading video info')
|
||||
movie_data = self._download_json(
|
||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
|
||||
video_id, 'Downloading video formats info')
|
||||
|
||||
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
|
||||
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
|
||||
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
|
||||
|
||||
info = self._download_xml(
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||
'Downloading video info')
|
||||
|
||||
formats = []
|
||||
for format_el in movie_data['output_list']['output_list']:
|
||||
profile = format_el['profile']
|
||||
@@ -76,8 +117,9 @@ class DaumIE(InfoExtractor):
|
||||
|
||||
|
||||
class DaumClipIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
||||
IE_NAME = 'daum.net:clip'
|
||||
_URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
||||
@@ -87,11 +129,19 @@ class DaumClipIE(InfoExtractor):
|
||||
'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||
'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||
'upload_date': '20130831',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 3868,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super(DaumClipIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
clip_info = self._download_json(
|
||||
@@ -102,7 +152,7 @@ class DaumClipIE(InfoExtractor):
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
|
||||
'title': clip_info['title'],
|
||||
'title': unescapeHTML(clip_info['title']),
|
||||
'thumbnail': clip_info.get('thumb_url'),
|
||||
'description': clip_info.get('contents'),
|
||||
'duration': int_or_none(clip_info.get('duration')),
|
||||
@@ -110,3 +160,139 @@ class DaumClipIE(InfoExtractor):
|
||||
'view_count': int_or_none(clip_info.get('play_count')),
|
||||
'ie_key': 'Daum',
|
||||
}
|
||||
|
||||
|
||||
class DaumListIE(InfoExtractor):
|
||||
def _get_entries(self, list_id, list_id_type):
|
||||
name = None
|
||||
entries = []
|
||||
for pagenum in itertools.count(1):
|
||||
list_info = self._download_json(
|
||||
'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
|
||||
pagenum, list_id_type, list_id), list_id, 'Downloading list info - %s' % pagenum)
|
||||
|
||||
entries.extend([
|
||||
self.url_result(
|
||||
'http://tvpot.daum.net/v/%s' % clip['vid'])
|
||||
for clip in list_info['clip_list']
|
||||
])
|
||||
|
||||
if not name:
|
||||
name = list_info.get('playlist_bean', {}).get('name') or \
|
||||
list_info.get('potInfo', {}).get('name')
|
||||
|
||||
if not list_info.get('has_more'):
|
||||
break
|
||||
|
||||
return name, entries
|
||||
|
||||
def _check_clip(self, url, list_id):
|
||||
query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
|
||||
if 'clipid' in query_dict:
|
||||
clip_id = query_dict['clipid'][0]
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
|
||||
return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
|
||||
|
||||
|
||||
class DaumPlaylistIE(DaumListIE):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)'
|
||||
IE_NAME = 'daum.net:playlist'
|
||||
_URL_TEMPLATE = 'http://tvpot.daum.net/mypot/View.do?playlistid=%s'
|
||||
|
||||
_TESTS = [{
|
||||
'note': 'Playlist url with clipid',
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
|
||||
'info_dict': {
|
||||
'id': '6213966',
|
||||
'title': 'Woorissica Official',
|
||||
},
|
||||
'playlist_mincount': 181
|
||||
}, {
|
||||
'note': 'Playlist url with clipid - noplaylist',
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
|
||||
'info_dict': {
|
||||
'id': '73806844',
|
||||
'ext': 'mp4',
|
||||
'title': '151017 Airport',
|
||||
'upload_date': '20160117',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if DaumUserIE.suitable(url) else super(DaumPlaylistIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
|
||||
clip_result = self._check_clip(url, list_id)
|
||||
if clip_result:
|
||||
return clip_result
|
||||
|
||||
name, entries = self._get_entries(list_id, 'playlistid')
|
||||
|
||||
return self.playlist_result(entries, list_id, name)
|
||||
|
||||
|
||||
class DaumUserIE(DaumListIE):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.(?:do|tv)\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)'
|
||||
IE_NAME = 'daum.net:user'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0',
|
||||
'info_dict': {
|
||||
'id': 'o2scDLIVbHc0',
|
||||
'title': '마이 리틀 텔레비전',
|
||||
},
|
||||
'playlist_mincount': 213
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
|
||||
'info_dict': {
|
||||
'id': '73801156',
|
||||
'ext': 'mp4',
|
||||
'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116',
|
||||
'upload_date': '20160117',
|
||||
'description': 'md5:5e91d2d6747f53575badd24bd62b9f36'
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence',
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631',
|
||||
'info_dict': {
|
||||
'id': '6196631',
|
||||
'title': '마이 리틀 텔레비전 - 20160109',
|
||||
},
|
||||
'playlist_count': 11
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://m.tvpot.daum.net/mypot/Top.tv?ownerid=45x1okb1If50&playlistid=3569733',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
|
||||
clip_result = self._check_clip(url, list_id)
|
||||
if clip_result:
|
||||
return clip_result
|
||||
|
||||
query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
|
||||
if 'playlistid' in query_dict:
|
||||
playlist_id = query_dict['playlistid'][0]
|
||||
return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist')
|
||||
|
||||
name, entries = self._get_entries(list_id, 'ownerid')
|
||||
|
||||
return self.playlist_result(entries, list_id, name)
|
||||
|
@@ -53,8 +53,8 @@ class ESPNIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'class="video-play-button"[^>]+data-id="(\d+)',
|
||||
webpage, 'video id')
|
||||
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
|
||||
webpage, 'video id', group='id')
|
||||
|
||||
cms = 'espn'
|
||||
if 'data-source="intl"' in webpage:
|
||||
|
@@ -6,9 +6,11 @@ import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
@@ -23,19 +25,30 @@ from ..utils import (
|
||||
|
||||
class FacebookIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:\w+\.)?facebook\.com/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
|
||||
(?:v|video_id)=|
|
||||
[^/]+/videos/(?:[^/]+/)?
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?:.*)'''
|
||||
(?:
|
||||
https?://
|
||||
(?:\w+\.)?facebook\.com/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
video/video\.php|
|
||||
photo\.php|
|
||||
video\.php|
|
||||
video/embed
|
||||
)\?(?:.*?)(?:v|video_id)=|
|
||||
[^/]+/videos/(?:[^/]+/)?
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
'''
|
||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||
_NETRC_MACHINE = 'facebook'
|
||||
IE_NAME = 'facebook'
|
||||
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
||||
@@ -57,6 +70,16 @@ class FacebookIE(InfoExtractor):
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
]
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||
'md5': '54706e4db4f5ad58fbad82dde1f1213f',
|
||||
'info_dict': {
|
||||
'id': '957955867617029',
|
||||
'ext': 'mp4',
|
||||
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
|
||||
'uploader': 'Demy de Zeeuw',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@@ -66,6 +89,9 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'facebook:544765982287235',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -136,13 +162,36 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
|
||||
req.add_header('User-Agent', self._CHROME_USER_AGENT)
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_data = None
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});\n'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
||||
if not m:
|
||||
if m:
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data = json.loads(params_raw)['video_data']
|
||||
|
||||
def video_data_list2dict(video_data):
|
||||
ret = {}
|
||||
for item in video_data:
|
||||
format_id = item['stream_type']
|
||||
ret.setdefault(format_id, []).append(item)
|
||||
return ret
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
|
||||
for item in server_js_data['instances']:
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||
break
|
||||
|
||||
if not video_data:
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
if m_msg is not None:
|
||||
raise ExtractorError(
|
||||
@@ -150,12 +199,9 @@ class FacebookIE(InfoExtractor):
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
params = json.loads(params_raw)
|
||||
|
||||
formats = []
|
||||
for format_id, f in params['video_data'].items():
|
||||
for format_id, f in video_data.items():
|
||||
if not f or not isinstance(f, list):
|
||||
continue
|
||||
for quality in ('sd', 'hd'):
|
||||
@@ -167,9 +213,16 @@ class FacebookIE(InfoExtractor):
|
||||
'url': src,
|
||||
'preference': -10 if format_id == 'progressive' else 0,
|
||||
})
|
||||
dash_manifest = f[0].get('dash_manifest')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_dash_manifest(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)),
|
||||
namespace='urn:mpeg:dash:schema:mpd:2011'))
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
|
||||
default=None)
|
||||
@@ -188,3 +241,33 @@ class FacebookIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
|
||||
class FacebookPostIE(InfoExtractor):
|
||||
IE_NAME = 'facebook:post'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
|
||||
'info_dict': {
|
||||
'id': '544765982287235',
|
||||
'ext': 'mp4',
|
||||
'title': '"What are you doing running in the snow?"',
|
||||
'uploader': 'FailArmy',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, post_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
||||
for video_id in self._parse_json(
|
||||
self._search_regex(
|
||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
|
||||
webpage, 'video ids', group='ids'),
|
||||
post_id)]
|
||||
|
||||
return self.playlist_result(entries, post_id)
|
||||
|
@@ -6,24 +6,29 @@ from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class GamekingsIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
# MD5 is flaky, seems to change regularly
|
||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||
# YouTube embed video
|
||||
'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
'md5': '5208d3a17adeaef829a7861887cb9029',
|
||||
'info_dict': {
|
||||
'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
|
||||
'id': 'HkSQKetlGOU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
|
||||
'title': 'Phoenix Wright: Ace Attorney - Dual Destinies Review',
|
||||
'description': 'md5:db88c0e7f47e9ea50df3271b9dc72e1d',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader_id': 'UCJugRGo4STYMeFr5RoOShtQ',
|
||||
'uploader': 'Gamekings Vault',
|
||||
'upload_date': '20151123',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
# vimeo video
|
||||
'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
|
||||
'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/',
|
||||
'md5': '12bf04dfd238e70058046937657ea68d',
|
||||
'info_dict': {
|
||||
'id': 'the-legend-of-zelda-majoras-mask',
|
||||
@@ -33,7 +38,7 @@ class GamekingsIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
||||
'url': 'http://www.gamekings.nl/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -43,7 +48,11 @@ class GamekingsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
playlist_id = self._search_regex(
|
||||
r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
|
||||
r'gogoVideo\([^,]+,\s*"([^"]+)', webpage, 'playlist id')
|
||||
|
||||
# Check if a YouTube embed is used
|
||||
if YoutubeIE.suitable(playlist_id):
|
||||
return self.url_result(playlist_id, ie='Youtube')
|
||||
|
||||
playlist = self._download_xml(
|
||||
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
||||
|
@@ -1229,19 +1229,24 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = head_response.headers.get('Content-Type', '')
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
||||
if m:
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
formats = []
|
||||
if m.group('format_id').endswith('mpegurl'):
|
||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
else:
|
||||
formats = [{
|
||||
'format_id': m.group('format_id'),
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'direct': True,
|
||||
'formats': [{
|
||||
'format_id': m.group('format_id'),
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}],
|
||||
'formats': formats,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
@@ -1819,6 +1824,17 @@ class GenericIE(InfoExtractor):
|
||||
if digiteka_url:
|
||||
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
||||
|
||||
# Look for Limelight embeds
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
lm = {
|
||||
'Media': 'media',
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
return self.url_result('limelight:%s:%s' % (
|
||||
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||
|
@@ -21,6 +21,18 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Video by naomipq',
|
||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||
}
|
||||
}, {
|
||||
# missing description
|
||||
'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
|
||||
'info_dict': {
|
||||
'id': 'BA-pQFBG8HZ',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'britneyspears',
|
||||
'title': 'Video by britneyspears',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||
'only_matching': True,
|
||||
@@ -32,8 +44,8 @@ class InstagramIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
|
||||
fatal=False)
|
||||
desc = self._search_regex(
|
||||
r'"caption":"(.+?)"', webpage, 'description', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -2,12 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class KickStarterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
|
||||
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description',
|
||||
'md5': 'c81addca81327ffa66c642b5d8b08cab',
|
||||
'info_dict': {
|
||||
'id': '1404461844',
|
||||
@@ -27,7 +28,8 @@ class KickStarterIE(InfoExtractor):
|
||||
'uploader_id': 'pebble',
|
||||
'uploader': 'Pebble Technology',
|
||||
'title': 'Pebble iOS Notifications',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
|
||||
'info_dict': {
|
||||
@@ -43,7 +45,7 @@ class KickStarterIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>\s*(.*?)(?:\s*— Kickstarter)?\s*</title>',
|
||||
r'<title>\s*(.*?)(?:\s*—\s*Kickstarter)?\s*</title>',
|
||||
webpage, 'title')
|
||||
video_url = self._search_regex(
|
||||
r'data-video-url="(.*?)"',
|
||||
@@ -52,7 +54,7 @@ class KickStarterIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Generic',
|
||||
'url': url,
|
||||
'url': smuggle_url(url, {'to_generic': True}),
|
||||
'title': title,
|
||||
}
|
||||
|
||||
|
@@ -31,6 +31,10 @@ class KuwoBaseIE(InfoExtractor):
|
||||
(file_format['ext'], file_format.get('br', ''), song_id),
|
||||
song_id, note='Download %s url info' % file_format['format'],
|
||||
)
|
||||
|
||||
if song_url == 'IPDeny':
|
||||
raise ExtractorError('This song is blocked in this region', expected=True)
|
||||
|
||||
if song_url.startswith('http://') or song_url.startswith('https://'):
|
||||
formats.append({
|
||||
'url': song_url,
|
||||
|
@@ -5,11 +5,13 @@ import datetime
|
||||
import re
|
||||
import time
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -258,6 +260,7 @@ class LetvCloudIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
|
||||
'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
|
||||
'info_dict': {
|
||||
'id': 'p7jnfw5hw9_ec93197892',
|
||||
'ext': 'mp4',
|
||||
@@ -265,6 +268,7 @@ class LetvCloudIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
|
||||
'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
|
||||
'info_dict': {
|
||||
'id': 'p7jnfw5hw9_187060b6fd',
|
||||
'ext': 'mp4',
|
||||
@@ -272,21 +276,37 @@ class LetvCloudIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
uu_mobj = re.search('uu=([\w]+)', url)
|
||||
vu_mobj = re.search('vu=([\w]+)', url)
|
||||
@staticmethod
|
||||
def sign_data(obj):
|
||||
if obj['cf'] == 'flash':
|
||||
salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
|
||||
items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
|
||||
elif obj['cf'] == 'html5':
|
||||
salt = 'fbeh5player12c43eccf2bec3300344'
|
||||
items = ['cf', 'ran', 'uu', 'bver', 'vu']
|
||||
input_data = ''.join([item + obj[item] for item in items]) + salt
|
||||
obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest()
|
||||
|
||||
if not uu_mobj or not vu_mobj:
|
||||
raise ExtractorError('Invalid URL: %s' % url, expected=True)
|
||||
def _get_formats(self, cf, uu, vu, media_id):
|
||||
def get_play_json(cf, timestamp):
|
||||
data = {
|
||||
'cf': cf,
|
||||
'ver': '2.2',
|
||||
'bver': 'firefox44.0',
|
||||
'format': 'json',
|
||||
'uu': uu,
|
||||
'vu': vu,
|
||||
'ran': compat_str(timestamp),
|
||||
}
|
||||
self.sign_data(data)
|
||||
return self._download_json(
|
||||
'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data),
|
||||
media_id, 'Downloading playJson data for type %s' % cf)
|
||||
|
||||
uu = uu_mobj.group(1)
|
||||
vu = vu_mobj.group(1)
|
||||
media_id = uu + '_' + vu
|
||||
|
||||
play_json_req = sanitized_Request(
|
||||
'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' +
|
||||
'uu=' + uu + '&vu=' + vu)
|
||||
play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data')
|
||||
play_json = get_play_json(cf, time.time())
|
||||
# The server time may be different from local time
|
||||
if play_json.get('code') == 10071:
|
||||
play_json = get_play_json(cf, play_json['timestamp'])
|
||||
|
||||
if not play_json.get('data'):
|
||||
if play_json.get('message'):
|
||||
@@ -312,6 +332,21 @@ class LetvCloudIE(InfoExtractor):
|
||||
'width': int_or_none(play_url.get('vwidth')),
|
||||
'height': int_or_none(play_url.get('vheight')),
|
||||
})
|
||||
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
uu_mobj = re.search('uu=([\w]+)', url)
|
||||
vu_mobj = re.search('vu=([\w]+)', url)
|
||||
|
||||
if not uu_mobj or not vu_mobj:
|
||||
raise ExtractorError('Invalid URL: %s' % url, expected=True)
|
||||
|
||||
uu = uu_mobj.group(1)
|
||||
vu = vu_mobj.group(1)
|
||||
media_id = uu + '_' + vu
|
||||
|
||||
formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -40,7 +40,8 @@ class LimelightBaseIE(InfoExtractor):
|
||||
if not stream_url:
|
||||
continue
|
||||
if '.f4m' in stream_url:
|
||||
formats.extend(self._extract_f4m_formats(stream_url, video_id))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, video_id, fatal=False))
|
||||
else:
|
||||
fmt = {
|
||||
'url': stream_url,
|
||||
@@ -72,8 +73,8 @@ class LimelightBaseIE(InfoExtractor):
|
||||
format_id = mobile_url.get('targetMediaPlatform')
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=-1, m3u8_id=format_id))
|
||||
media_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
|
55
youtube_dl/extractor/matchtv.py
Normal file
55
youtube_dl/extractor/matchtv.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class MatchTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://matchtv\.ru/?#live-player'
|
||||
_TEST = {
|
||||
'url': 'http://matchtv.ru/#live-player',
|
||||
'info_dict': {
|
||||
'id': 'matchtv-live',
|
||||
'ext': 'flv',
|
||||
'title': 're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = 'matchtv-live'
|
||||
request = sanitized_Request(
|
||||
'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse.urlencode({
|
||||
'ts': '',
|
||||
'quality': 'SD',
|
||||
'contentId': '561d2c0df7159b37178b4567',
|
||||
'sign': '',
|
||||
'includeHighlights': '0',
|
||||
'userId': '',
|
||||
'sessionId': random.randint(1, 1000000000),
|
||||
'contentType': 'channel',
|
||||
'timeShift': '0',
|
||||
'platform': 'portal',
|
||||
}),
|
||||
headers={
|
||||
'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
|
||||
})
|
||||
video_url = self._download_json(request, video_id)['data']['videoUrl']
|
||||
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
|
||||
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title('Матч ТВ - Прямой эфир'),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
@@ -18,13 +18,17 @@ class NBAIE(InfoExtractor):
|
||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
'timestamp': 1354638466,
|
||||
'upload_date': '20121204',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||
'only_matching': True,
|
||||
@@ -68,7 +72,7 @@ class NBAIE(InfoExtractor):
|
||||
if video_url.startswith('/'):
|
||||
continue
|
||||
if video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||
elif video_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
|
@@ -19,32 +19,39 @@ class NBCIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||
# md5 checksum is not stable
|
||||
'info_dict': {
|
||||
'id': 'c9xnCo0YPOPH',
|
||||
'ext': 'flv',
|
||||
'id': '112966',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
|
||||
'info_dict': {
|
||||
'id': 'XwU9KZkp98TH',
|
||||
'id': '176',
|
||||
'ext': 'flv',
|
||||
'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
|
||||
'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
|
||||
},
|
||||
'skip': 'Only works from US',
|
||||
'skip': '404 Not Found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
||||
'info_dict': {
|
||||
'id': '8iUuyzWDdYUZ',
|
||||
'ext': 'flv',
|
||||
'id': '2832821',
|
||||
'ext': 'mp4',
|
||||
'title': 'Star Wars Teaser',
|
||||
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from US',
|
||||
},
|
||||
{
|
||||
@@ -66,7 +73,11 @@ class NBCIE(InfoExtractor):
|
||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||
if theplatform_url.startswith('//'):
|
||||
theplatform_url = 'http:' + theplatform_url
|
||||
return self.url_result(smuggle_url(theplatform_url, {'source_url': url}))
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(theplatform_url, {'source_url': url}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
|
@@ -193,7 +193,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, m3u8_id='hls', entry_protocol='m3u8_native'))
|
||||
src, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'))
|
||||
else:
|
||||
quality = f.get('quality')
|
||||
ff = {
|
||||
|
@@ -189,7 +189,7 @@ class NPOIE(NPOBaseIE):
|
||||
if not video_url:
|
||||
continue
|
||||
if format_id == 'adaptive':
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
@@ -406,6 +406,38 @@ class NPORadioFragmentIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class SchoolTVIE(InfoExtractor):
|
||||
IE_NAME = 'schooltv'
|
||||
_VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
|
||||
'info_dict': {
|
||||
'id': 'WO_NTR_429477',
|
||||
'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
|
||||
'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:abfa0ff690adb73fd0297fd033aaa631'
|
||||
},
|
||||
'params': {
|
||||
# Skip because of m3u8 download
|
||||
'skip_download': True
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'NPO',
|
||||
'url': 'npo:%s' % video_id,
|
||||
'display_id': display_id
|
||||
}
|
||||
|
||||
|
||||
class VPROIE(NPOIE):
|
||||
IE_NAME = 'vpro'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||
|
@@ -133,26 +133,32 @@ class NRKTVIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'upload_date': '20140523',
|
||||
'duration': 1741.52,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '383650ece2b25ecec996ad7b5bb2a384',
|
||||
'info_dict': {
|
||||
'id': 'mdfp15000514',
|
||||
'ext': 'flv',
|
||||
'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
||||
'upload_date': '20140524',
|
||||
'duration': 4605.0,
|
||||
'duration': 4605.08,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class OdnoklassnikiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||
_TESTS = [{
|
||||
# metadata in JSON
|
||||
'url': 'http://ok.ru/video/20079905452',
|
||||
@@ -69,6 +69,12 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.ok.ru/videoembed/20648036891',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://m.ok.ru/video/20079905452',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://mobile.ok.ru/video/20079905452',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -71,7 +71,7 @@ class ScreenwaveMediaIE(InfoExtractor):
|
||||
formats = []
|
||||
for source in sources:
|
||||
if source['type'] == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(source['file'], video_id))
|
||||
formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4'))
|
||||
else:
|
||||
file_ = source.get('file')
|
||||
if not file_:
|
||||
@@ -107,7 +107,11 @@ class TeamFourIE(InfoExtractor):
|
||||
'upload_date': '20130401',
|
||||
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
|
||||
'title': 'A Moment With TFS Episode 4',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -53,17 +53,25 @@ class SenateISVPIE(InfoExtractor):
|
||||
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
||||
'info_dict': {
|
||||
'id': 'commerce011514',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player'
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
||||
# checksum differs each time
|
||||
|
@@ -34,11 +34,11 @@ class SpankBangIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
} for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
|
||||
} for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1>(.+?)</h1>', webpage, 'title')
|
||||
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
|
||||
description = self._search_regex(
|
||||
r'class="desc"[^>]*>([^<]+)',
|
||||
webpage, 'description', default=None)
|
||||
|
@@ -70,14 +70,11 @@ class SRGSSRIE(InfoExtractor):
|
||||
asset_url, media_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
else:
|
||||
ext = None
|
||||
if protocol == 'RTMP':
|
||||
ext = self._search_regex(r'([a-z0-9]+):[^/]+', asset_url, 'ext')
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': asset_url,
|
||||
'preference': preference(quality),
|
||||
'ext': ext,
|
||||
'ext': 'flv' if protocol == 'RTMP' else None,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -17,18 +17,21 @@ class TV2IE(InfoExtractor):
|
||||
_VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.tv2.no/v/916509/',
|
||||
'md5': '9cb9e3410b18b515d71892f27856e9b1',
|
||||
'info_dict': {
|
||||
'id': '916509',
|
||||
'ext': 'flv',
|
||||
'title': 'Se Gryttens hyllest av Steven Gerrard',
|
||||
'ext': 'mp4',
|
||||
'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
|
||||
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
|
||||
'timestamp': 1431715610,
|
||||
'upload_date': '20150515',
|
||||
'duration': 156.967,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -3,22 +3,20 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class VevoIE(InfoExtractor):
|
||||
"""
|
||||
'''
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE and MySpaceIE)
|
||||
"""
|
||||
'''
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
|
||||
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
|
||||
@@ -28,19 +26,15 @@ class VevoIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
"md5": "95ee28ee45e70130e3ab02b0f579ae23",
|
||||
'md5': '95ee28ee45e70130e3ab02b0f579ae23',
|
||||
'info_dict': {
|
||||
'id': 'GB1101300280',
|
||||
'ext': 'mp4',
|
||||
"upload_date": "20130624",
|
||||
"uploader": "Hurts",
|
||||
"title": "Somebody to Die For",
|
||||
"duration": 230.12,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
# timestamp and upload_date are often incorrect; seem to change randomly
|
||||
'timestamp': int,
|
||||
}
|
||||
'title': 'Somebody to Die For',
|
||||
'upload_date': '20130624',
|
||||
'uploader': 'Hurts',
|
||||
'timestamp': 1372057200,
|
||||
},
|
||||
}, {
|
||||
'note': 'v3 SMIL format',
|
||||
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
||||
@@ -48,28 +42,23 @@ class VevoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'USUV71302923',
|
||||
'ext': 'mp4',
|
||||
'title': 'I Wish I Could Break Your Heart',
|
||||
'upload_date': '20140219',
|
||||
'uploader': 'Cassadee Pope',
|
||||
'title': 'I Wish I Could Break Your Heart',
|
||||
'duration': 226.101,
|
||||
'age_limit': 0,
|
||||
'timestamp': int,
|
||||
}
|
||||
'timestamp': 1392796919,
|
||||
},
|
||||
}, {
|
||||
'note': 'Age-limited video',
|
||||
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
|
||||
'info_dict': {
|
||||
'id': 'USRV81300282',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 18,
|
||||
'title': 'Tunnel Vision (Explicit)',
|
||||
'upload_date': '20130703',
|
||||
'age_limit': 18,
|
||||
'uploader': 'Justin Timberlake',
|
||||
'upload_date': 're:2013070[34]',
|
||||
'timestamp': int,
|
||||
'timestamp': 1372888800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'true',
|
||||
}
|
||||
}, {
|
||||
'note': 'No video_info',
|
||||
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
|
||||
@@ -77,69 +66,46 @@ class VevoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'USUV71503000',
|
||||
'ext': 'mp4',
|
||||
'title': 'Till I Die - K Camp ft. T.I.',
|
||||
'duration': 193,
|
||||
'title': 'Till I Die',
|
||||
'upload_date': '20151207',
|
||||
'age_limit': 18,
|
||||
'uploader': 'K Camp',
|
||||
'timestamp': 1449468000,
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file'],
|
||||
}]
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
|
||||
_SOURCE_TYPES = {
|
||||
0: 'youtube',
|
||||
1: 'brightcove',
|
||||
2: 'http',
|
||||
3: 'hls_ios',
|
||||
4: 'hls',
|
||||
5: 'smil', # http
|
||||
7: 'f4m_cc',
|
||||
8: 'f4m_ak',
|
||||
9: 'f4m_l3',
|
||||
10: 'ism',
|
||||
13: 'smil', # rtmp
|
||||
18: 'dash',
|
||||
}
|
||||
_VERSIONS = {
|
||||
0: 'youtube', # only in AuthenticateVideo videoVersions
|
||||
1: 'level3',
|
||||
2: 'akamai',
|
||||
3: 'level3',
|
||||
4: 'amazon',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
req = sanitized_Request(
|
||||
'http://www.vevo.com/auth', data=b'')
|
||||
webpage = self._download_webpage(
|
||||
req, None,
|
||||
note='Retrieving oauth token',
|
||||
errnote='Unable to retrieve oauth token',
|
||||
fatal=False)
|
||||
if webpage is False:
|
||||
self._oauth_token = None
|
||||
else:
|
||||
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
|
||||
raise ExtractorError('%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
|
||||
|
||||
self._oauth_token = self._search_regex(
|
||||
r'access_token":\s*"([^"]+)"',
|
||||
webpage, 'access token', fatal=False)
|
||||
|
||||
def _formats_from_json(self, video_info):
|
||||
if not video_info:
|
||||
return []
|
||||
|
||||
last_version = {'version': -1}
|
||||
for version in video_info['videoVersions']:
|
||||
# These are the HTTP downloads, other types are for different manifests
|
||||
if version['sourceType'] == 2:
|
||||
if version['version'] > last_version['version']:
|
||||
last_version = version
|
||||
if last_version['version'] == -1:
|
||||
raise ExtractorError('Unable to extract last version of the video')
|
||||
|
||||
renditions = compat_etree_fromstring(last_version['data'])
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
formats = []
|
||||
# Already sorted from worst to best quality
|
||||
for rend in renditions.findall('rendition'):
|
||||
attr = rend.attrib
|
||||
format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
|
||||
formats.append({
|
||||
'url': attr['url'],
|
||||
'format_id': attr['name'],
|
||||
'format_note': format_note,
|
||||
'height': int(attr['frameheight']),
|
||||
'width': int(attr['frameWidth']),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _formats_from_smil(self, smil_doc):
|
||||
formats = []
|
||||
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
for el in els:
|
||||
src = el.attrib['src']
|
||||
m = re.match(r'''(?xi)
|
||||
(?P<ext>[a-z0-9]+):
|
||||
(?P<path>
|
||||
[/a-z0-9]+ # The directory and main part of the URL
|
||||
_(?P<cbr>[0-9]+)k
|
||||
_(?P<tbr>[0-9]+)k
|
||||
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
_(?P<vcodec>[a-z0-9]+)
|
||||
_(?P<vbr>[0-9]+)
|
||||
@@ -153,9 +119,10 @@ class VevoIE(InfoExtractor):
|
||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': 'SMIL_' + m.group('cbr'),
|
||||
'format_id': 'smil_' + m.group('tbr'),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'acodec': m.group('acodec'),
|
||||
'tbr': int(m.group('tbr')),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'abr': int(m.group('abr')),
|
||||
'ext': m.group('ext'),
|
||||
@@ -164,48 +131,148 @@ class VevoIE(InfoExtractor):
|
||||
})
|
||||
return formats
|
||||
|
||||
def _download_api_formats(self, video_id, video_url):
|
||||
if not self._oauth_token:
|
||||
self._downloader.report_warning(
|
||||
'No oauth token available, skipping API HLS download')
|
||||
return []
|
||||
def _initialize_api(self, video_id):
|
||||
req = sanitized_Request(
|
||||
'http://www.vevo.com/auth', data=b'')
|
||||
webpage = self._download_webpage(
|
||||
req, None,
|
||||
note='Retrieving oauth token',
|
||||
errnote='Unable to retrieve oauth token')
|
||||
|
||||
api_url = compat_urlparse.urljoin(video_url, '//apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
|
||||
video_id, self._oauth_token))
|
||||
api_data = self._download_json(
|
||||
api_url, video_id,
|
||||
note='Downloading HLS formats',
|
||||
errnote='Failed to download HLS format list', fatal=False)
|
||||
if api_data is None:
|
||||
return []
|
||||
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
|
||||
raise ExtractorError(
|
||||
'%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
|
||||
|
||||
m3u8_url = api_data[0]['url']
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
|
||||
preference=0)
|
||||
auth_info = self._parse_json(webpage, video_id)
|
||||
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
|
||||
|
||||
def _call_api(self, path, video_id, note, errnote, fatal=True):
|
||||
return self._download_json(self._api_url_template % path, video_id, note, errnote)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = None
|
||||
|
||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||
response = self._download_json(json_url, video_id)
|
||||
video_info = response['video'] or {}
|
||||
|
||||
if not video_info and response.get('statusCode') != 909:
|
||||
if 'statusMessage' in response:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True)
|
||||
raise ExtractorError('Unable to extract videos')
|
||||
response = self._download_json(
|
||||
json_url, video_id, 'Downloading video info', 'Unable to download info')
|
||||
video_info = response.get('video') or {}
|
||||
video_versions = video_info.get('videoVersions')
|
||||
uploader = None
|
||||
timestamp = None
|
||||
view_count = None
|
||||
formats = []
|
||||
|
||||
if not video_info:
|
||||
if url.startswith('vevo:'):
|
||||
raise ExtractorError('Please specify full Vevo URL for downloading', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if response.get('statusCode') != 909:
|
||||
ytid = response.get('errorInfo', {}).get('ytid')
|
||||
if ytid:
|
||||
self.report_warning(
|
||||
'Video is geoblocked, trying with the YouTube video %s' % ytid)
|
||||
return self.url_result(ytid, 'Youtube', ytid)
|
||||
|
||||
title = video_info.get('title') or self._og_search_title(webpage)
|
||||
if 'statusMessage' in response:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, response['statusMessage']), expected=True)
|
||||
raise ExtractorError('Unable to extract videos')
|
||||
|
||||
formats = self._formats_from_json(video_info)
|
||||
self._initialize_api(video_id)
|
||||
video_info = self._call_api(
|
||||
'video/%s' % video_id, video_id, 'Downloading api video info',
|
||||
'Failed to download video info')
|
||||
|
||||
video_versions = self._call_api(
|
||||
'video/%s/streams' % video_id, video_id,
|
||||
'Downloading video versions info',
|
||||
'Failed to download video versions info')
|
||||
|
||||
timestamp = parse_iso8601(video_info.get('releaseDate'))
|
||||
artists = video_info.get('artists')
|
||||
if artists:
|
||||
uploader = artists[0]['name']
|
||||
view_count = int_or_none(video_info.get('views', {}).get('total'))
|
||||
|
||||
for video_version in video_versions:
|
||||
version = self._VERSIONS.get(video_version['version'])
|
||||
version_url = video_version.get('url')
|
||||
if not version_url:
|
||||
continue
|
||||
|
||||
if '.mpd' in version_url or '.ism' in version_url:
|
||||
continue
|
||||
elif '.m3u8' in version_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
version_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls-%s' % version,
|
||||
note='Downloading %s m3u8 information' % version,
|
||||
errnote='Failed to download %s m3u8 information' % version,
|
||||
fatal=False))
|
||||
else:
|
||||
m = re.search(r'''(?xi)
|
||||
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
_(?P<vcodec>[a-z0-9]+)
|
||||
_(?P<vbr>[0-9]+)
|
||||
_(?P<acodec>[a-z0-9]+)
|
||||
_(?P<abr>[0-9]+)
|
||||
\.(?P<ext>[a-z0-9]+)''', version_url)
|
||||
if not m:
|
||||
continue
|
||||
|
||||
formats.append({
|
||||
'url': version_url,
|
||||
'format_id': 'http-%s-%s' % (version, video_version['quality']),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'acodec': m.group('acodec'),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'abr': int(m.group('abr')),
|
||||
'ext': m.group('ext'),
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
else:
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'/Date\((\d+)\)/',
|
||||
video_info['releaseDate'], 'release date', fatal=False),
|
||||
scale=1000)
|
||||
artists = video_info.get('mainArtists')
|
||||
if artists:
|
||||
uploader = artists[0]['artistName']
|
||||
|
||||
smil_parsed = False
|
||||
for video_version in video_info['videoVersions']:
|
||||
version = self._VERSIONS.get(video_version['version'])
|
||||
if version == 'youtube':
|
||||
continue
|
||||
else:
|
||||
source_type = self._SOURCE_TYPES.get(video_version['sourceType'])
|
||||
renditions = compat_etree_fromstring(video_version['data'])
|
||||
if source_type == 'http':
|
||||
for rend in renditions.findall('rendition'):
|
||||
attr = rend.attrib
|
||||
formats.append({
|
||||
'url': attr['url'],
|
||||
'format_id': 'http-%s-%s' % (version, attr['name']),
|
||||
'height': int_or_none(attr.get('frameheight')),
|
||||
'width': int_or_none(attr.get('frameWidth')),
|
||||
'tbr': int_or_none(attr.get('totalBitrate')),
|
||||
'vbr': int_or_none(attr.get('videoBitrate')),
|
||||
'abr': int_or_none(attr.get('audioBitrate')),
|
||||
'vcodec': attr.get('videoCodec'),
|
||||
'acodec': attr.get('audioCodec'),
|
||||
})
|
||||
elif source_type == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
renditions.find('rendition').attrib['url'], video_id,
|
||||
'mp4', 'm3u8_native', m3u8_id='hls-%s' % version,
|
||||
note='Downloading %s m3u8 information' % version,
|
||||
errnote='Failed to download %s m3u8 information' % version,
|
||||
fatal=False))
|
||||
elif source_type == 'smil' and version == 'level3' and not smil_parsed:
|
||||
formats.extend(self._extract_smil_formats(
|
||||
renditions.find('rendition').attrib['url'], video_id, False))
|
||||
smil_parsed = True
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video_info['title']
|
||||
|
||||
is_explicit = video_info.get('isExplicit')
|
||||
if is_explicit is True:
|
||||
@@ -215,43 +282,16 @@ class VevoIE(InfoExtractor):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
# Download via HLS API
|
||||
formats.extend(self._download_api_formats(video_id, url))
|
||||
|
||||
# Download SMIL
|
||||
smil_blocks = sorted((
|
||||
f for f in video_info.get('videoVersions', [])
|
||||
if f['sourceType'] == 13),
|
||||
key=lambda f: f['version'])
|
||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||
if smil_blocks:
|
||||
smil_url_m = self._search_regex(
|
||||
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
||||
default=None)
|
||||
if smil_url_m is not None:
|
||||
smil_url = smil_url_m
|
||||
if smil_url:
|
||||
smil_doc = self._download_smil(smil_url, video_id, fatal=False)
|
||||
if smil_doc:
|
||||
formats.extend(self._formats_from_smil(smil_doc))
|
||||
|
||||
self._sort_formats(formats)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'/Date\((\d+)\)/',
|
||||
video_info['launchDate'], 'launch date', fatal=False),
|
||||
scale=1000) if video_info else None
|
||||
|
||||
duration = video_info.get('duration') or int_or_none(
|
||||
self._html_search_meta('video:duration', webpage))
|
||||
duration = video_info.get('duration')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video_info.get('imageUrl'),
|
||||
'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
|
||||
'timestamp': timestamp,
|
||||
'uploader': video_info['mainArtists'][0]['artistName'] if video_info else None,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@@ -86,10 +86,9 @@ class VGTVIE(XstreamIE):
|
||||
{
|
||||
# streamType: wasLive
|
||||
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
|
||||
'md5': '458f4841239dab414343b50e5af8869c',
|
||||
'info_dict': {
|
||||
'id': '113063',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'V75 fra Solvalla 30.05.15',
|
||||
'description': 'md5:b3743425765355855f88e096acc93231',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
@@ -98,6 +97,10 @@ class VGTVIE(XstreamIE):
|
||||
'upload_date': '20150530',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class VidziIE(InfoExtractor):
|
||||
@@ -13,6 +14,11 @@ class VidziIE(InfoExtractor):
|
||||
'id': 'cghql9yq6emu',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
|
||||
'uploader': 'vidzi.tv',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -20,19 +26,14 @@ class VidziIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_host = self._html_search_regex(
|
||||
r'id=\'vplayer\'><img src="http://(.*?)/i', webpage,
|
||||
'video host')
|
||||
video_hash = self._html_search_regex(
|
||||
r'\|([a-z0-9]+)\|hls\|type', webpage, 'video_hash')
|
||||
ext = self._html_search_regex(
|
||||
r'\|tracks\|([a-z0-9]+)\|', webpage, 'video ext')
|
||||
video_url = 'http://' + video_host + '/' + video_hash + '/v.' + ext
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||
|
||||
# Vidzi now uses jwplayer, which can be handled by GenericIE
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'url': smuggle_url(url, {'to_generic': True}),
|
||||
'ie_key': 'Generic',
|
||||
}
|
||||
|
@@ -45,6 +45,10 @@ class ViideaIE(InfoExtractor):
|
||||
'upload_date': '20130627',
|
||||
'duration': 565,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# video with invalid direct format links (HTTP 403)
|
||||
'url': 'http://videolectures.net/russir2010_filippova_nlp/',
|
||||
|
@@ -321,7 +321,7 @@ class VKIE(InfoExtractor):
|
||||
class VKUserVideosIE(InfoExtractor):
|
||||
IE_NAME = 'vk:uservideos'
|
||||
IE_DESC = "VK - User's Videos"
|
||||
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
|
||||
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
|
||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||
_TESTS = [{
|
||||
'url': 'http://vk.com/videos205387401',
|
||||
@@ -333,6 +333,9 @@ class VKUserVideosIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://vk.com/videos-77521',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vk.com/videos-97664626?section=all',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -34,19 +34,20 @@ class XuiteIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# Video with only one format
|
||||
'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2',
|
||||
'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e',
|
||||
'url': 'http://vlog.xuite.net/play/WUxxR2xCLTI1OTI1MDk5LmZsdg==',
|
||||
'md5': '21f7b39c009b5a4615b4463df6eb7a46',
|
||||
'info_dict': {
|
||||
'id': '3441629',
|
||||
'id': '25925099',
|
||||
'ext': 'mp4',
|
||||
'title': '孫燕姿 - 眼淚成詩',
|
||||
'title': 'BigBuckBunny_320x180',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 217.399,
|
||||
'timestamp': 1299383640,
|
||||
'upload_date': '20110306',
|
||||
'uploader': 'Valen',
|
||||
'uploader_id': '10400126',
|
||||
'categories': ['影視娛樂'],
|
||||
'duration': 596.458,
|
||||
'timestamp': 1454242500,
|
||||
'upload_date': '20160131',
|
||||
'uploader': 'yan12125',
|
||||
'uploader_id': '12158353',
|
||||
'categories': ['個人短片'],
|
||||
'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4',
|
||||
},
|
||||
}, {
|
||||
# Video with two formats
|
||||
|
@@ -114,15 +114,13 @@ class YouPornIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']video-description["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'description', default=None)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']videoInfoBy["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>',
|
||||
r'(?s)<div[^>]+class=["\']videoInfoBy(?:\s+[^"\']+)?["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>',
|
||||
|
@@ -32,6 +32,7 @@ from ..utils import (
|
||||
get_element_by_attribute,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
remove_quotes,
|
||||
@@ -180,7 +181,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
return
|
||||
|
||||
|
||||
class YoutubeEntryListBaseInfoExtractor(InfoExtractor):
|
||||
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
# Extract entries from page with "Load more" button
|
||||
def _entries(self, page, playlist_id):
|
||||
more_widget_html = content_html = page
|
||||
@@ -232,7 +233,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||
|
||||
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||
def _process_page(self, content):
|
||||
for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content):
|
||||
for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
|
||||
yield self.url_result(
|
||||
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
|
||||
|
||||
@@ -277,93 +278,93 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
$"""
|
||||
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
||||
_formats = {
|
||||
'5': {'ext': 'flv', 'width': 400, 'height': 240},
|
||||
'6': {'ext': 'flv', 'width': 450, 'height': 270},
|
||||
'13': {'ext': '3gp'},
|
||||
'17': {'ext': '3gp', 'width': 176, 'height': 144},
|
||||
'18': {'ext': 'mp4', 'width': 640, 'height': 360},
|
||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720},
|
||||
'34': {'ext': 'flv', 'width': 640, 'height': 360},
|
||||
'35': {'ext': 'flv', 'width': 854, 'height': 480},
|
||||
'36': {'ext': '3gp', 'width': 320, 'height': 240},
|
||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
|
||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
|
||||
'43': {'ext': 'webm', 'width': 640, 'height': 360},
|
||||
'44': {'ext': 'webm', 'width': 854, 'height': 480},
|
||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720},
|
||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
|
||||
'59': {'ext': 'mp4', 'width': 854, 'height': 480},
|
||||
'78': {'ext': 'mp4', 'width': 854, 'height': 480},
|
||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
|
||||
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
|
||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||
'36': {'ext': '3gp', 'width': 320, 'height': 240, 'acodec': 'aac', 'abr': 32, 'vcodec': 'mp4v'},
|
||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
||||
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
||||
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||
|
||||
|
||||
# 3d videos
|
||||
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
|
||||
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
|
||||
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
|
||||
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
|
||||
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
|
||||
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
|
||||
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
|
||||
# 3D videos
|
||||
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
|
||||
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
|
||||
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
|
||||
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
|
||||
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
|
||||
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
|
||||
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
|
||||
|
||||
# Apple HTTP Live Streaming
|
||||
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
|
||||
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
|
||||
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
|
||||
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
|
||||
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
|
||||
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
|
||||
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
|
||||
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
||||
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
||||
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
||||
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
|
||||
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
|
||||
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
||||
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
|
||||
|
||||
# DASH mp4 video
|
||||
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
|
||||
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
|
||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
|
||||
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||
|
||||
# Dash mp4 audio
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
|
||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
|
||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
|
||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
||||
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
||||
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
||||
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
||||
|
||||
# Dash webm audio with opus inside
|
||||
'249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
|
||||
'250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
|
||||
'251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
|
||||
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
|
||||
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
|
||||
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
|
||||
|
||||
# RTMP (unnamed)
|
||||
'_rtmp': {'protocol': 'rtmp'},
|
||||
@@ -917,7 +918,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if lang in sub_lang_list:
|
||||
continue
|
||||
sub_formats = []
|
||||
for ext in ['sbv', 'vtt', 'srt']:
|
||||
for ext in ['ttml', 'vtt']:
|
||||
params = compat_urllib_parse.urlencode({
|
||||
'lang': lang,
|
||||
'v': video_id,
|
||||
@@ -1034,73 +1035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
||||
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
||||
|
||||
def _parse_dash_manifest(
|
||||
self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
|
||||
def decrypt_sig(mobj):
|
||||
s = mobj.group(1)
|
||||
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
|
||||
return '/signature/%s' % dec_s
|
||||
dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
|
||||
dash_doc = self._download_xml(
|
||||
dash_manifest_url, video_id,
|
||||
note='Downloading DASH manifest',
|
||||
errnote='Could not download DASH manifest',
|
||||
fatal=fatal)
|
||||
|
||||
if dash_doc is False:
|
||||
return []
|
||||
|
||||
formats = []
|
||||
for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
|
||||
mime_type = a.attrib.get('mimeType')
|
||||
for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
||||
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
|
||||
if url_el is None:
|
||||
continue
|
||||
if mime_type == 'text/vtt':
|
||||
# TODO implement WebVTT downloading
|
||||
pass
|
||||
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
|
||||
segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
|
||||
format_id = r.attrib['id']
|
||||
video_url = url_el.text
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'width': int_or_none(r.attrib.get('width')),
|
||||
'height': int_or_none(r.attrib.get('height')),
|
||||
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
|
||||
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
|
||||
'filesize': filesize,
|
||||
'fps': int_or_none(r.attrib.get('frameRate')),
|
||||
}
|
||||
if segment_list is not None:
|
||||
f.update({
|
||||
'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
|
||||
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
if fo['format_id'] == format_id)
|
||||
except StopIteration:
|
||||
full_info = self._formats.get(format_id, {}).copy()
|
||||
full_info.update(f)
|
||||
codecs = r.attrib.get('codecs')
|
||||
if codecs:
|
||||
if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
|
||||
full_info['vcodec'] = codecs
|
||||
elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
|
||||
full_info['acodec'] = codecs
|
||||
formats.append(full_info)
|
||||
else:
|
||||
existing_format.update(f)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
@@ -1461,15 +1395,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
|
||||
dct = {
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'player_url': player_url,
|
||||
}
|
||||
if format_id in self._formats:
|
||||
dct.update(self._formats[format_id])
|
||||
|
||||
# Some itags are not included in DASH manifest thus corresponding formats will
|
||||
# lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
|
||||
# Trying to extract metadata from url_encoded_fmt_stream_map entry.
|
||||
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
|
||||
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
|
||||
dct = {
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'player_url': player_url,
|
||||
|
||||
more_fields = {
|
||||
'filesize': int_or_none(url_data.get('clen', [None])[0]),
|
||||
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
|
||||
'width': width,
|
||||
@@ -1477,13 +1417,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'fps': int_or_none(url_data.get('fps', [None])[0]),
|
||||
'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
|
||||
}
|
||||
for key, value in more_fields.items():
|
||||
if value:
|
||||
dct[key] = value
|
||||
type_ = url_data.get('type', [None])[0]
|
||||
if type_:
|
||||
type_split = type_.split(';')
|
||||
kind_ext = type_split[0].split('/')
|
||||
if len(kind_ext) == 2:
|
||||
kind, ext = kind_ext
|
||||
dct['ext'] = ext
|
||||
kind, _ = kind_ext
|
||||
dct['ext'] = mimetype2ext(type_split[0])
|
||||
if kind in ('audio', 'video'):
|
||||
codecs = None
|
||||
for mobj in re.finditer(
|
||||
@@ -1501,8 +1444,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'acodec': acodec,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if format_id in self._formats:
|
||||
dct.update(self._formats[format_id])
|
||||
formats.append(dct)
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
@@ -1525,8 +1466,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
for dash_manifest_url in dash_mpds:
|
||||
dash_formats = {}
|
||||
try:
|
||||
for df in self._parse_dash_manifest(
|
||||
video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
|
||||
def decrypt_sig(mobj):
|
||||
s = mobj.group(1)
|
||||
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
|
||||
return '/signature/%s' % dec_s
|
||||
|
||||
dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
|
||||
|
||||
for df in self._extract_dash_manifest_formats(
|
||||
dash_manifest_url, video_id, fatal=dash_mpd_fatal,
|
||||
namespace='urn:mpeg:DASH:schema:MPD:2011', formats_dict=self._formats):
|
||||
# Do not overwrite DASH format found in some previous DASH manifest
|
||||
if df['format_id'] not in dash_formats:
|
||||
dash_formats[df['format_id']] = df
|
||||
@@ -1594,7 +1543,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
|
||||
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com playlists'
|
||||
_VALID_URL = r"""(?x)(?:
|
||||
(?:https?://)?
|
||||
@@ -1838,7 +1787,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
||||
|
||||
class YoutubeUserIE(YoutubeChannelIE):
|
||||
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
||||
IE_NAME = 'youtube:user'
|
||||
|
||||
|
@@ -415,6 +415,11 @@ def parseOpts(overrideArguments=None):
|
||||
'--hls-prefer-native',
|
||||
dest='hls_prefer_native', action='store_true',
|
||||
help='Use the native HLS downloader instead of ffmpeg (experimental)')
|
||||
downloader.add_option(
|
||||
'--hls-use-mpegts',
|
||||
dest='hls_use_mpegts', action='store_true',
|
||||
help='Use the mpegts container for HLS videos, allowing to play the '
|
||||
'video while downloading (some players may not be able to play it)')
|
||||
downloader.add_option(
|
||||
'--external-downloader',
|
||||
dest='external_downloader', metavar='COMMAND',
|
||||
|
@@ -391,6 +391,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
for (name, value) in metadata.items():
|
||||
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||
|
||||
# https://github.com/rg3/youtube-dl/issues/8350
|
||||
if info['protocol'] == 'm3u8_native' or self._downloader.params.get('hls_prefer_native', False):
|
||||
options.extend(['-bsf:a', 'aac_adtstoasc'])
|
||||
|
||||
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
|
||||
self.run_ffmpeg(filename, temp_filename, options)
|
||||
os.remove(encodeFilename(filename))
|
||||
@@ -479,6 +483,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
|
||||
return [], info
|
||||
self._downloader.to_screen('[ffmpeg] Converting subtitles')
|
||||
sub_filenames = []
|
||||
for lang, sub in subs.items():
|
||||
ext = sub['ext']
|
||||
if ext == new_ext:
|
||||
@@ -486,6 +491,8 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||
'[ffmpeg] Subtitle file for %s is already in the requested'
|
||||
'format' % new_ext)
|
||||
continue
|
||||
old_file = subtitles_filename(filename, lang, ext)
|
||||
sub_filenames.append(old_file)
|
||||
new_file = subtitles_filename(filename, lang, new_ext)
|
||||
|
||||
if ext == 'dfxp' or ext == 'ttml':
|
||||
@@ -493,7 +500,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||
'You have requested to convert dfxp (TTML) subtitles into another format, '
|
||||
'which results in style information loss')
|
||||
|
||||
dfxp_file = subtitles_filename(filename, lang, ext)
|
||||
dfxp_file = old_file
|
||||
srt_file = subtitles_filename(filename, lang, 'srt')
|
||||
|
||||
with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
|
||||
@@ -511,9 +518,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||
if new_ext == 'srt':
|
||||
continue
|
||||
|
||||
self.run_ffmpeg(
|
||||
subtitles_filename(filename, lang, ext),
|
||||
new_file, ['-f', new_format])
|
||||
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
|
||||
|
||||
with io.open(new_file, 'rt', encoding='utf-8') as f:
|
||||
subs[lang] = {
|
||||
@@ -521,4 +526,4 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||
'data': f.read(),
|
||||
}
|
||||
|
||||
return [], info
|
||||
return sub_filenames, info
|
||||
|
@@ -1828,9 +1828,11 @@ def mimetype2ext(mt):
|
||||
_, _, res = mt.rpartition('/')
|
||||
|
||||
return {
|
||||
'x-ms-wmv': 'wmv',
|
||||
'x-mp4-fragmented': 'mp4',
|
||||
'3gpp': '3gp',
|
||||
'ttml+xml': 'ttml',
|
||||
'x-flv': 'flv',
|
||||
'x-mp4-fragmented': 'mp4',
|
||||
'x-ms-wmv': 'wmv',
|
||||
}.get(res, res)
|
||||
|
||||
|
||||
@@ -2015,20 +2017,27 @@ def dfxp2srt(dfxp_data):
|
||||
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
||||
})
|
||||
|
||||
class TTMLPElementParser(object):
|
||||
out = ''
|
||||
|
||||
def start(self, tag, attrib):
|
||||
if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
||||
self.out += '\n'
|
||||
|
||||
def end(self, tag):
|
||||
pass
|
||||
|
||||
def data(self, data):
|
||||
self.out += data
|
||||
|
||||
def close(self):
|
||||
return self.out.strip()
|
||||
|
||||
def parse_node(node):
|
||||
str_or_empty = functools.partial(str_or_none, default='')
|
||||
|
||||
out = str_or_empty(node.text)
|
||||
|
||||
for child in node:
|
||||
if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
||||
out += '\n' + str_or_empty(child.tail)
|
||||
elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
|
||||
out += str_or_empty(parse_node(child))
|
||||
else:
|
||||
out += str_or_empty(xml.etree.ElementTree.tostring(child))
|
||||
|
||||
return out
|
||||
target = TTMLPElementParser()
|
||||
parser = xml.etree.ElementTree.XMLParser(target=target)
|
||||
parser.feed(xml.etree.ElementTree.tostring(node))
|
||||
return parser.close()
|
||||
|
||||
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
||||
out = []
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.01.23'
|
||||
__version__ = '2016.02.04'
|
||||
|
Reference in New Issue
Block a user