Compare commits
136 Commits
2016.01.23
...
2016.02.04
Author | SHA1 | Date | |
---|---|---|---|
|
f1ed3acae5 | ||
|
920d21b9d3 | ||
|
2fb35d1c28 | ||
|
09be85b8dd | ||
|
eadc3ccd50 | ||
|
58be922079 | ||
|
c84d3a557d | ||
|
6ad2b01e14 | ||
|
fd3a1f3d60 | ||
|
87de7069b9 | ||
|
6fba62c87a | ||
|
1df4141196 | ||
|
fae45ede08 | ||
|
4e0cff2a50 | ||
|
0436157b95 | ||
|
ae0db349c1 | ||
|
08411970d5 | ||
|
dc724e0c8b | ||
|
0a5d1ec706 | ||
|
58250eff2b | ||
|
11a4efc505 | ||
|
7537b35fb8 | ||
|
33cc74eeeb | ||
|
f021acee49 | ||
|
abe694ca95 | ||
|
b286f201a8 | ||
|
bd93a12e85 | ||
|
92769650fa | ||
|
dc4fe5c6d7 | ||
|
566bda51f2 | ||
|
f63757ec35 | ||
|
7a0ed06909 | ||
|
9934fe76be | ||
|
a8aad21001 | ||
|
d055bf91cc | ||
|
0e1b1a011d | ||
|
eab3c2895c | ||
|
163da6a484 | ||
|
324916d11a | ||
|
3ccb0655c1 | ||
|
e04398e397 | ||
|
231ea2a3bb | ||
|
b99d88c6a1 | ||
|
189d72d5fd | ||
|
a7aab0c23e | ||
|
a69bee4762 | ||
|
9acd33094d | ||
|
8e7aad2075 | ||
|
ce5879fa14 | ||
|
7b7507d6e1 | ||
|
14823decf3 | ||
|
673fb82e65 | ||
|
181cf24bc0 | ||
|
89f2602880 | ||
|
db9b1dbcd9 | ||
|
e881c4bcab | ||
|
670ad51ade | ||
|
eb6fc7d32a | ||
|
ed1a390583 | ||
|
809e1857c5 | ||
|
7c38af48b9 | ||
|
60ad3eb970 | ||
|
a7685b3a6b | ||
|
8f1fddc816 | ||
|
1bf996fa5c | ||
|
248ae880b6 | ||
|
2d2fa82d17 | ||
|
c94678957f | ||
|
16f38a699f | ||
|
a6c2c24479 | ||
|
b8c9926c0a | ||
|
df374b5222 | ||
|
5ea1eb78f5 | ||
|
5d2c0fd9ba | ||
|
0803753fea | ||
|
2c2f1efdcd | ||
|
b323e1707d | ||
|
09104e9930 | ||
|
5fa1702ca6 | ||
|
17b598d30c | ||
|
53be8894e4 | ||
|
c3deacd562 | ||
|
8ab3fe81d8 | ||
|
2f0a33d8a3 | ||
|
05d0d131a7 | ||
|
c140629995 | ||
|
7d106a65ca | ||
|
0179f6a830 | ||
|
830afe85dc | ||
|
8bf39420b4 | ||
|
71d08b3e29 | ||
|
06ffa33485 | ||
|
874e05975b | ||
|
f5d30d521c | ||
|
e047922be0 | ||
|
83ab8a79cc | ||
|
350cf045d8 | ||
|
68a0ea15b4 | ||
|
2b4f5e68d1 | ||
|
055f417278 | ||
|
70029bc348 | ||
|
cf57433bbd | ||
|
1ac6e794cb | ||
|
a853427427 | ||
|
50e989e263 | ||
|
10e6ed9341 | ||
|
38c84acae5 | ||
|
29f46c2bee | ||
|
39c10a2b6e | ||
|
b913348d5f | ||
|
2b14cb566f | ||
|
b0df5223be | ||
|
ed7cd1e859 | ||
|
f125d9115b | ||
|
a9d5f12fec | ||
|
7f32e5dc35 | ||
|
c3111ab34f | ||
|
9339774af2 | ||
|
b0d21deda9 | ||
|
fab6f0e65b | ||
|
b6c33fd544 | ||
|
fb4b345800 | ||
|
af9c2a07ae | ||
|
ab180fc648 | ||
|
682f8c43b5 | ||
|
f693213567 | ||
|
9165d6bab9 | ||
|
2975fe1a7b | ||
|
de691a498d | ||
|
2e6e742c3c | ||
|
e9bd0f772b | ||
|
77f785076f | ||
|
94278f7202 | ||
|
a0d8d704df | ||
|
f6861ec96f | ||
|
e1a0bfdffe |
1
AUTHORS
1
AUTHORS
@@ -155,3 +155,4 @@ Vignesh Venkat
|
|||||||
Tom Gijselinck
|
Tom Gijselinck
|
||||||
Founder Fang
|
Founder Fang
|
||||||
Andrew Alexeyew
|
Andrew Alexeyew
|
||||||
|
Saso Bezlaj
|
||||||
|
@@ -173,6 +173,10 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
expected filesize (experimental)
|
expected filesize (experimental)
|
||||||
--hls-prefer-native Use the native HLS downloader instead of
|
--hls-prefer-native Use the native HLS downloader instead of
|
||||||
ffmpeg (experimental)
|
ffmpeg (experimental)
|
||||||
|
--hls-use-mpegts Use the mpegts container for HLS videos,
|
||||||
|
allowing to play the video while
|
||||||
|
downloading (some players may not be able
|
||||||
|
to play it)
|
||||||
--external-downloader COMMAND Use the specified external downloader.
|
--external-downloader COMMAND Use the specified external downloader.
|
||||||
Currently supports
|
Currently supports
|
||||||
aria2c,axel,curl,httpie,wget
|
aria2c,axel,curl,httpie,wget
|
||||||
@@ -451,6 +455,8 @@ The `-o` option allows users to indicate a template for the output file names. T
|
|||||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||||
- `duration`: The sequence will be replaced by the length of the video in seconds.
|
- `duration`: The sequence will be replaced by the length of the video in seconds.
|
||||||
|
|
||||||
|
Note that some of the aforementioned sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
||||||
|
|
||||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||||
|
|
||||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||||
|
@@ -55,6 +55,7 @@
|
|||||||
- **audiomack**
|
- **audiomack**
|
||||||
- **audiomack:album**
|
- **audiomack:album**
|
||||||
- **Azubu**
|
- **Azubu**
|
||||||
|
- **AzubuLive**
|
||||||
- **BaiduVideo**: 百度视频
|
- **BaiduVideo**: 百度视频
|
||||||
- **bambuser**
|
- **bambuser**
|
||||||
- **bambuser:channel**
|
- **bambuser:channel**
|
||||||
@@ -90,6 +91,7 @@
|
|||||||
- **Canvas**
|
- **Canvas**
|
||||||
- **CBS**
|
- **CBS**
|
||||||
- **CBSNews**: CBS News
|
- **CBSNews**: CBS News
|
||||||
|
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||||
- **CBSSports**
|
- **CBSSports**
|
||||||
- **CeskaTelevize**
|
- **CeskaTelevize**
|
||||||
- **channel9**: Channel 9
|
- **channel9**: Channel 9
|
||||||
@@ -133,6 +135,8 @@
|
|||||||
- **DailymotionCloud**
|
- **DailymotionCloud**
|
||||||
- **daum.net**
|
- **daum.net**
|
||||||
- **daum.net:clip**
|
- **daum.net:clip**
|
||||||
|
- **daum.net:playlist**
|
||||||
|
- **daum.net:user**
|
||||||
- **DBTV**
|
- **DBTV**
|
||||||
- **DCN**
|
- **DCN**
|
||||||
- **dcn:live**
|
- **dcn:live**
|
||||||
@@ -180,6 +184,7 @@
|
|||||||
- **ExpoTV**
|
- **ExpoTV**
|
||||||
- **ExtremeTube**
|
- **ExtremeTube**
|
||||||
- **facebook**
|
- **facebook**
|
||||||
|
- **facebook:post**
|
||||||
- **faz.net**
|
- **faz.net**
|
||||||
- **fc2**
|
- **fc2**
|
||||||
- **Fczenit**
|
- **Fczenit**
|
||||||
@@ -314,6 +319,7 @@
|
|||||||
- **mailru**: Видео@Mail.Ru
|
- **mailru**: Видео@Mail.Ru
|
||||||
- **MakerTV**
|
- **MakerTV**
|
||||||
- **Malemotion**
|
- **Malemotion**
|
||||||
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
- **metacafe**
|
- **metacafe**
|
||||||
@@ -506,6 +512,7 @@
|
|||||||
- **Sapo**: SAPO Vídeos
|
- **Sapo**: SAPO Vídeos
|
||||||
- **savefrom.net**
|
- **savefrom.net**
|
||||||
- **SBS**: sbs.com.au
|
- **SBS**: sbs.com.au
|
||||||
|
- **schooltv**
|
||||||
- **SciVee**
|
- **SciVee**
|
||||||
- **screen.yahoo:search**: Yahoo screen search
|
- **screen.yahoo:search**: Yahoo screen search
|
||||||
- **Screencast**
|
- **Screencast**
|
||||||
|
@@ -14,6 +14,7 @@ from test.helper import FakeYDL, assertRegexpMatches
|
|||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_str, compat_urllib_error
|
from youtube_dl.compat import compat_str, compat_urllib_error
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
from youtube_dl.postprocessor.common import PostProcessor
|
from youtube_dl.postprocessor.common import PostProcessor
|
||||||
from youtube_dl.utils import ExtractorError, match_filter_func
|
from youtube_dl.utils import ExtractorError, match_filter_func
|
||||||
|
|
||||||
@@ -221,6 +222,16 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
||||||
|
|
||||||
|
formats = [
|
||||||
|
{'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
|
||||||
|
]
|
||||||
|
info_dict = _make_result(formats)
|
||||||
|
|
||||||
|
ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'})
|
||||||
|
ydl.process_ie_result(info_dict.copy())
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
|
||||||
|
|
||||||
def test_youtube_format_selection(self):
|
def test_youtube_format_selection(self):
|
||||||
order = [
|
order = [
|
||||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
|
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
|
||||||
@@ -237,6 +248,17 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
|
|
||||||
def format_info(f_id):
|
def format_info(f_id):
|
||||||
info = YoutubeIE._formats[f_id].copy()
|
info = YoutubeIE._formats[f_id].copy()
|
||||||
|
|
||||||
|
# XXX: In real cases InfoExtractor._parse_mpd() fills up 'acodec'
|
||||||
|
# and 'vcodec', while in tests such information is incomplete since
|
||||||
|
# commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
|
||||||
|
# test_YoutubeDL.test_youtube_format_selection is broken without
|
||||||
|
# this fix
|
||||||
|
if 'acodec' in info and 'vcodec' not in info:
|
||||||
|
info['vcodec'] = 'none'
|
||||||
|
elif 'vcodec' in info and 'acodec' not in info:
|
||||||
|
info['acodec'] = 'none'
|
||||||
|
|
||||||
info['format_id'] = f_id
|
info['format_id'] = f_id
|
||||||
info['url'] = 'url:' + f_id
|
info['url'] = 'url:' + f_id
|
||||||
return info
|
return info
|
||||||
@@ -636,6 +658,42 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
ydl = YDL()
|
ydl = YDL()
|
||||||
self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
|
self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
|
||||||
|
|
||||||
|
def test_do_not_override_ie_key_in_url_transparent(self):
|
||||||
|
ydl = YDL()
|
||||||
|
|
||||||
|
class Foo1IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'foo1:'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'foo2:',
|
||||||
|
'ie_key': 'Foo2',
|
||||||
|
}
|
||||||
|
|
||||||
|
class Foo2IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'foo2:'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': 'foo3:',
|
||||||
|
'ie_key': 'Foo3',
|
||||||
|
}
|
||||||
|
|
||||||
|
class Foo3IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'foo3:'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return _make_result([{'url': TEST_URL}])
|
||||||
|
|
||||||
|
ydl.add_info_extractor(Foo1IE(ydl))
|
||||||
|
ydl.add_info_extractor(Foo2IE(ydl))
|
||||||
|
ydl.add_info_extractor(Foo3IE(ydl))
|
||||||
|
ydl.extract_info('foo1:')
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['url'], TEST_URL)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -56,7 +56,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||||
|
|
||||||
def test_youtube_user_matching(self):
|
def test_youtube_user_matching(self):
|
||||||
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||||
|
|
||||||
def test_youtube_feeds(self):
|
def test_youtube_feeds(self):
|
||||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
||||||
|
@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
|
|||||||
NPOIE,
|
NPOIE,
|
||||||
ComedyCentralIE,
|
ComedyCentralIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
RaiIE,
|
RaiTVIE,
|
||||||
VikiIE,
|
VikiIE,
|
||||||
ThePlatformIE,
|
ThePlatformIE,
|
||||||
ThePlatformFeedIE,
|
ThePlatformFeedIE,
|
||||||
@@ -65,16 +65,16 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles.keys()), 13)
|
self.assertEqual(len(subtitles.keys()), 13)
|
||||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
|
||||||
for lang in ['it', 'fr', 'de']:
|
for lang in ['fr', 'de']:
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
def test_youtube_subtitles_sbv_format(self):
|
def test_youtube_subtitles_ttml_format(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['subtitlesformat'] = 'sbv'
|
self.DL.params['subtitlesformat'] = 'ttml'
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
|
||||||
|
|
||||||
def test_youtube_subtitles_vtt_format(self):
|
def test_youtube_subtitles_vtt_format(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
@@ -260,7 +260,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
|||||||
|
|
||||||
class TestRaiSubtitles(BaseTestSubtitles):
|
class TestRaiSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||||
IE = RaiIE
|
IE = RaiTVIE
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
|
@@ -34,7 +34,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
# TODO find a > 100 (paginating?) videos course
|
# TODO find a > 100 (paginating?) videos course
|
||||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
entries = result['entries']
|
entries = list(result['entries'])
|
||||||
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||||
|
@@ -263,7 +263,7 @@ class YoutubeDL(object):
|
|||||||
the downloader (see youtube_dl/downloader/common.py):
|
the downloader (see youtube_dl/downloader/common.py):
|
||||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||||
xattr_set_filesize, external_downloader_args.
|
xattr_set_filesize, external_downloader_args, hls_use_mpegts.
|
||||||
|
|
||||||
The following options are used by the post processors:
|
The following options are used by the post processors:
|
||||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||||
@@ -707,7 +707,6 @@ class YoutubeDL(object):
|
|||||||
It will also download the videos if 'download'.
|
It will also download the videos if 'download'.
|
||||||
Returns the resolved ie_result.
|
Returns the resolved ie_result.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
result_type = ie_result.get('_type', 'video')
|
result_type = ie_result.get('_type', 'video')
|
||||||
|
|
||||||
if result_type in ('url', 'url_transparent'):
|
if result_type in ('url', 'url_transparent'):
|
||||||
@@ -736,7 +735,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
force_properties = dict(
|
force_properties = dict(
|
||||||
(k, v) for k, v in ie_result.items() if v is not None)
|
(k, v) for k, v in ie_result.items() if v is not None)
|
||||||
for f in ('_type', 'url'):
|
for f in ('_type', 'url', 'ie_key'):
|
||||||
if f in force_properties:
|
if f in force_properties:
|
||||||
del force_properties[f]
|
del force_properties[f]
|
||||||
new_result = info.copy()
|
new_result = info.copy()
|
||||||
@@ -906,7 +905,7 @@ class YoutubeDL(object):
|
|||||||
str_operator_rex = re.compile(r'''(?x)
|
str_operator_rex = re.compile(r'''(?x)
|
||||||
\s*(?P<key>ext|acodec|vcodec|container|protocol)
|
\s*(?P<key>ext|acodec|vcodec|container|protocol)
|
||||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||||
\s*(?P<value>[a-zA-Z0-9_-]+)
|
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||||
\s*$
|
\s*$
|
||||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||||
m = str_operator_rex.search(filter_spec)
|
m = str_operator_rex.search(filter_spec)
|
||||||
|
@@ -369,6 +369,7 @@ def _real_main(argv=None):
|
|||||||
'no_color': opts.no_color,
|
'no_color': opts.no_color,
|
||||||
'ffmpeg_location': opts.ffmpeg_location,
|
'ffmpeg_location': opts.ffmpeg_location,
|
||||||
'hls_prefer_native': opts.hls_prefer_native,
|
'hls_prefer_native': opts.hls_prefer_native,
|
||||||
|
'hls_use_mpegts': opts.hls_use_mpegts,
|
||||||
'external_downloader_args': external_downloader_args,
|
'external_downloader_args': external_downloader_args,
|
||||||
'postprocessor_args': postprocessor_args,
|
'postprocessor_args': postprocessor_args,
|
||||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||||
|
@@ -45,6 +45,7 @@ class FileDownloader(object):
|
|||||||
(experimental)
|
(experimental)
|
||||||
external_downloader_args: A list of additional command-line arguments for the
|
external_downloader_args: A list of additional command-line arguments for the
|
||||||
external downloader.
|
external downloader.
|
||||||
|
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||||
|
|
||||||
Subclasses of this one must re-define the real_download method.
|
Subclasses of this one must re-define the real_download method.
|
||||||
"""
|
"""
|
||||||
|
@@ -273,15 +273,21 @@ class F4mFD(FragmentFD):
|
|||||||
return fragments_list
|
return fragments_list
|
||||||
|
|
||||||
def _parse_bootstrap_node(self, node, base_url):
|
def _parse_bootstrap_node(self, node, base_url):
|
||||||
if node.text is None:
|
# Sometimes non empty inline bootstrap info can be specified along
|
||||||
|
# with bootstrap url attribute (e.g. dummy inline bootstrap info
|
||||||
|
# contains whitespace characters in [1]). We will prefer bootstrap
|
||||||
|
# url over inline bootstrap info when present.
|
||||||
|
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
|
||||||
|
bootstrap_url = node.get('url')
|
||||||
|
if bootstrap_url:
|
||||||
bootstrap_url = compat_urlparse.urljoin(
|
bootstrap_url = compat_urlparse.urljoin(
|
||||||
base_url, node.attrib['url'])
|
base_url, bootstrap_url)
|
||||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||||
else:
|
else:
|
||||||
bootstrap_url = None
|
bootstrap_url = None
|
||||||
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||||
boot_info = read_bootstrap_info(bootstrap)
|
boot_info = read_bootstrap_info(bootstrap)
|
||||||
return (boot_info, bootstrap_url)
|
return boot_info, bootstrap_url
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
man_url = info_dict['url']
|
man_url = info_dict['url']
|
||||||
@@ -316,7 +322,8 @@ class F4mFD(FragmentFD):
|
|||||||
metadata = None
|
metadata = None
|
||||||
|
|
||||||
fragments_list = build_fragments_list(boot_info)
|
fragments_list = build_fragments_list(boot_info)
|
||||||
if self.params.get('test', False):
|
test = self.params.get('test', False)
|
||||||
|
if test:
|
||||||
# We only download the first fragment
|
# We only download the first fragment
|
||||||
fragments_list = fragments_list[:1]
|
fragments_list = fragments_list[:1]
|
||||||
total_frags = len(fragments_list)
|
total_frags = len(fragments_list)
|
||||||
@@ -326,6 +333,7 @@ class F4mFD(FragmentFD):
|
|||||||
ctx = {
|
ctx = {
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'total_frags': total_frags,
|
'total_frags': total_frags,
|
||||||
|
'live': live,
|
||||||
}
|
}
|
||||||
|
|
||||||
self._prepare_frag_download(ctx)
|
self._prepare_frag_download(ctx)
|
||||||
@@ -380,7 +388,7 @@ class F4mFD(FragmentFD):
|
|||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if not fragments_list and live and bootstrap_url:
|
if not fragments_list and not test and live and bootstrap_url:
|
||||||
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
||||||
total_frags += len(fragments_list)
|
total_frags += len(fragments_list)
|
||||||
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
||||||
|
@@ -26,7 +26,11 @@ class FragmentFD(FileDownloader):
|
|||||||
self._start_frag_download(ctx)
|
self._start_frag_download(ctx)
|
||||||
|
|
||||||
def _prepare_frag_download(self, ctx):
|
def _prepare_frag_download(self, ctx):
|
||||||
self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
|
if 'live' not in ctx:
|
||||||
|
ctx['live'] = False
|
||||||
|
self.to_screen(
|
||||||
|
'[%s] Total fragments: %s'
|
||||||
|
% (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
|
||||||
self.report_destination(ctx['filename'])
|
self.report_destination(ctx['filename'])
|
||||||
dl = HttpQuietDownloader(
|
dl = HttpQuietDownloader(
|
||||||
self.ydl,
|
self.ydl,
|
||||||
@@ -74,14 +78,14 @@ class FragmentFD(FileDownloader):
|
|||||||
if s['status'] not in ('downloading', 'finished'):
|
if s['status'] not in ('downloading', 'finished'):
|
||||||
return
|
return
|
||||||
|
|
||||||
frag_total_bytes = s.get('total_bytes') or 0
|
|
||||||
|
|
||||||
estimated_size = (
|
|
||||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
|
||||||
(state['frag_index'] + 1) * total_frags)
|
|
||||||
time_now = time.time()
|
time_now = time.time()
|
||||||
state['total_bytes_estimate'] = estimated_size
|
|
||||||
state['elapsed'] = time_now - start
|
state['elapsed'] = time_now - start
|
||||||
|
frag_total_bytes = s.get('total_bytes') or 0
|
||||||
|
if not ctx['live']:
|
||||||
|
estimated_size = (
|
||||||
|
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
||||||
|
(state['frag_index'] + 1) * total_frags)
|
||||||
|
state['total_bytes_estimate'] = estimated_size
|
||||||
|
|
||||||
if s['status'] == 'finished':
|
if s['status'] == 'finished':
|
||||||
state['frag_index'] += 1
|
state['frag_index'] += 1
|
||||||
@@ -91,9 +95,10 @@ class FragmentFD(FileDownloader):
|
|||||||
else:
|
else:
|
||||||
frag_downloaded_bytes = s['downloaded_bytes']
|
frag_downloaded_bytes = s['downloaded_bytes']
|
||||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||||
state['eta'] = self.calc_eta(
|
if not ctx['live']:
|
||||||
start, time_now, estimated_size,
|
state['eta'] = self.calc_eta(
|
||||||
state['downloaded_bytes'])
|
start, time_now, estimated_size,
|
||||||
|
state['downloaded_bytes'])
|
||||||
state['speed'] = s.get('speed')
|
state['speed'] = s.get('speed')
|
||||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||||
self._hook_progress(state)
|
self._hook_progress(state)
|
||||||
|
@@ -39,7 +39,11 @@ class HlsFD(FileDownloader):
|
|||||||
'-headers',
|
'-headers',
|
||||||
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
||||||
|
|
||||||
args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
|
args += ['-i', url, '-c', 'copy']
|
||||||
|
if self.params.get('hls_use_mpegts', False):
|
||||||
|
args += ['-f', 'mpegts']
|
||||||
|
else:
|
||||||
|
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||||
|
|
||||||
args = [encodeArgument(opt) for opt in args]
|
args = [encodeArgument(opt) for opt in args]
|
||||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||||
|
@@ -50,7 +50,7 @@ from .atresplayer import AtresPlayerIE
|
|||||||
from .atttechchannel import ATTTechChannelIE
|
from .atttechchannel import ATTTechChannelIE
|
||||||
from .audimedia import AudiMediaIE
|
from .audimedia import AudiMediaIE
|
||||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||||
from .azubu import AzubuIE
|
from .azubu import AzubuIE, AzubuLiveIE
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
@@ -90,7 +90,10 @@ from .canalplus import CanalplusIE
|
|||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .canvas import CanvasIE
|
from .canvas import CanvasIE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbsnews import CBSNewsIE
|
from .cbsnews import (
|
||||||
|
CBSNewsIE,
|
||||||
|
CBSNewsLiveVideoIE,
|
||||||
|
)
|
||||||
from .cbssports import CBSSportsIE
|
from .cbssports import CBSSportsIE
|
||||||
from .ccc import CCCIE
|
from .ccc import CCCIE
|
||||||
from .ceskatelevize import CeskaTelevizeIE
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
@@ -142,6 +145,8 @@ from .dailymotion import (
|
|||||||
from .daum import (
|
from .daum import (
|
||||||
DaumIE,
|
DaumIE,
|
||||||
DaumClipIE,
|
DaumClipIE,
|
||||||
|
DaumPlaylistIE,
|
||||||
|
DaumUserIE,
|
||||||
)
|
)
|
||||||
from .dbtv import DBTVIE
|
from .dbtv import DBTVIE
|
||||||
from .dcn import (
|
from .dcn import (
|
||||||
@@ -196,7 +201,10 @@ from .everyonesmixtape import EveryonesMixtapeIE
|
|||||||
from .exfm import ExfmIE
|
from .exfm import ExfmIE
|
||||||
from .expotv import ExpoTVIE
|
from .expotv import ExpoTVIE
|
||||||
from .extremetube import ExtremeTubeIE
|
from .extremetube import ExtremeTubeIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import (
|
||||||
|
FacebookIE,
|
||||||
|
FacebookPostIE,
|
||||||
|
)
|
||||||
from .faz import FazIE
|
from .faz import FazIE
|
||||||
from .fc2 import FC2IE
|
from .fc2 import FC2IE
|
||||||
from .fczenit import FczenitIE
|
from .fczenit import FczenitIE
|
||||||
@@ -369,6 +377,7 @@ from .macgamestore import MacGameStoreIE
|
|||||||
from .mailru import MailRuIE
|
from .mailru import MailRuIE
|
||||||
from .makertv import MakerTVIE
|
from .makertv import MakerTVIE
|
||||||
from .malemotion import MalemotionIE
|
from .malemotion import MalemotionIE
|
||||||
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
@@ -479,6 +488,7 @@ from .npo import (
|
|||||||
NPOLiveIE,
|
NPOLiveIE,
|
||||||
NPORadioIE,
|
NPORadioIE,
|
||||||
NPORadioFragmentIE,
|
NPORadioFragmentIE,
|
||||||
|
SchoolTVIE,
|
||||||
VPROIE,
|
VPROIE,
|
||||||
WNLIE
|
WNLIE
|
||||||
)
|
)
|
||||||
|
@@ -8,11 +8,7 @@ from ..compat import compat_str
|
|||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class ACastBaseIE(InfoExtractor):
|
class ACastIE(InfoExtractor):
|
||||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
|
||||||
|
|
||||||
|
|
||||||
class ACastIE(ACastBaseIE):
|
|
||||||
IE_NAME = 'acast'
|
IE_NAME = 'acast'
|
||||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@@ -23,14 +19,19 @@ class ACastIE(ACastBaseIE):
|
|||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||||
'timestamp': 1196172000000,
|
'timestamp': 1196172000000,
|
||||||
'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e',
|
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||||
'duration': 211,
|
'duration': 211,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id)
|
|
||||||
|
embed_page = self._download_webpage(
|
||||||
|
re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id)
|
||||||
|
cast_data = self._parse_json(self._search_regex(
|
||||||
|
r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'),
|
||||||
|
display_id)['GetAcast/%s/%s' % (channel, display_id)]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': compat_str(cast_data['id']),
|
'id': compat_str(cast_data['id']),
|
||||||
@@ -44,7 +45,7 @@ class ACastIE(ACastBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ACastChannelIE(ACastBaseIE):
|
class ACastChannelIE(InfoExtractor):
|
||||||
IE_NAME = 'acast:channel'
|
IE_NAME = 'acast:channel'
|
||||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@@ -56,6 +57,7 @@ class ACastChannelIE(ACastBaseIE):
|
|||||||
},
|
},
|
||||||
'playlist_mincount': 20,
|
'playlist_mincount': 20,
|
||||||
}
|
}
|
||||||
|
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
|
@@ -8,6 +8,8 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
qualities,
|
qualities,
|
||||||
|
unescapeHTML,
|
||||||
|
xpath_element,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -31,7 +33,7 @@ class AllocineIE(InfoExtractor):
|
|||||||
'id': '19540403',
|
'id': '19540403',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Planes 2 Bande-annonce VF',
|
'title': 'Planes 2 Bande-annonce VF',
|
||||||
'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e',
|
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
@@ -41,7 +43,7 @@ class AllocineIE(InfoExtractor):
|
|||||||
'id': '19544709',
|
'id': '19544709',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||||
'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
|
'description': 'md5:601d15393ac40f249648ef000720e7e3',
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
@@ -59,14 +61,18 @@ class AllocineIE(InfoExtractor):
|
|||||||
if typ == 'film':
|
if typ == 'film':
|
||||||
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
|
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
|
||||||
else:
|
else:
|
||||||
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
|
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
|
||||||
|
if player:
|
||||||
player_data = json.loads(player)
|
player_data = json.loads(player)
|
||||||
video_id = compat_str(player_data['refMedia'])
|
video_id = compat_str(player_data['refMedia'])
|
||||||
|
else:
|
||||||
|
model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
|
||||||
|
model_data = self._parse_json(unescapeHTML(model), display_id)
|
||||||
|
video_id = compat_str(model_data['id'])
|
||||||
|
|
||||||
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
|
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
|
||||||
|
|
||||||
video = xml.find('.//AcVisionVideo').attrib
|
video = xpath_element(xml, './/AcVisionVideo').attrib
|
||||||
quality = qualities(['ld', 'md', 'hd'])
|
quality = qualities(['ld', 'md', 'hd'])
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@@ -3,7 +3,11 @@ from __future__ import unicode_literals
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import float_or_none
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
sanitized_Request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AzubuIE(InfoExtractor):
|
class AzubuIE(InfoExtractor):
|
||||||
@@ -91,3 +95,37 @@ class AzubuIE(InfoExtractor):
|
|||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class AzubuLiveIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
user = self._match_id(url)
|
||||||
|
|
||||||
|
info = self._download_json(
|
||||||
|
'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
|
||||||
|
user)['data']
|
||||||
|
if info['type'] != 'STREAM':
|
||||||
|
raise ExtractorError('{0} is not streaming live'.format(user), expected=True)
|
||||||
|
|
||||||
|
req = sanitized_Request(
|
||||||
|
'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
|
||||||
|
req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
|
||||||
|
bc_info = self._download_json(req, user)
|
||||||
|
m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
|
||||||
|
formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': info['id'],
|
||||||
|
'title': self._live_title(info['title']),
|
||||||
|
'uploader_id': user,
|
||||||
|
'formats': formats,
|
||||||
|
'is_live': True,
|
||||||
|
'thumbnail': bc_info['poster'],
|
||||||
|
}
|
||||||
|
@@ -193,6 +193,19 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# compact player (https://github.com/rg3/youtube-dl/issues/8147)
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p028bfkj',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||||
|
'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -482,9 +495,11 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
if programme_id:
|
if programme_id:
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||||
r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', webpage, 'title')
|
(r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
|
||||||
|
r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
|
||||||
description = self._search_regex(
|
description = self._search_regex(
|
||||||
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
(r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||||
|
r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
|
||||||
webpage, 'description', default=None)
|
webpage, 'description', default=None)
|
||||||
if not description:
|
if not description:
|
||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
|
@@ -1,7 +1,13 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class BpbIE(InfoExtractor):
|
class BpbIE(InfoExtractor):
|
||||||
@@ -10,7 +16,8 @@ class BpbIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||||
'md5': '0792086e8e2bfbac9cdf27835d5f2093',
|
# md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2
|
||||||
|
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '297',
|
'id': '297',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -25,13 +32,26 @@ class BpbIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||||
video_url = self._html_search_regex(
|
video_info_dicts = re.findall(
|
||||||
r'(http://film\.bpb\.de/player/dokument_[0-9]+\.mp4)',
|
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
|
||||||
webpage, 'video URL')
|
|
||||||
|
formats = []
|
||||||
|
for video_info in video_info_dicts:
|
||||||
|
video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
|
||||||
|
quality = video_info['quality']
|
||||||
|
video_url = video_info['src']
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'preference': 10 if quality == 'high' else 0,
|
||||||
|
'format_note': quality,
|
||||||
|
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
}
|
}
|
||||||
|
@@ -1,16 +1,14 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import remove_start
|
from .theplatform import ThePlatformIE
|
||||||
|
from ..utils import parse_duration
|
||||||
|
|
||||||
|
|
||||||
class CBSNewsIE(InfoExtractor):
|
class CBSNewsIE(ThePlatformIE):
|
||||||
IE_DESC = 'CBS News'
|
IE_DESC = 'CBS News'
|
||||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
|
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@@ -31,7 +29,7 @@ class CBSNewsIE(InfoExtractor):
|
|||||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
|
'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'duration': 205,
|
'duration': 205,
|
||||||
@@ -42,54 +40,26 @@ class CBSNewsIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_info = json.loads(self._html_search_regex(
|
video_info = self._parse_json(self._html_search_regex(
|
||||||
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||||
webpage, 'video JSON info'))
|
webpage, 'video JSON info'), video_id)
|
||||||
|
|
||||||
item = video_info['item'] if 'item' in video_info else video_info
|
item = video_info['item'] if 'item' in video_info else video_info
|
||||||
title = item.get('articleTitle') or item.get('hed')
|
title = item.get('articleTitle') or item.get('hed')
|
||||||
duration = item.get('duration')
|
duration = item.get('duration')
|
||||||
thumbnail = item.get('mediaImage') or item.get('thumbnail')
|
thumbnail = item.get('mediaImage') or item.get('thumbnail')
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
|
|
||||||
uri = item.get('media' + format_id + 'URI')
|
|
||||||
if not uri:
|
|
||||||
continue
|
|
||||||
uri = remove_start(uri, '{manifest:none}')
|
|
||||||
fmt = {
|
|
||||||
'url': uri,
|
|
||||||
'format_id': format_id,
|
|
||||||
}
|
|
||||||
if uri.startswith('rtmp'):
|
|
||||||
play_path = re.sub(
|
|
||||||
r'{slistFilePath}', '',
|
|
||||||
uri.split('<break>')[-1].split('{break}')[-1])
|
|
||||||
play_path = re.sub(
|
|
||||||
r'{manifest:.+}.*$', '', play_path)
|
|
||||||
fmt.update({
|
|
||||||
'app': 'ondemand?auth=cbs',
|
|
||||||
'play_path': 'mp4:' + play_path,
|
|
||||||
'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
|
|
||||||
'page_url': 'http://www.cbsnews.com',
|
|
||||||
'ext': 'flv',
|
|
||||||
})
|
|
||||||
elif uri.endswith('.m3u8'):
|
|
||||||
fmt['ext'] = 'mp4'
|
|
||||||
formats.append(fmt)
|
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
if 'mpxRefId' in video_info:
|
if 'mpxRefId' in video_info:
|
||||||
subtitles['en'] = [{
|
subtitles['en'] = [{
|
||||||
@@ -97,6 +67,17 @@ class CBSNewsIE(InfoExtractor):
|
|||||||
'url': 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'],
|
'url': 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
|
||||||
|
pid = item.get('media' + format_id)
|
||||||
|
if not pid:
|
||||||
|
continue
|
||||||
|
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
|
||||||
|
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
||||||
|
formats.extend(tp_formats)
|
||||||
|
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@@ -105,3 +86,41 @@ class CBSNewsIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||||
|
IE_DESC = 'CBS News Live Videos'
|
||||||
|
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||||
|
'duration': 334,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_info = self._parse_json(self._html_search_regex(
|
||||||
|
r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
|
||||||
|
|
||||||
|
hdcore_sign = 'hdcore=3.3.1'
|
||||||
|
f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
|
||||||
|
if f4m_formats:
|
||||||
|
for entry in f4m_formats:
|
||||||
|
# URLs without the extra param induce an 404 error
|
||||||
|
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_info['headline'],
|
||||||
|
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
|
||||||
|
'duration': parse_duration(video_info.get('segmentDur')),
|
||||||
|
'formats': f4m_formats,
|
||||||
|
}
|
||||||
|
@@ -825,6 +825,12 @@ class InfoExtractor(object):
|
|||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('No video formats found')
|
raise ExtractorError('No video formats found')
|
||||||
|
|
||||||
|
for f in formats:
|
||||||
|
# Automatically determine tbr when missing based on abr and vbr (improves
|
||||||
|
# formats sorting in some cases)
|
||||||
|
if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
|
||||||
|
f['tbr'] = f['abr'] + f['vbr']
|
||||||
|
|
||||||
def _formats_key(f):
|
def _formats_key(f):
|
||||||
# TODO remove the following workaround
|
# TODO remove the following workaround
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
@@ -1014,6 +1020,18 @@ class InfoExtractor(object):
|
|||||||
return []
|
return []
|
||||||
m3u8_doc, urlh = res
|
m3u8_doc, urlh = res
|
||||||
m3u8_url = urlh.geturl()
|
m3u8_url = urlh.geturl()
|
||||||
|
# A Media Playlist Tag MUST NOT appear in a Master Playlist
|
||||||
|
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
|
||||||
|
# The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists
|
||||||
|
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
|
||||||
|
if '#EXT-X-TARGETDURATION' in m3u8_doc:
|
||||||
|
return [{
|
||||||
|
'url': m3u8_url,
|
||||||
|
'format_id': m3u8_id,
|
||||||
|
'ext': ext,
|
||||||
|
'protocol': entry_protocol,
|
||||||
|
'preference': preference,
|
||||||
|
}]
|
||||||
last_info = None
|
last_info = None
|
||||||
last_media = None
|
last_media = None
|
||||||
kv_rex = re.compile(
|
kv_rex = re.compile(
|
||||||
@@ -1058,9 +1076,9 @@ class InfoExtractor(object):
|
|||||||
# TODO: looks like video codec is not always necessarily goes first
|
# TODO: looks like video codec is not always necessarily goes first
|
||||||
va_codecs = codecs.split(',')
|
va_codecs = codecs.split(',')
|
||||||
if va_codecs[0]:
|
if va_codecs[0]:
|
||||||
f['vcodec'] = va_codecs[0].partition('.')[0]
|
f['vcodec'] = va_codecs[0]
|
||||||
if len(va_codecs) > 1 and va_codecs[1]:
|
if len(va_codecs) > 1 and va_codecs[1]:
|
||||||
f['acodec'] = va_codecs[1].partition('.')[0]
|
f['acodec'] = va_codecs[1]
|
||||||
resolution = last_info.get('RESOLUTION')
|
resolution = last_info.get('RESOLUTION')
|
||||||
if resolution:
|
if resolution:
|
||||||
width_str, height_str = resolution.split('x')
|
width_str, height_str = resolution.split('x')
|
||||||
@@ -1164,6 +1182,7 @@ class InfoExtractor(object):
|
|||||||
formats = []
|
formats = []
|
||||||
rtmp_count = 0
|
rtmp_count = 0
|
||||||
http_count = 0
|
http_count = 0
|
||||||
|
m3u8_count = 0
|
||||||
|
|
||||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||||
for video in videos:
|
for video in videos:
|
||||||
@@ -1203,8 +1222,17 @@ class InfoExtractor(object):
|
|||||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||||
|
|
||||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False))
|
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
|
||||||
|
if len(m3u8_formats) == 1:
|
||||||
|
m3u8_count += 1
|
||||||
|
m3u8_formats[0].update({
|
||||||
|
'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
|
||||||
|
'tbr': bitrate,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if src_ext == 'f4m':
|
if src_ext == 'f4m':
|
||||||
@@ -1302,6 +1330,83 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
def _download_dash_manifest(self, dash_manifest_url, video_id, fatal=True):
|
||||||
|
return self._download_xml(
|
||||||
|
dash_manifest_url, video_id,
|
||||||
|
note='Downloading DASH manifest',
|
||||||
|
errnote='Could not download DASH manifest',
|
||||||
|
fatal=fatal)
|
||||||
|
|
||||||
|
def _extract_dash_manifest_formats(self, dash_manifest_url, video_id, fatal=True, namespace=None, formats_dict={}):
|
||||||
|
dash_doc = self._download_dash_manifest(dash_manifest_url, video_id, fatal)
|
||||||
|
if dash_doc is False:
|
||||||
|
return []
|
||||||
|
|
||||||
|
return self._parse_dash_manifest(
|
||||||
|
dash_doc, namespace=namespace, formats_dict=formats_dict)
|
||||||
|
|
||||||
|
def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}):
|
||||||
|
def _add_ns(path):
|
||||||
|
return self._xpath_ns(path, namespace)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')):
|
||||||
|
mime_type = a.attrib.get('mimeType')
|
||||||
|
for r in a.findall(_add_ns('Representation')):
|
||||||
|
mime_type = r.attrib.get('mimeType') or mime_type
|
||||||
|
url_el = r.find(_add_ns('BaseURL'))
|
||||||
|
if mime_type == 'text/vtt':
|
||||||
|
# TODO implement WebVTT downloading
|
||||||
|
pass
|
||||||
|
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
|
||||||
|
segment_list = r.find(_add_ns('SegmentList'))
|
||||||
|
format_id = r.attrib['id']
|
||||||
|
video_url = url_el.text if url_el is not None else None
|
||||||
|
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||||
|
f = {
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': video_url,
|
||||||
|
'width': int_or_none(r.attrib.get('width')),
|
||||||
|
'height': int_or_none(r.attrib.get('height')),
|
||||||
|
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
|
||||||
|
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
|
||||||
|
'filesize': filesize,
|
||||||
|
'fps': int_or_none(r.attrib.get('frameRate')),
|
||||||
|
}
|
||||||
|
if segment_list is not None:
|
||||||
|
initialization_url = segment_list.find(_add_ns('Initialization')).attrib['sourceURL']
|
||||||
|
f.update({
|
||||||
|
'initialization_url': initialization_url,
|
||||||
|
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall(_add_ns('SegmentURL'))],
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
})
|
||||||
|
if not f.get('url'):
|
||||||
|
f['url'] = initialization_url
|
||||||
|
try:
|
||||||
|
existing_format = next(
|
||||||
|
fo for fo in formats
|
||||||
|
if fo['format_id'] == format_id)
|
||||||
|
except StopIteration:
|
||||||
|
full_info = formats_dict.get(format_id, {}).copy()
|
||||||
|
full_info.update(f)
|
||||||
|
codecs = r.attrib.get('codecs')
|
||||||
|
if codecs:
|
||||||
|
if mime_type.startswith('video/'):
|
||||||
|
vcodec, acodec = codecs, 'none'
|
||||||
|
else: # mime_type.startswith('audio/')
|
||||||
|
vcodec, acodec = 'none', codecs
|
||||||
|
|
||||||
|
full_info.update({
|
||||||
|
'vcodec': vcodec,
|
||||||
|
'acodec': acodec,
|
||||||
|
})
|
||||||
|
formats.append(full_info)
|
||||||
|
else:
|
||||||
|
existing_format.update(f)
|
||||||
|
else:
|
||||||
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||||
|
return formats
|
||||||
|
|
||||||
def _live_title(self, name):
|
def _live_title(self, name):
|
||||||
""" Generate the title for a live video """
|
""" Generate the title for a live video """
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
|
@@ -68,11 +68,16 @@ class CSpanIE(InfoExtractor):
|
|||||||
video_type, video_id = matches.groups()
|
video_type, video_id = matches.groups()
|
||||||
video_type = 'clip' if video_type == 'id' else 'program'
|
video_type = 'clip' if video_type == 'id' else 'program'
|
||||||
else:
|
else:
|
||||||
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
m = re.search(r'data-(?P<type>clip|prog)id=["\'](?P<id>\d+)', webpage)
|
||||||
if senate_isvp_url:
|
if m:
|
||||||
title = self._og_search_title(webpage)
|
video_id = m.group('id')
|
||||||
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
video_type = 'program' if m.group('type') == 'prog' else 'clip'
|
||||||
return self.url_result(surl, 'SenateISVP', video_id, title)
|
else:
|
||||||
|
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
||||||
|
if senate_isvp_url:
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
||||||
|
return self.url_result(surl, 'SenateISVP', video_id, title)
|
||||||
if video_type is None or video_id is None:
|
if video_type is None or video_id is None:
|
||||||
raise ExtractorError('unable to find video id and type')
|
raise ExtractorError('unable to find video id and type')
|
||||||
|
|
||||||
@@ -107,6 +112,13 @@ class CSpanIE(InfoExtractor):
|
|||||||
'height': int_or_none(get_text_attr(quality, 'height')),
|
'height': int_or_none(get_text_attr(quality, 'height')),
|
||||||
'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
|
'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
|
||||||
})
|
})
|
||||||
|
if not formats:
|
||||||
|
path = unescapeHTML(get_text_attr(f, 'path'))
|
||||||
|
if not path:
|
||||||
|
continue
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': '%s_%d' % (video_id, partnum + 1),
|
'id': '%s_%d' % (video_id, partnum + 1),
|
||||||
|
@@ -2,17 +2,26 @@
|
|||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DaumIE(InfoExtractor):
|
class DaumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P<id>[^?#&]+)'
|
_VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
|
||||||
IE_NAME = 'daum.net'
|
IE_NAME = 'daum.net'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -23,25 +32,57 @@ class DaumIE(InfoExtractor):
|
|||||||
'title': '마크 헌트 vs 안토니오 실바',
|
'title': '마크 헌트 vs 안토니오 실바',
|
||||||
'description': 'Mark Hunt vs Antonio Silva',
|
'description': 'Mark Hunt vs Antonio Silva',
|
||||||
'upload_date': '20131217',
|
'upload_date': '20131217',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||||
'duration': 2117,
|
'duration': 2117,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.tvpot.daum.net/v/65139429',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '65139429',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
|
||||||
|
'description': 'md5:79794514261164ff27e36a21ad229fc5',
|
||||||
|
'upload_date': '20150604',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||||
|
'duration': 154,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'vwIpVpCQsT8$',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '01-Korean War ( Trouble on the horizon )',
|
||||||
|
'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
|
||||||
|
'upload_date': '20080223',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||||
|
'duration': 249,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||||
query = compat_urllib_parse.urlencode({'vid': video_id})
|
query = compat_urllib_parse.urlencode({'vid': video_id})
|
||||||
info = self._download_xml(
|
|
||||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
|
||||||
'Downloading video info')
|
|
||||||
movie_data = self._download_json(
|
movie_data = self._download_json(
|
||||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
|
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
|
||||||
video_id, 'Downloading video formats info')
|
video_id, 'Downloading video formats info')
|
||||||
|
|
||||||
|
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
|
||||||
|
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
|
||||||
|
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
|
||||||
|
|
||||||
|
info = self._download_xml(
|
||||||
|
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||||
|
'Downloading video info')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_el in movie_data['output_list']['output_list']:
|
for format_el in movie_data['output_list']['output_list']:
|
||||||
profile = format_el['profile']
|
profile = format_el['profile']
|
||||||
@@ -76,8 +117,9 @@ class DaumIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class DaumClipIE(InfoExtractor):
|
class DaumClipIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
||||||
IE_NAME = 'daum.net:clip'
|
IE_NAME = 'daum.net:clip'
|
||||||
|
_URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
||||||
@@ -87,11 +129,19 @@ class DaumClipIE(InfoExtractor):
|
|||||||
'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||||
'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||||
'upload_date': '20130831',
|
'upload_date': '20130831',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||||
'duration': 3868,
|
'duration': 3868,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super(DaumClipIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
clip_info = self._download_json(
|
clip_info = self._download_json(
|
||||||
@@ -102,7 +152,7 @@ class DaumClipIE(InfoExtractor):
|
|||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
|
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
|
||||||
'title': clip_info['title'],
|
'title': unescapeHTML(clip_info['title']),
|
||||||
'thumbnail': clip_info.get('thumb_url'),
|
'thumbnail': clip_info.get('thumb_url'),
|
||||||
'description': clip_info.get('contents'),
|
'description': clip_info.get('contents'),
|
||||||
'duration': int_or_none(clip_info.get('duration')),
|
'duration': int_or_none(clip_info.get('duration')),
|
||||||
@@ -110,3 +160,139 @@ class DaumClipIE(InfoExtractor):
|
|||||||
'view_count': int_or_none(clip_info.get('play_count')),
|
'view_count': int_or_none(clip_info.get('play_count')),
|
||||||
'ie_key': 'Daum',
|
'ie_key': 'Daum',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DaumListIE(InfoExtractor):
|
||||||
|
def _get_entries(self, list_id, list_id_type):
|
||||||
|
name = None
|
||||||
|
entries = []
|
||||||
|
for pagenum in itertools.count(1):
|
||||||
|
list_info = self._download_json(
|
||||||
|
'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
|
||||||
|
pagenum, list_id_type, list_id), list_id, 'Downloading list info - %s' % pagenum)
|
||||||
|
|
||||||
|
entries.extend([
|
||||||
|
self.url_result(
|
||||||
|
'http://tvpot.daum.net/v/%s' % clip['vid'])
|
||||||
|
for clip in list_info['clip_list']
|
||||||
|
])
|
||||||
|
|
||||||
|
if not name:
|
||||||
|
name = list_info.get('playlist_bean', {}).get('name') or \
|
||||||
|
list_info.get('potInfo', {}).get('name')
|
||||||
|
|
||||||
|
if not list_info.get('has_more'):
|
||||||
|
break
|
||||||
|
|
||||||
|
return name, entries
|
||||||
|
|
||||||
|
def _check_clip(self, url, list_id):
|
||||||
|
query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
|
if 'clipid' in query_dict:
|
||||||
|
clip_id = query_dict['clipid'][0]
|
||||||
|
if self._downloader.params.get('noplaylist'):
|
||||||
|
self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
|
||||||
|
return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
|
||||||
|
else:
|
||||||
|
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
|
||||||
|
|
||||||
|
|
||||||
|
class DaumPlaylistIE(DaumListIE):
|
||||||
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)'
|
||||||
|
IE_NAME = 'daum.net:playlist'
|
||||||
|
_URL_TEMPLATE = 'http://tvpot.daum.net/mypot/View.do?playlistid=%s'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Playlist url with clipid',
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6213966',
|
||||||
|
'title': 'Woorissica Official',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 181
|
||||||
|
}, {
|
||||||
|
'note': 'Playlist url with clipid - noplaylist',
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '73806844',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '151017 Airport',
|
||||||
|
'upload_date': '20160117',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True,
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if DaumUserIE.suitable(url) else super(DaumPlaylistIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
list_id = self._match_id(url)
|
||||||
|
|
||||||
|
clip_result = self._check_clip(url, list_id)
|
||||||
|
if clip_result:
|
||||||
|
return clip_result
|
||||||
|
|
||||||
|
name, entries = self._get_entries(list_id, 'playlistid')
|
||||||
|
|
||||||
|
return self.playlist_result(entries, list_id, name)
|
||||||
|
|
||||||
|
|
||||||
|
class DaumUserIE(DaumListIE):
|
||||||
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.(?:do|tv)\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
IE_NAME = 'daum.net:user'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'o2scDLIVbHc0',
|
||||||
|
'title': '마이 리틀 텔레비전',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 213
|
||||||
|
}, {
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '73801156',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116',
|
||||||
|
'upload_date': '20160117',
|
||||||
|
'description': 'md5:5e91d2d6747f53575badd24bd62b9f36'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True,
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence',
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6196631',
|
||||||
|
'title': '마이 리틀 텔레비전 - 20160109',
|
||||||
|
},
|
||||||
|
'playlist_count': 11
|
||||||
|
}, {
|
||||||
|
'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.tvpot.daum.net/mypot/Top.tv?ownerid=45x1okb1If50&playlistid=3569733',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
list_id = self._match_id(url)
|
||||||
|
|
||||||
|
clip_result = self._check_clip(url, list_id)
|
||||||
|
if clip_result:
|
||||||
|
return clip_result
|
||||||
|
|
||||||
|
query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
|
if 'playlistid' in query_dict:
|
||||||
|
playlist_id = query_dict['playlistid'][0]
|
||||||
|
return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist')
|
||||||
|
|
||||||
|
name, entries = self._get_entries(list_id, 'ownerid')
|
||||||
|
|
||||||
|
return self.playlist_result(entries, list_id, name)
|
||||||
|
@@ -53,8 +53,8 @@ class ESPNIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'class="video-play-button"[^>]+data-id="(\d+)',
|
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
|
||||||
webpage, 'video id')
|
webpage, 'video id', group='id')
|
||||||
|
|
||||||
cms = 'espn'
|
cms = 'espn'
|
||||||
if 'data-source="intl"' in webpage:
|
if 'data-source="intl"' in webpage:
|
||||||
|
@@ -6,9 +6,11 @@ import socket
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_etree_fromstring,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urllib_parse_unquote_plus,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
@@ -23,19 +25,30 @@ from ..utils import (
|
|||||||
|
|
||||||
class FacebookIE(InfoExtractor):
|
class FacebookIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?:\w+\.)?facebook\.com/
|
(?:
|
||||||
(?:[^#]*?\#!/)?
|
https?://
|
||||||
(?:
|
(?:\w+\.)?facebook\.com/
|
||||||
(?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
|
(?:[^#]*?\#!/)?
|
||||||
(?:v|video_id)=|
|
(?:
|
||||||
[^/]+/videos/(?:[^/]+/)?
|
(?:
|
||||||
)
|
video/video\.php|
|
||||||
(?P<id>[0-9]+)
|
photo\.php|
|
||||||
(?:.*)'''
|
video\.php|
|
||||||
|
video/embed
|
||||||
|
)\?(?:.*?)(?:v|video_id)=|
|
||||||
|
[^/]+/videos/(?:[^/]+/)?
|
||||||
|
)|
|
||||||
|
facebook:
|
||||||
|
)
|
||||||
|
(?P<id>[0-9]+)
|
||||||
|
'''
|
||||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||||
_NETRC_MACHINE = 'facebook'
|
_NETRC_MACHINE = 'facebook'
|
||||||
IE_NAME = 'facebook'
|
IE_NAME = 'facebook'
|
||||||
|
|
||||||
|
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||||
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
||||||
@@ -57,6 +70,16 @@ class FacebookIE(InfoExtractor):
|
|||||||
'expected_warnings': [
|
'expected_warnings': [
|
||||||
'title'
|
'title'
|
||||||
]
|
]
|
||||||
|
}, {
|
||||||
|
'note': 'Video with DASH manifest',
|
||||||
|
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||||
|
'md5': '54706e4db4f5ad58fbad82dde1f1213f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '957955867617029',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
|
||||||
|
'uploader': 'Demy de Zeeuw',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -66,6 +89,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
|
'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'facebook:544765982287235',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@@ -136,13 +162,36 @@ class FacebookIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
|
||||||
webpage = self._download_webpage(url, video_id)
|
req.add_header('User-Agent', self._CHROME_USER_AGENT)
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
video_data = None
|
||||||
|
|
||||||
BEFORE = '{swf.addParam(param[0], param[1]);});\n'
|
BEFORE = '{swf.addParam(param[0], param[1]);});\n'
|
||||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||||
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
||||||
if not m:
|
if m:
|
||||||
|
data = dict(json.loads(m.group(1)))
|
||||||
|
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||||
|
video_data = json.loads(params_raw)['video_data']
|
||||||
|
|
||||||
|
def video_data_list2dict(video_data):
|
||||||
|
ret = {}
|
||||||
|
for item in video_data:
|
||||||
|
format_id = item['stream_type']
|
||||||
|
ret.setdefault(format_id, []).append(item)
|
||||||
|
return ret
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
|
server_js_data = self._parse_json(self._search_regex(
|
||||||
|
r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
|
||||||
|
for item in server_js_data['instances']:
|
||||||
|
if item[1][0] == 'VideoConfig':
|
||||||
|
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||||
|
break
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||||
if m_msg is not None:
|
if m_msg is not None:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
@@ -150,12 +199,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
expected=True)
|
expected=True)
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Cannot parse data')
|
raise ExtractorError('Cannot parse data')
|
||||||
data = dict(json.loads(m.group(1)))
|
|
||||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
|
||||||
params = json.loads(params_raw)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, f in params['video_data'].items():
|
for format_id, f in video_data.items():
|
||||||
if not f or not isinstance(f, list):
|
if not f or not isinstance(f, list):
|
||||||
continue
|
continue
|
||||||
for quality in ('sd', 'hd'):
|
for quality in ('sd', 'hd'):
|
||||||
@@ -167,9 +213,16 @@ class FacebookIE(InfoExtractor):
|
|||||||
'url': src,
|
'url': src,
|
||||||
'preference': -10 if format_id == 'progressive' else 0,
|
'preference': -10 if format_id == 'progressive' else 0,
|
||||||
})
|
})
|
||||||
|
dash_manifest = f[0].get('dash_manifest')
|
||||||
|
if dash_manifest:
|
||||||
|
formats.extend(self._parse_dash_manifest(
|
||||||
|
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)),
|
||||||
|
namespace='urn:mpeg:dash:schema:mpd:2011'))
|
||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('Cannot find video formats')
|
raise ExtractorError('Cannot find video formats')
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
|
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
|
||||||
default=None)
|
default=None)
|
||||||
@@ -188,3 +241,33 @@ class FacebookIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class FacebookPostIE(InfoExtractor):
|
||||||
|
IE_NAME = 'facebook:post'
|
||||||
|
_VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||||
|
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '544765982287235',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"What are you doing running in the snow?"',
|
||||||
|
'uploader': 'FailArmy',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
post_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, post_id)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
||||||
|
for video_id in self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
|
||||||
|
webpage, 'video ids', group='ids'),
|
||||||
|
post_id)]
|
||||||
|
|
||||||
|
return self.playlist_result(entries, post_id)
|
||||||
|
@@ -6,24 +6,29 @@ from ..utils import (
|
|||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
class GamekingsIE(InfoExtractor):
|
class GamekingsIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
|
_VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
# YouTube embed video
|
||||||
# MD5 is flaky, seems to change regularly
|
'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
'md5': '5208d3a17adeaef829a7861887cb9029',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
|
'id': 'HkSQKetlGOU',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
'title': 'Phoenix Wright: Ace Attorney - Dual Destinies Review',
|
||||||
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
|
'description': 'md5:db88c0e7f47e9ea50df3271b9dc72e1d',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader_id': 'UCJugRGo4STYMeFr5RoOShtQ',
|
||||||
|
'uploader': 'Gamekings Vault',
|
||||||
|
'upload_date': '20151123',
|
||||||
},
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
}, {
|
}, {
|
||||||
# vimeo video
|
# vimeo video
|
||||||
'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
|
'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/',
|
||||||
'md5': '12bf04dfd238e70058046937657ea68d',
|
'md5': '12bf04dfd238e70058046937657ea68d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'the-legend-of-zelda-majoras-mask',
|
'id': 'the-legend-of-zelda-majoras-mask',
|
||||||
@@ -33,7 +38,7 @@ class GamekingsIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
'url': 'http://www.gamekings.nl/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@@ -43,7 +48,11 @@ class GamekingsIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
playlist_id = self._search_regex(
|
playlist_id = self._search_regex(
|
||||||
r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
|
r'gogoVideo\([^,]+,\s*"([^"]+)', webpage, 'playlist id')
|
||||||
|
|
||||||
|
# Check if a YouTube embed is used
|
||||||
|
if YoutubeIE.suitable(playlist_id):
|
||||||
|
return self.url_result(playlist_id, ie='Youtube')
|
||||||
|
|
||||||
playlist = self._download_xml(
|
playlist = self._download_xml(
|
||||||
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
||||||
|
@@ -1229,19 +1229,24 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
content_type = head_response.headers.get('Content-Type', '')
|
content_type = head_response.headers.get('Content-Type', '')
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
||||||
if m:
|
if m:
|
||||||
upload_date = unified_strdate(
|
upload_date = unified_strdate(
|
||||||
head_response.headers.get('Last-Modified'))
|
head_response.headers.get('Last-Modified'))
|
||||||
|
formats = []
|
||||||
|
if m.group('format_id').endswith('mpegurl'):
|
||||||
|
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||||
|
else:
|
||||||
|
formats = [{
|
||||||
|
'format_id': m.group('format_id'),
|
||||||
|
'url': url,
|
||||||
|
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||||
|
}]
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||||
'direct': True,
|
'direct': True,
|
||||||
'formats': [{
|
'formats': formats,
|
||||||
'format_id': m.group('format_id'),
|
|
||||||
'url': url,
|
|
||||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
|
||||||
}],
|
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1819,6 +1824,17 @@ class GenericIE(InfoExtractor):
|
|||||||
if digiteka_url:
|
if digiteka_url:
|
||||||
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
||||||
|
|
||||||
|
# Look for Limelight embeds
|
||||||
|
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||||
|
if mobj:
|
||||||
|
lm = {
|
||||||
|
'Media': 'media',
|
||||||
|
'Channel': 'channel',
|
||||||
|
'ChannelList': 'channel_list',
|
||||||
|
}
|
||||||
|
return self.url_result('limelight:%s:%s' % (
|
||||||
|
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||||
|
|
||||||
# Look for AdobeTVVideo embeds
|
# Look for AdobeTVVideo embeds
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||||
|
@@ -21,6 +21,18 @@ class InstagramIE(InfoExtractor):
|
|||||||
'title': 'Video by naomipq',
|
'title': 'Video by naomipq',
|
||||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# missing description
|
||||||
|
'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BA-pQFBG8HZ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader_id': 'britneyspears',
|
||||||
|
'title': 'Video by britneyspears',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -32,8 +44,8 @@ class InstagramIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||||
webpage, 'uploader id', fatal=False)
|
webpage, 'uploader id', fatal=False)
|
||||||
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
|
desc = self._search_regex(
|
||||||
fatal=False)
|
r'"caption":"(.+?)"', webpage, 'description', default=None)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -2,12 +2,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
class KickStarterIE(InfoExtractor):
|
class KickStarterIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
|
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description',
|
||||||
'md5': 'c81addca81327ffa66c642b5d8b08cab',
|
'md5': 'c81addca81327ffa66c642b5d8b08cab',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1404461844',
|
'id': '1404461844',
|
||||||
@@ -27,7 +28,8 @@ class KickStarterIE(InfoExtractor):
|
|||||||
'uploader_id': 'pebble',
|
'uploader_id': 'pebble',
|
||||||
'uploader': 'Pebble Technology',
|
'uploader': 'Pebble Technology',
|
||||||
'title': 'Pebble iOS Notifications',
|
'title': 'Pebble iOS Notifications',
|
||||||
}
|
},
|
||||||
|
'add_ie': ['Vimeo'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
|
'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -43,7 +45,7 @@ class KickStarterIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>\s*(.*?)(?:\s*— Kickstarter)?\s*</title>',
|
r'<title>\s*(.*?)(?:\s*—\s*Kickstarter)?\s*</title>',
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'data-video-url="(.*?)"',
|
r'data-video-url="(.*?)"',
|
||||||
@@ -52,7 +54,7 @@ class KickStarterIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'Generic',
|
'ie_key': 'Generic',
|
||||||
'url': url,
|
'url': smuggle_url(url, {'to_generic': True}),
|
||||||
'title': title,
|
'title': title,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -31,6 +31,10 @@ class KuwoBaseIE(InfoExtractor):
|
|||||||
(file_format['ext'], file_format.get('br', ''), song_id),
|
(file_format['ext'], file_format.get('br', ''), song_id),
|
||||||
song_id, note='Download %s url info' % file_format['format'],
|
song_id, note='Download %s url info' % file_format['format'],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if song_url == 'IPDeny':
|
||||||
|
raise ExtractorError('This song is blocked in this region', expected=True)
|
||||||
|
|
||||||
if song_url.startswith('http://') or song_url.startswith('https://'):
|
if song_url.startswith('http://') or song_url.startswith('https://'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': song_url,
|
'url': song_url,
|
||||||
|
@@ -5,11 +5,13 @@ import datetime
|
|||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import base64
|
import base64
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_ord,
|
compat_ord,
|
||||||
|
compat_str,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@@ -258,6 +260,7 @@ class LetvCloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
|
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
|
||||||
|
'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p7jnfw5hw9_ec93197892',
|
'id': 'p7jnfw5hw9_ec93197892',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -265,6 +268,7 @@ class LetvCloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
|
'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
|
||||||
|
'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p7jnfw5hw9_187060b6fd',
|
'id': 'p7jnfw5hw9_187060b6fd',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -272,21 +276,37 @@ class LetvCloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
@staticmethod
|
||||||
uu_mobj = re.search('uu=([\w]+)', url)
|
def sign_data(obj):
|
||||||
vu_mobj = re.search('vu=([\w]+)', url)
|
if obj['cf'] == 'flash':
|
||||||
|
salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
|
||||||
|
items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
|
||||||
|
elif obj['cf'] == 'html5':
|
||||||
|
salt = 'fbeh5player12c43eccf2bec3300344'
|
||||||
|
items = ['cf', 'ran', 'uu', 'bver', 'vu']
|
||||||
|
input_data = ''.join([item + obj[item] for item in items]) + salt
|
||||||
|
obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
if not uu_mobj or not vu_mobj:
|
def _get_formats(self, cf, uu, vu, media_id):
|
||||||
raise ExtractorError('Invalid URL: %s' % url, expected=True)
|
def get_play_json(cf, timestamp):
|
||||||
|
data = {
|
||||||
|
'cf': cf,
|
||||||
|
'ver': '2.2',
|
||||||
|
'bver': 'firefox44.0',
|
||||||
|
'format': 'json',
|
||||||
|
'uu': uu,
|
||||||
|
'vu': vu,
|
||||||
|
'ran': compat_str(timestamp),
|
||||||
|
}
|
||||||
|
self.sign_data(data)
|
||||||
|
return self._download_json(
|
||||||
|
'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data),
|
||||||
|
media_id, 'Downloading playJson data for type %s' % cf)
|
||||||
|
|
||||||
uu = uu_mobj.group(1)
|
play_json = get_play_json(cf, time.time())
|
||||||
vu = vu_mobj.group(1)
|
# The server time may be different from local time
|
||||||
media_id = uu + '_' + vu
|
if play_json.get('code') == 10071:
|
||||||
|
play_json = get_play_json(cf, play_json['timestamp'])
|
||||||
play_json_req = sanitized_Request(
|
|
||||||
'http://api.letvcloud.com/gpc.php?cf=html5&sign=signxxxxx&ver=2.2&format=json&' +
|
|
||||||
'uu=' + uu + '&vu=' + vu)
|
|
||||||
play_json = self._download_json(play_json_req, media_id, 'Downloading playJson data')
|
|
||||||
|
|
||||||
if not play_json.get('data'):
|
if not play_json.get('data'):
|
||||||
if play_json.get('message'):
|
if play_json.get('message'):
|
||||||
@@ -312,6 +332,21 @@ class LetvCloudIE(InfoExtractor):
|
|||||||
'width': int_or_none(play_url.get('vwidth')),
|
'width': int_or_none(play_url.get('vwidth')),
|
||||||
'height': int_or_none(play_url.get('vheight')),
|
'height': int_or_none(play_url.get('vheight')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
uu_mobj = re.search('uu=([\w]+)', url)
|
||||||
|
vu_mobj = re.search('vu=([\w]+)', url)
|
||||||
|
|
||||||
|
if not uu_mobj or not vu_mobj:
|
||||||
|
raise ExtractorError('Invalid URL: %s' % url, expected=True)
|
||||||
|
|
||||||
|
uu = uu_mobj.group(1)
|
||||||
|
vu = vu_mobj.group(1)
|
||||||
|
media_id = uu + '_' + vu
|
||||||
|
|
||||||
|
formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -40,7 +40,8 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
if not stream_url:
|
if not stream_url:
|
||||||
continue
|
continue
|
||||||
if '.f4m' in stream_url:
|
if '.f4m' in stream_url:
|
||||||
formats.extend(self._extract_f4m_formats(stream_url, video_id))
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
stream_url, video_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
fmt = {
|
fmt = {
|
||||||
'url': stream_url,
|
'url': stream_url,
|
||||||
@@ -72,8 +73,8 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
format_id = mobile_url.get('targetMediaPlatform')
|
format_id = mobile_url.get('targetMediaPlatform')
|
||||||
if determine_ext(media_url) == 'm3u8':
|
if determine_ext(media_url) == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
media_url, video_id, 'mp4', 'm3u8_native',
|
||||||
preference=-1, m3u8_id=format_id))
|
m3u8_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': media_url,
|
'url': media_url,
|
||||||
|
55
youtube_dl/extractor/matchtv.py
Normal file
55
youtube_dl/extractor/matchtv.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse
|
||||||
|
from ..utils import (
|
||||||
|
sanitized_Request,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MatchTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://matchtv\.ru/?#live-player'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://matchtv.ru/#live-player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'matchtv-live',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = 'matchtv-live'
|
||||||
|
request = sanitized_Request(
|
||||||
|
'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse.urlencode({
|
||||||
|
'ts': '',
|
||||||
|
'quality': 'SD',
|
||||||
|
'contentId': '561d2c0df7159b37178b4567',
|
||||||
|
'sign': '',
|
||||||
|
'includeHighlights': '0',
|
||||||
|
'userId': '',
|
||||||
|
'sessionId': random.randint(1, 1000000000),
|
||||||
|
'contentType': 'channel',
|
||||||
|
'timeShift': '0',
|
||||||
|
'platform': 'portal',
|
||||||
|
}),
|
||||||
|
headers={
|
||||||
|
'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
|
||||||
|
})
|
||||||
|
video_url = self._download_json(request, video_id)['data']['videoUrl']
|
||||||
|
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
|
||||||
|
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._live_title('Матч ТВ - Прямой эфир'),
|
||||||
|
'is_live': True,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -18,13 +18,17 @@ class NBAIE(InfoExtractor):
|
|||||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0021200253-okc-bkn-recap',
|
'id': '0021200253-okc-bkn-recap',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Thunder vs. Nets',
|
'title': 'Thunder vs. Nets',
|
||||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||||
'duration': 181,
|
'duration': 181,
|
||||||
'timestamp': 1354638466,
|
'timestamp': 1354638466,
|
||||||
'upload_date': '20121204',
|
'upload_date': '20121204',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -68,7 +72,7 @@ class NBAIE(InfoExtractor):
|
|||||||
if video_url.startswith('/'):
|
if video_url.startswith('/'):
|
||||||
continue
|
continue
|
||||||
if video_url.endswith('.m3u8'):
|
if video_url.endswith('.m3u8'):
|
||||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False))
|
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||||
elif video_url.endswith('.f4m'):
|
elif video_url.endswith('.f4m'):
|
||||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
|
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
|
||||||
else:
|
else:
|
||||||
|
@@ -19,32 +19,39 @@ class NBCIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||||
# md5 checksum is not stable
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'c9xnCo0YPOPH',
|
'id': '112966',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
|
'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XwU9KZkp98TH',
|
'id': '176',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
|
'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
|
||||||
'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
|
'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
|
||||||
},
|
},
|
||||||
'skip': 'Only works from US',
|
'skip': '404 Not Found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8iUuyzWDdYUZ',
|
'id': '2832821',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Star Wars Teaser',
|
'title': 'Star Wars Teaser',
|
||||||
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
'skip': 'Only works from US',
|
'skip': 'Only works from US',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -66,7 +73,11 @@ class NBCIE(InfoExtractor):
|
|||||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||||
if theplatform_url.startswith('//'):
|
if theplatform_url.startswith('//'):
|
||||||
theplatform_url = 'http:' + theplatform_url
|
theplatform_url = 'http:' + theplatform_url
|
||||||
return self.url_result(smuggle_url(theplatform_url, {'source_url': url}))
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': smuggle_url(theplatform_url, {'source_url': url}),
|
||||||
|
'id': video_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class NBCSportsVPlayerIE(InfoExtractor):
|
class NBCSportsVPlayerIE(InfoExtractor):
|
||||||
|
@@ -193,7 +193,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
|||||||
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
|
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
src, video_id, m3u8_id='hls', entry_protocol='m3u8_native'))
|
src, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'))
|
||||||
else:
|
else:
|
||||||
quality = f.get('quality')
|
quality = f.get('quality')
|
||||||
ff = {
|
ff = {
|
||||||
|
@@ -189,7 +189,7 @@ class NPOIE(NPOBaseIE):
|
|||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
if format_id == 'adaptive':
|
if format_id == 'adaptive':
|
||||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
@@ -406,6 +406,38 @@ class NPORadioFragmentIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SchoolTVIE(InfoExtractor):
|
||||||
|
IE_NAME = 'schooltv'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'WO_NTR_429477',
|
||||||
|
'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
|
||||||
|
'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:abfa0ff690adb73fd0297fd033aaa631'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Skip because of m3u8 download
|
||||||
|
'skip_download': True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id')
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': 'NPO',
|
||||||
|
'url': 'npo:%s' % video_id,
|
||||||
|
'display_id': display_id
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class VPROIE(NPOIE):
|
class VPROIE(NPOIE):
|
||||||
IE_NAME = 'vpro'
|
IE_NAME = 'vpro'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||||
|
@@ -133,26 +133,32 @@ class NRKTVIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||||
'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MUHH48000314',
|
'id': 'MUHH48000314',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': '20 spørsmål',
|
'title': '20 spørsmål',
|
||||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||||
'upload_date': '20140523',
|
'upload_date': '20140523',
|
||||||
'duration': 1741.52,
|
'duration': 1741.52,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||||
'md5': '383650ece2b25ecec996ad7b5bb2a384',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdfp15000514',
|
'id': 'mdfp15000514',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
|
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||||
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
||||||
'upload_date': '20140524',
|
'upload_date': '20140524',
|
||||||
'duration': 4605.0,
|
'duration': 4605.08,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class OdnoklassnikiIE(InfoExtractor):
|
class OdnoklassnikiIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# metadata in JSON
|
# metadata in JSON
|
||||||
'url': 'http://ok.ru/video/20079905452',
|
'url': 'http://ok.ru/video/20079905452',
|
||||||
@@ -69,6 +69,12 @@ class OdnoklassnikiIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ok.ru/videoembed/20648036891',
|
'url': 'http://www.ok.ru/videoembed/20648036891',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.ok.ru/video/20079905452',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://mobile.ok.ru/video/20079905452',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -71,7 +71,7 @@ class ScreenwaveMediaIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for source in sources:
|
for source in sources:
|
||||||
if source['type'] == 'hls':
|
if source['type'] == 'hls':
|
||||||
formats.extend(self._extract_m3u8_formats(source['file'], video_id))
|
formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4'))
|
||||||
else:
|
else:
|
||||||
file_ = source.get('file')
|
file_ = source.get('file')
|
||||||
if not file_:
|
if not file_:
|
||||||
@@ -107,7 +107,11 @@ class TeamFourIE(InfoExtractor):
|
|||||||
'upload_date': '20130401',
|
'upload_date': '20130401',
|
||||||
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
|
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
|
||||||
'title': 'A Moment With TFS Episode 4',
|
'title': 'A Moment With TFS Episode 4',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -53,17 +53,25 @@ class SenateISVPIE(InfoExtractor):
|
|||||||
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'judiciary031715',
|
'id': 'judiciary031715',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Integrated Senate Video Player',
|
'title': 'Integrated Senate Video Player',
|
||||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
|
'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'commerce011514',
|
'id': 'commerce011514',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Integrated Senate Video Player'
|
'title': 'Integrated Senate Video Player'
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
||||||
# checksum differs each time
|
# checksum differs each time
|
||||||
|
@@ -34,11 +34,11 @@ class SpankBangIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'format_id': '%sp' % height,
|
'format_id': '%sp' % height,
|
||||||
'height': int(height),
|
'height': int(height),
|
||||||
} for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
|
} for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<h1>(.+?)</h1>', webpage, 'title')
|
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
|
||||||
description = self._search_regex(
|
description = self._search_regex(
|
||||||
r'class="desc"[^>]*>([^<]+)',
|
r'class="desc"[^>]*>([^<]+)',
|
||||||
webpage, 'description', default=None)
|
webpage, 'description', default=None)
|
||||||
|
@@ -70,14 +70,11 @@ class SRGSSRIE(InfoExtractor):
|
|||||||
asset_url, media_id, 'mp4', 'm3u8_native',
|
asset_url, media_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id=format_id, fatal=False))
|
m3u8_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
ext = None
|
|
||||||
if protocol == 'RTMP':
|
|
||||||
ext = self._search_regex(r'([a-z0-9]+):[^/]+', asset_url, 'ext')
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': asset_url,
|
'url': asset_url,
|
||||||
'preference': preference(quality),
|
'preference': preference(quality),
|
||||||
'ext': ext,
|
'ext': 'flv' if protocol == 'RTMP' else None,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@@ -17,18 +17,21 @@ class TV2IE(InfoExtractor):
|
|||||||
_VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
|
_VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.tv2.no/v/916509/',
|
'url': 'http://www.tv2.no/v/916509/',
|
||||||
'md5': '9cb9e3410b18b515d71892f27856e9b1',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '916509',
|
'id': '916509',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Se Gryttens hyllest av Steven Gerrard',
|
'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
|
||||||
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
|
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
|
||||||
'timestamp': 1431715610,
|
'timestamp': 1431715610,
|
||||||
'upload_date': '20150515',
|
'upload_date': '20150515',
|
||||||
'duration': 156.967,
|
'duration': 156.967,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -3,22 +3,20 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_etree_fromstring
|
||||||
compat_etree_fromstring,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class VevoIE(InfoExtractor):
|
class VevoIE(InfoExtractor):
|
||||||
"""
|
'''
|
||||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||||
(currently used by MTVIE and MySpaceIE)
|
(currently used by MTVIE and MySpaceIE)
|
||||||
"""
|
'''
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
|
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
|
||||||
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
|
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
|
||||||
@@ -28,19 +26,15 @@ class VevoIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
"md5": "95ee28ee45e70130e3ab02b0f579ae23",
|
'md5': '95ee28ee45e70130e3ab02b0f579ae23',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'GB1101300280',
|
'id': 'GB1101300280',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
"upload_date": "20130624",
|
'title': 'Somebody to Die For',
|
||||||
"uploader": "Hurts",
|
'upload_date': '20130624',
|
||||||
"title": "Somebody to Die For",
|
'uploader': 'Hurts',
|
||||||
"duration": 230.12,
|
'timestamp': 1372057200,
|
||||||
"width": 1920,
|
},
|
||||||
"height": 1080,
|
|
||||||
# timestamp and upload_date are often incorrect; seem to change randomly
|
|
||||||
'timestamp': int,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'v3 SMIL format',
|
'note': 'v3 SMIL format',
|
||||||
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
||||||
@@ -48,28 +42,23 @@ class VevoIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'USUV71302923',
|
'id': 'USUV71302923',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'title': 'I Wish I Could Break Your Heart',
|
||||||
'upload_date': '20140219',
|
'upload_date': '20140219',
|
||||||
'uploader': 'Cassadee Pope',
|
'uploader': 'Cassadee Pope',
|
||||||
'title': 'I Wish I Could Break Your Heart',
|
'timestamp': 1392796919,
|
||||||
'duration': 226.101,
|
},
|
||||||
'age_limit': 0,
|
|
||||||
'timestamp': int,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'Age-limited video',
|
'note': 'Age-limited video',
|
||||||
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
|
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'USRV81300282',
|
'id': 'USRV81300282',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': 18,
|
|
||||||
'title': 'Tunnel Vision (Explicit)',
|
'title': 'Tunnel Vision (Explicit)',
|
||||||
|
'upload_date': '20130703',
|
||||||
|
'age_limit': 18,
|
||||||
'uploader': 'Justin Timberlake',
|
'uploader': 'Justin Timberlake',
|
||||||
'upload_date': 're:2013070[34]',
|
'timestamp': 1372888800,
|
||||||
'timestamp': int,
|
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': 'true',
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'No video_info',
|
'note': 'No video_info',
|
||||||
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
|
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
|
||||||
@@ -77,69 +66,46 @@ class VevoIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'USUV71503000',
|
'id': 'USUV71503000',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Till I Die - K Camp ft. T.I.',
|
'title': 'Till I Die',
|
||||||
'duration': 193,
|
'upload_date': '20151207',
|
||||||
|
'age_limit': 18,
|
||||||
|
'uploader': 'K Camp',
|
||||||
|
'timestamp': 1449468000,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Unable to download SMIL file'],
|
|
||||||
}]
|
}]
|
||||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
|
||||||
|
_SOURCE_TYPES = {
|
||||||
|
0: 'youtube',
|
||||||
|
1: 'brightcove',
|
||||||
|
2: 'http',
|
||||||
|
3: 'hls_ios',
|
||||||
|
4: 'hls',
|
||||||
|
5: 'smil', # http
|
||||||
|
7: 'f4m_cc',
|
||||||
|
8: 'f4m_ak',
|
||||||
|
9: 'f4m_l3',
|
||||||
|
10: 'ism',
|
||||||
|
13: 'smil', # rtmp
|
||||||
|
18: 'dash',
|
||||||
|
}
|
||||||
|
_VERSIONS = {
|
||||||
|
0: 'youtube', # only in AuthenticateVideo videoVersions
|
||||||
|
1: 'level3',
|
||||||
|
2: 'akamai',
|
||||||
|
3: 'level3',
|
||||||
|
4: 'amazon',
|
||||||
|
}
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||||
req = sanitized_Request(
|
|
||||||
'http://www.vevo.com/auth', data=b'')
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
req, None,
|
|
||||||
note='Retrieving oauth token',
|
|
||||||
errnote='Unable to retrieve oauth token',
|
|
||||||
fatal=False)
|
|
||||||
if webpage is False:
|
|
||||||
self._oauth_token = None
|
|
||||||
else:
|
|
||||||
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
|
|
||||||
raise ExtractorError('%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
|
|
||||||
|
|
||||||
self._oauth_token = self._search_regex(
|
|
||||||
r'access_token":\s*"([^"]+)"',
|
|
||||||
webpage, 'access token', fatal=False)
|
|
||||||
|
|
||||||
def _formats_from_json(self, video_info):
|
|
||||||
if not video_info:
|
|
||||||
return []
|
|
||||||
|
|
||||||
last_version = {'version': -1}
|
|
||||||
for version in video_info['videoVersions']:
|
|
||||||
# These are the HTTP downloads, other types are for different manifests
|
|
||||||
if version['sourceType'] == 2:
|
|
||||||
if version['version'] > last_version['version']:
|
|
||||||
last_version = version
|
|
||||||
if last_version['version'] == -1:
|
|
||||||
raise ExtractorError('Unable to extract last version of the video')
|
|
||||||
|
|
||||||
renditions = compat_etree_fromstring(last_version['data'])
|
|
||||||
formats = []
|
formats = []
|
||||||
# Already sorted from worst to best quality
|
els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||||
for rend in renditions.findall('rendition'):
|
|
||||||
attr = rend.attrib
|
|
||||||
format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
|
|
||||||
formats.append({
|
|
||||||
'url': attr['url'],
|
|
||||||
'format_id': attr['name'],
|
|
||||||
'format_note': format_note,
|
|
||||||
'height': int(attr['frameheight']),
|
|
||||||
'width': int(attr['frameWidth']),
|
|
||||||
})
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _formats_from_smil(self, smil_doc):
|
|
||||||
formats = []
|
|
||||||
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
|
||||||
for el in els:
|
for el in els:
|
||||||
src = el.attrib['src']
|
src = el.attrib['src']
|
||||||
m = re.match(r'''(?xi)
|
m = re.match(r'''(?xi)
|
||||||
(?P<ext>[a-z0-9]+):
|
(?P<ext>[a-z0-9]+):
|
||||||
(?P<path>
|
(?P<path>
|
||||||
[/a-z0-9]+ # The directory and main part of the URL
|
[/a-z0-9]+ # The directory and main part of the URL
|
||||||
_(?P<cbr>[0-9]+)k
|
_(?P<tbr>[0-9]+)k
|
||||||
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||||
_(?P<vcodec>[a-z0-9]+)
|
_(?P<vcodec>[a-z0-9]+)
|
||||||
_(?P<vbr>[0-9]+)
|
_(?P<vbr>[0-9]+)
|
||||||
@@ -153,9 +119,10 @@ class VevoIE(InfoExtractor):
|
|||||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'format_id': 'SMIL_' + m.group('cbr'),
|
'format_id': 'smil_' + m.group('tbr'),
|
||||||
'vcodec': m.group('vcodec'),
|
'vcodec': m.group('vcodec'),
|
||||||
'acodec': m.group('acodec'),
|
'acodec': m.group('acodec'),
|
||||||
|
'tbr': int(m.group('tbr')),
|
||||||
'vbr': int(m.group('vbr')),
|
'vbr': int(m.group('vbr')),
|
||||||
'abr': int(m.group('abr')),
|
'abr': int(m.group('abr')),
|
||||||
'ext': m.group('ext'),
|
'ext': m.group('ext'),
|
||||||
@@ -164,48 +131,148 @@ class VevoIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _download_api_formats(self, video_id, video_url):
|
def _initialize_api(self, video_id):
|
||||||
if not self._oauth_token:
|
req = sanitized_Request(
|
||||||
self._downloader.report_warning(
|
'http://www.vevo.com/auth', data=b'')
|
||||||
'No oauth token available, skipping API HLS download')
|
webpage = self._download_webpage(
|
||||||
return []
|
req, None,
|
||||||
|
note='Retrieving oauth token',
|
||||||
|
errnote='Unable to retrieve oauth token')
|
||||||
|
|
||||||
api_url = compat_urlparse.urljoin(video_url, '//apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
|
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
|
||||||
video_id, self._oauth_token))
|
raise ExtractorError(
|
||||||
api_data = self._download_json(
|
'%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
|
||||||
api_url, video_id,
|
|
||||||
note='Downloading HLS formats',
|
|
||||||
errnote='Failed to download HLS format list', fatal=False)
|
|
||||||
if api_data is None:
|
|
||||||
return []
|
|
||||||
|
|
||||||
m3u8_url = api_data[0]['url']
|
auth_info = self._parse_json(webpage, video_id)
|
||||||
return self._extract_m3u8_formats(
|
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
|
||||||
m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
|
|
||||||
preference=0)
|
def _call_api(self, path, video_id, note, errnote, fatal=True):
|
||||||
|
return self._download_json(self._api_url_template % path, video_id, note, errnote)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = None
|
|
||||||
|
|
||||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||||
response = self._download_json(json_url, video_id)
|
response = self._download_json(
|
||||||
video_info = response['video'] or {}
|
json_url, video_id, 'Downloading video info', 'Unable to download info')
|
||||||
|
video_info = response.get('video') or {}
|
||||||
if not video_info and response.get('statusCode') != 909:
|
video_versions = video_info.get('videoVersions')
|
||||||
if 'statusMessage' in response:
|
uploader = None
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True)
|
timestamp = None
|
||||||
raise ExtractorError('Unable to extract videos')
|
view_count = None
|
||||||
|
formats = []
|
||||||
|
|
||||||
if not video_info:
|
if not video_info:
|
||||||
if url.startswith('vevo:'):
|
if response.get('statusCode') != 909:
|
||||||
raise ExtractorError('Please specify full Vevo URL for downloading', expected=True)
|
ytid = response.get('errorInfo', {}).get('ytid')
|
||||||
webpage = self._download_webpage(url, video_id)
|
if ytid:
|
||||||
|
self.report_warning(
|
||||||
|
'Video is geoblocked, trying with the YouTube video %s' % ytid)
|
||||||
|
return self.url_result(ytid, 'Youtube', ytid)
|
||||||
|
|
||||||
title = video_info.get('title') or self._og_search_title(webpage)
|
if 'statusMessage' in response:
|
||||||
|
raise ExtractorError('%s said: %s' % (
|
||||||
|
self.IE_NAME, response['statusMessage']), expected=True)
|
||||||
|
raise ExtractorError('Unable to extract videos')
|
||||||
|
|
||||||
formats = self._formats_from_json(video_info)
|
self._initialize_api(video_id)
|
||||||
|
video_info = self._call_api(
|
||||||
|
'video/%s' % video_id, video_id, 'Downloading api video info',
|
||||||
|
'Failed to download video info')
|
||||||
|
|
||||||
|
video_versions = self._call_api(
|
||||||
|
'video/%s/streams' % video_id, video_id,
|
||||||
|
'Downloading video versions info',
|
||||||
|
'Failed to download video versions info')
|
||||||
|
|
||||||
|
timestamp = parse_iso8601(video_info.get('releaseDate'))
|
||||||
|
artists = video_info.get('artists')
|
||||||
|
if artists:
|
||||||
|
uploader = artists[0]['name']
|
||||||
|
view_count = int_or_none(video_info.get('views', {}).get('total'))
|
||||||
|
|
||||||
|
for video_version in video_versions:
|
||||||
|
version = self._VERSIONS.get(video_version['version'])
|
||||||
|
version_url = video_version.get('url')
|
||||||
|
if not version_url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if '.mpd' in version_url or '.ism' in version_url:
|
||||||
|
continue
|
||||||
|
elif '.m3u8' in version_url:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
version_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls-%s' % version,
|
||||||
|
note='Downloading %s m3u8 information' % version,
|
||||||
|
errnote='Failed to download %s m3u8 information' % version,
|
||||||
|
fatal=False))
|
||||||
|
else:
|
||||||
|
m = re.search(r'''(?xi)
|
||||||
|
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||||
|
_(?P<vcodec>[a-z0-9]+)
|
||||||
|
_(?P<vbr>[0-9]+)
|
||||||
|
_(?P<acodec>[a-z0-9]+)
|
||||||
|
_(?P<abr>[0-9]+)
|
||||||
|
\.(?P<ext>[a-z0-9]+)''', version_url)
|
||||||
|
if not m:
|
||||||
|
continue
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': version_url,
|
||||||
|
'format_id': 'http-%s-%s' % (version, video_version['quality']),
|
||||||
|
'vcodec': m.group('vcodec'),
|
||||||
|
'acodec': m.group('acodec'),
|
||||||
|
'vbr': int(m.group('vbr')),
|
||||||
|
'abr': int(m.group('abr')),
|
||||||
|
'ext': m.group('ext'),
|
||||||
|
'width': int(m.group('width')),
|
||||||
|
'height': int(m.group('height')),
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
timestamp = int_or_none(self._search_regex(
|
||||||
|
r'/Date\((\d+)\)/',
|
||||||
|
video_info['releaseDate'], 'release date', fatal=False),
|
||||||
|
scale=1000)
|
||||||
|
artists = video_info.get('mainArtists')
|
||||||
|
if artists:
|
||||||
|
uploader = artists[0]['artistName']
|
||||||
|
|
||||||
|
smil_parsed = False
|
||||||
|
for video_version in video_info['videoVersions']:
|
||||||
|
version = self._VERSIONS.get(video_version['version'])
|
||||||
|
if version == 'youtube':
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
source_type = self._SOURCE_TYPES.get(video_version['sourceType'])
|
||||||
|
renditions = compat_etree_fromstring(video_version['data'])
|
||||||
|
if source_type == 'http':
|
||||||
|
for rend in renditions.findall('rendition'):
|
||||||
|
attr = rend.attrib
|
||||||
|
formats.append({
|
||||||
|
'url': attr['url'],
|
||||||
|
'format_id': 'http-%s-%s' % (version, attr['name']),
|
||||||
|
'height': int_or_none(attr.get('frameheight')),
|
||||||
|
'width': int_or_none(attr.get('frameWidth')),
|
||||||
|
'tbr': int_or_none(attr.get('totalBitrate')),
|
||||||
|
'vbr': int_or_none(attr.get('videoBitrate')),
|
||||||
|
'abr': int_or_none(attr.get('audioBitrate')),
|
||||||
|
'vcodec': attr.get('videoCodec'),
|
||||||
|
'acodec': attr.get('audioCodec'),
|
||||||
|
})
|
||||||
|
elif source_type == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
renditions.find('rendition').attrib['url'], video_id,
|
||||||
|
'mp4', 'm3u8_native', m3u8_id='hls-%s' % version,
|
||||||
|
note='Downloading %s m3u8 information' % version,
|
||||||
|
errnote='Failed to download %s m3u8 information' % version,
|
||||||
|
fatal=False))
|
||||||
|
elif source_type == 'smil' and version == 'level3' and not smil_parsed:
|
||||||
|
formats.extend(self._extract_smil_formats(
|
||||||
|
renditions.find('rendition').attrib['url'], video_id, False))
|
||||||
|
smil_parsed = True
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = video_info['title']
|
||||||
|
|
||||||
is_explicit = video_info.get('isExplicit')
|
is_explicit = video_info.get('isExplicit')
|
||||||
if is_explicit is True:
|
if is_explicit is True:
|
||||||
@@ -215,43 +282,16 @@ class VevoIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
age_limit = None
|
age_limit = None
|
||||||
|
|
||||||
# Download via HLS API
|
duration = video_info.get('duration')
|
||||||
formats.extend(self._download_api_formats(video_id, url))
|
|
||||||
|
|
||||||
# Download SMIL
|
|
||||||
smil_blocks = sorted((
|
|
||||||
f for f in video_info.get('videoVersions', [])
|
|
||||||
if f['sourceType'] == 13),
|
|
||||||
key=lambda f: f['version'])
|
|
||||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
|
||||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
|
||||||
if smil_blocks:
|
|
||||||
smil_url_m = self._search_regex(
|
|
||||||
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
|
||||||
default=None)
|
|
||||||
if smil_url_m is not None:
|
|
||||||
smil_url = smil_url_m
|
|
||||||
if smil_url:
|
|
||||||
smil_doc = self._download_smil(smil_url, video_id, fatal=False)
|
|
||||||
if smil_doc:
|
|
||||||
formats.extend(self._formats_from_smil(smil_doc))
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
timestamp = int_or_none(self._search_regex(
|
|
||||||
r'/Date\((\d+)\)/',
|
|
||||||
video_info['launchDate'], 'launch date', fatal=False),
|
|
||||||
scale=1000) if video_info else None
|
|
||||||
|
|
||||||
duration = video_info.get('duration') or int_or_none(
|
|
||||||
self._html_search_meta('video:duration', webpage))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': video_info.get('imageUrl'),
|
'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'uploader': video_info['mainArtists'][0]['artistName'] if video_info else None,
|
'uploader': uploader,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
@@ -86,10 +86,9 @@ class VGTVIE(XstreamIE):
|
|||||||
{
|
{
|
||||||
# streamType: wasLive
|
# streamType: wasLive
|
||||||
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
|
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
|
||||||
'md5': '458f4841239dab414343b50e5af8869c',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '113063',
|
'id': '113063',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'V75 fra Solvalla 30.05.15',
|
'title': 'V75 fra Solvalla 30.05.15',
|
||||||
'description': 'md5:b3743425765355855f88e096acc93231',
|
'description': 'md5:b3743425765355855f88e096acc93231',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
@@ -98,6 +97,10 @@ class VGTVIE(XstreamIE):
|
|||||||
'upload_date': '20150530',
|
'upload_date': '20150530',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
||||||
|
@@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
class VidziIE(InfoExtractor):
|
class VidziIE(InfoExtractor):
|
||||||
@@ -13,6 +14,11 @@ class VidziIE(InfoExtractor):
|
|||||||
'id': 'cghql9yq6emu',
|
'id': 'cghql9yq6emu',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
|
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
|
||||||
|
'uploader': 'vidzi.tv',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -20,19 +26,14 @@ class VidziIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_host = self._html_search_regex(
|
|
||||||
r'id=\'vplayer\'><img src="http://(.*?)/i', webpage,
|
|
||||||
'video host')
|
|
||||||
video_hash = self._html_search_regex(
|
|
||||||
r'\|([a-z0-9]+)\|hls\|type', webpage, 'video_hash')
|
|
||||||
ext = self._html_search_regex(
|
|
||||||
r'\|tracks\|([a-z0-9]+)\|', webpage, 'video ext')
|
|
||||||
video_url = 'http://' + video_host + '/' + video_hash + '/v.' + ext
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||||
|
|
||||||
|
# Vidzi now uses jwplayer, which can be handled by GenericIE
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': video_url,
|
'url': smuggle_url(url, {'to_generic': True}),
|
||||||
|
'ie_key': 'Generic',
|
||||||
}
|
}
|
||||||
|
@@ -45,6 +45,10 @@ class ViideaIE(InfoExtractor):
|
|||||||
'upload_date': '20130627',
|
'upload_date': '20130627',
|
||||||
'duration': 565,
|
'duration': 565,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# video with invalid direct format links (HTTP 403)
|
# video with invalid direct format links (HTTP 403)
|
||||||
'url': 'http://videolectures.net/russir2010_filippova_nlp/',
|
'url': 'http://videolectures.net/russir2010_filippova_nlp/',
|
||||||
|
@@ -321,7 +321,7 @@ class VKIE(InfoExtractor):
|
|||||||
class VKUserVideosIE(InfoExtractor):
|
class VKUserVideosIE(InfoExtractor):
|
||||||
IE_NAME = 'vk:uservideos'
|
IE_NAME = 'vk:uservideos'
|
||||||
IE_DESC = "VK - User's Videos"
|
IE_DESC = "VK - User's Videos"
|
||||||
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
|
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
|
||||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vk.com/videos205387401',
|
'url': 'http://vk.com/videos205387401',
|
||||||
@@ -333,6 +333,9 @@ class VKUserVideosIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://vk.com/videos-77521',
|
'url': 'http://vk.com/videos-77521',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://vk.com/videos-97664626?section=all',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -34,19 +34,20 @@ class XuiteIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Video with only one format
|
# Video with only one format
|
||||||
'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2',
|
'url': 'http://vlog.xuite.net/play/WUxxR2xCLTI1OTI1MDk5LmZsdg==',
|
||||||
'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e',
|
'md5': '21f7b39c009b5a4615b4463df6eb7a46',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3441629',
|
'id': '25925099',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '孫燕姿 - 眼淚成詩',
|
'title': 'BigBuckBunny_320x180',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'duration': 217.399,
|
'duration': 596.458,
|
||||||
'timestamp': 1299383640,
|
'timestamp': 1454242500,
|
||||||
'upload_date': '20110306',
|
'upload_date': '20160131',
|
||||||
'uploader': 'Valen',
|
'uploader': 'yan12125',
|
||||||
'uploader_id': '10400126',
|
'uploader_id': '12158353',
|
||||||
'categories': ['影視娛樂'],
|
'categories': ['個人短片'],
|
||||||
|
'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Video with two formats
|
# Video with two formats
|
||||||
|
@@ -114,15 +114,13 @@ class YouPornIE(InfoExtractor):
|
|||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._og_search_description(webpage, default=None)
|
||||||
r'(?s)<div[^>]+class=["\']video-description["\'][^>]*>(.+?)</div>',
|
|
||||||
webpage, 'description', default=None)
|
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
||||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=["\']videoInfoBy["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>',
|
r'(?s)<div[^>]+class=["\']videoInfoBy(?:\s+[^"\']+)?["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>',
|
||||||
|
@@ -32,6 +32,7 @@ from ..utils import (
|
|||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
@@ -180,7 +181,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class YoutubeEntryListBaseInfoExtractor(InfoExtractor):
|
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
# Extract entries from page with "Load more" button
|
# Extract entries from page with "Load more" button
|
||||||
def _entries(self, page, playlist_id):
|
def _entries(self, page, playlist_id):
|
||||||
more_widget_html = content_html = page
|
more_widget_html = content_html = page
|
||||||
@@ -232,7 +233,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||||
def _process_page(self, content):
|
def _process_page(self, content):
|
||||||
for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content):
|
for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
|
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
|
||||||
|
|
||||||
@@ -277,93 +278,93 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
$"""
|
$"""
|
||||||
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
||||||
_formats = {
|
_formats = {
|
||||||
'5': {'ext': 'flv', 'width': 400, 'height': 240},
|
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||||
'6': {'ext': 'flv', 'width': 450, 'height': 270},
|
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||||
'13': {'ext': '3gp'},
|
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||||
'17': {'ext': '3gp', 'width': 176, 'height': 144},
|
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
|
||||||
'18': {'ext': 'mp4', 'width': 640, 'height': 360},
|
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
|
||||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720},
|
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||||
'34': {'ext': 'flv', 'width': 640, 'height': 360},
|
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||||
'35': {'ext': 'flv', 'width': 854, 'height': 480},
|
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||||
'36': {'ext': '3gp', 'width': 320, 'height': 240},
|
'36': {'ext': '3gp', 'width': 320, 'height': 240, 'acodec': 'aac', 'abr': 32, 'vcodec': 'mp4v'},
|
||||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080},
|
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072},
|
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||||
'43': {'ext': 'webm', 'width': 640, 'height': 360},
|
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
||||||
'44': {'ext': 'webm', 'width': 854, 'height': 480},
|
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
||||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720},
|
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
||||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080},
|
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
||||||
'59': {'ext': 'mp4', 'width': 854, 'height': 480},
|
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||||
'78': {'ext': 'mp4', 'width': 854, 'height': 480},
|
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||||
|
|
||||||
|
|
||||||
# 3d videos
|
# 3D videos
|
||||||
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20},
|
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
|
||||||
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20},
|
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
|
||||||
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20},
|
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
|
||||||
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20},
|
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
|
||||||
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20},
|
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
|
||||||
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20},
|
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
|
||||||
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20},
|
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
|
||||||
|
|
||||||
# Apple HTTP Live Streaming
|
# Apple HTTP Live Streaming
|
||||||
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
|
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
||||||
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10},
|
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
||||||
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10},
|
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
|
||||||
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10},
|
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
|
||||||
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10},
|
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
|
||||||
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10},
|
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
|
||||||
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10},
|
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
|
||||||
|
|
||||||
# DASH mp4 video
|
# DASH mp4 video
|
||||||
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
|
||||||
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
|
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||||
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
|
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
|
||||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
|
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
|
||||||
|
|
||||||
# Dash mp4 audio
|
# Dash mp4 audio
|
||||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
|
|
||||||
# Dash webm
|
# Dash webm
|
||||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||||
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
|
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
||||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||||
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||||
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
|
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
|
||||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
|
||||||
|
|
||||||
# Dash webm audio
|
# Dash webm audio
|
||||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
||||||
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
||||||
|
|
||||||
# Dash webm audio with opus inside
|
# Dash webm audio with opus inside
|
||||||
'249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
|
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
|
||||||
'250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
|
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
|
||||||
'251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
|
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
|
||||||
|
|
||||||
# RTMP (unnamed)
|
# RTMP (unnamed)
|
||||||
'_rtmp': {'protocol': 'rtmp'},
|
'_rtmp': {'protocol': 'rtmp'},
|
||||||
@@ -917,7 +918,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if lang in sub_lang_list:
|
if lang in sub_lang_list:
|
||||||
continue
|
continue
|
||||||
sub_formats = []
|
sub_formats = []
|
||||||
for ext in ['sbv', 'vtt', 'srt']:
|
for ext in ['ttml', 'vtt']:
|
||||||
params = compat_urllib_parse.urlencode({
|
params = compat_urllib_parse.urlencode({
|
||||||
'lang': lang,
|
'lang': lang,
|
||||||
'v': video_id,
|
'v': video_id,
|
||||||
@@ -1034,73 +1035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
||||||
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
||||||
|
|
||||||
def _parse_dash_manifest(
|
|
||||||
self, video_id, dash_manifest_url, player_url, age_gate, fatal=True):
|
|
||||||
def decrypt_sig(mobj):
|
|
||||||
s = mobj.group(1)
|
|
||||||
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
|
|
||||||
return '/signature/%s' % dec_s
|
|
||||||
dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
|
|
||||||
dash_doc = self._download_xml(
|
|
||||||
dash_manifest_url, video_id,
|
|
||||||
note='Downloading DASH manifest',
|
|
||||||
errnote='Could not download DASH manifest',
|
|
||||||
fatal=fatal)
|
|
||||||
|
|
||||||
if dash_doc is False:
|
|
||||||
return []
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
|
|
||||||
mime_type = a.attrib.get('mimeType')
|
|
||||||
for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
|
||||||
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
|
|
||||||
if url_el is None:
|
|
||||||
continue
|
|
||||||
if mime_type == 'text/vtt':
|
|
||||||
# TODO implement WebVTT downloading
|
|
||||||
pass
|
|
||||||
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
|
|
||||||
segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
|
|
||||||
format_id = r.attrib['id']
|
|
||||||
video_url = url_el.text
|
|
||||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
|
|
||||||
f = {
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': video_url,
|
|
||||||
'width': int_or_none(r.attrib.get('width')),
|
|
||||||
'height': int_or_none(r.attrib.get('height')),
|
|
||||||
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
|
|
||||||
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
|
|
||||||
'filesize': filesize,
|
|
||||||
'fps': int_or_none(r.attrib.get('frameRate')),
|
|
||||||
}
|
|
||||||
if segment_list is not None:
|
|
||||||
f.update({
|
|
||||||
'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
|
|
||||||
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')],
|
|
||||||
'protocol': 'http_dash_segments',
|
|
||||||
})
|
|
||||||
try:
|
|
||||||
existing_format = next(
|
|
||||||
fo for fo in formats
|
|
||||||
if fo['format_id'] == format_id)
|
|
||||||
except StopIteration:
|
|
||||||
full_info = self._formats.get(format_id, {}).copy()
|
|
||||||
full_info.update(f)
|
|
||||||
codecs = r.attrib.get('codecs')
|
|
||||||
if codecs:
|
|
||||||
if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
|
|
||||||
full_info['vcodec'] = codecs
|
|
||||||
elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
|
|
||||||
full_info['acodec'] = codecs
|
|
||||||
formats.append(full_info)
|
|
||||||
else:
|
|
||||||
existing_format.update(f)
|
|
||||||
else:
|
|
||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
|
||||||
@@ -1461,15 +1395,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if 'ratebypass' not in url:
|
if 'ratebypass' not in url:
|
||||||
url += '&ratebypass=yes'
|
url += '&ratebypass=yes'
|
||||||
|
|
||||||
|
dct = {
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': url,
|
||||||
|
'player_url': player_url,
|
||||||
|
}
|
||||||
|
if format_id in self._formats:
|
||||||
|
dct.update(self._formats[format_id])
|
||||||
|
|
||||||
# Some itags are not included in DASH manifest thus corresponding formats will
|
# Some itags are not included in DASH manifest thus corresponding formats will
|
||||||
# lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
|
# lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
|
||||||
# Trying to extract metadata from url_encoded_fmt_stream_map entry.
|
# Trying to extract metadata from url_encoded_fmt_stream_map entry.
|
||||||
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
|
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
|
||||||
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
|
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
|
||||||
dct = {
|
|
||||||
'format_id': format_id,
|
more_fields = {
|
||||||
'url': url,
|
|
||||||
'player_url': player_url,
|
|
||||||
'filesize': int_or_none(url_data.get('clen', [None])[0]),
|
'filesize': int_or_none(url_data.get('clen', [None])[0]),
|
||||||
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
|
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
|
||||||
'width': width,
|
'width': width,
|
||||||
@@ -1477,13 +1417,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'fps': int_or_none(url_data.get('fps', [None])[0]),
|
'fps': int_or_none(url_data.get('fps', [None])[0]),
|
||||||
'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
|
'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
|
||||||
}
|
}
|
||||||
|
for key, value in more_fields.items():
|
||||||
|
if value:
|
||||||
|
dct[key] = value
|
||||||
type_ = url_data.get('type', [None])[0]
|
type_ = url_data.get('type', [None])[0]
|
||||||
if type_:
|
if type_:
|
||||||
type_split = type_.split(';')
|
type_split = type_.split(';')
|
||||||
kind_ext = type_split[0].split('/')
|
kind_ext = type_split[0].split('/')
|
||||||
if len(kind_ext) == 2:
|
if len(kind_ext) == 2:
|
||||||
kind, ext = kind_ext
|
kind, _ = kind_ext
|
||||||
dct['ext'] = ext
|
dct['ext'] = mimetype2ext(type_split[0])
|
||||||
if kind in ('audio', 'video'):
|
if kind in ('audio', 'video'):
|
||||||
codecs = None
|
codecs = None
|
||||||
for mobj in re.finditer(
|
for mobj in re.finditer(
|
||||||
@@ -1501,8 +1444,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'acodec': acodec,
|
'acodec': acodec,
|
||||||
'vcodec': vcodec,
|
'vcodec': vcodec,
|
||||||
})
|
})
|
||||||
if format_id in self._formats:
|
|
||||||
dct.update(self._formats[format_id])
|
|
||||||
formats.append(dct)
|
formats.append(dct)
|
||||||
elif video_info.get('hlsvp'):
|
elif video_info.get('hlsvp'):
|
||||||
manifest_url = video_info['hlsvp'][0]
|
manifest_url = video_info['hlsvp'][0]
|
||||||
@@ -1525,8 +1466,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
for dash_manifest_url in dash_mpds:
|
for dash_manifest_url in dash_mpds:
|
||||||
dash_formats = {}
|
dash_formats = {}
|
||||||
try:
|
try:
|
||||||
for df in self._parse_dash_manifest(
|
def decrypt_sig(mobj):
|
||||||
video_id, dash_manifest_url, player_url, age_gate, dash_mpd_fatal):
|
s = mobj.group(1)
|
||||||
|
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
|
||||||
|
return '/signature/%s' % dec_s
|
||||||
|
|
||||||
|
dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
|
||||||
|
|
||||||
|
for df in self._extract_dash_manifest_formats(
|
||||||
|
dash_manifest_url, video_id, fatal=dash_mpd_fatal,
|
||||||
|
namespace='urn:mpeg:DASH:schema:MPD:2011', formats_dict=self._formats):
|
||||||
# Do not overwrite DASH format found in some previous DASH manifest
|
# Do not overwrite DASH format found in some previous DASH manifest
|
||||||
if df['format_id'] not in dash_formats:
|
if df['format_id'] not in dash_formats:
|
||||||
dash_formats[df['format_id']] = df
|
dash_formats[df['format_id']] = df
|
||||||
@@ -1594,7 +1543,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor):
|
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com playlists'
|
IE_DESC = 'YouTube.com playlists'
|
||||||
_VALID_URL = r"""(?x)(?:
|
_VALID_URL = r"""(?x)(?:
|
||||||
(?:https?://)?
|
(?:https?://)?
|
||||||
@@ -1838,7 +1787,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubeUserIE(YoutubeChannelIE):
|
class YoutubeUserIE(YoutubeChannelIE):
|
||||||
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
_TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
|
||||||
IE_NAME = 'youtube:user'
|
IE_NAME = 'youtube:user'
|
||||||
|
|
||||||
|
@@ -415,6 +415,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--hls-prefer-native',
|
'--hls-prefer-native',
|
||||||
dest='hls_prefer_native', action='store_true',
|
dest='hls_prefer_native', action='store_true',
|
||||||
help='Use the native HLS downloader instead of ffmpeg (experimental)')
|
help='Use the native HLS downloader instead of ffmpeg (experimental)')
|
||||||
|
downloader.add_option(
|
||||||
|
'--hls-use-mpegts',
|
||||||
|
dest='hls_use_mpegts', action='store_true',
|
||||||
|
help='Use the mpegts container for HLS videos, allowing to play the '
|
||||||
|
'video while downloading (some players may not be able to play it)')
|
||||||
downloader.add_option(
|
downloader.add_option(
|
||||||
'--external-downloader',
|
'--external-downloader',
|
||||||
dest='external_downloader', metavar='COMMAND',
|
dest='external_downloader', metavar='COMMAND',
|
||||||
|
@@ -391,6 +391,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
|||||||
for (name, value) in metadata.items():
|
for (name, value) in metadata.items():
|
||||||
options.extend(['-metadata', '%s=%s' % (name, value)])
|
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||||
|
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/8350
|
||||||
|
if info['protocol'] == 'm3u8_native' or self._downloader.params.get('hls_prefer_native', False):
|
||||||
|
options.extend(['-bsf:a', 'aac_adtstoasc'])
|
||||||
|
|
||||||
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
|
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
|
||||||
self.run_ffmpeg(filename, temp_filename, options)
|
self.run_ffmpeg(filename, temp_filename, options)
|
||||||
os.remove(encodeFilename(filename))
|
os.remove(encodeFilename(filename))
|
||||||
@@ -479,6 +483,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
|
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
|
||||||
return [], info
|
return [], info
|
||||||
self._downloader.to_screen('[ffmpeg] Converting subtitles')
|
self._downloader.to_screen('[ffmpeg] Converting subtitles')
|
||||||
|
sub_filenames = []
|
||||||
for lang, sub in subs.items():
|
for lang, sub in subs.items():
|
||||||
ext = sub['ext']
|
ext = sub['ext']
|
||||||
if ext == new_ext:
|
if ext == new_ext:
|
||||||
@@ -486,6 +491,8 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
'[ffmpeg] Subtitle file for %s is already in the requested'
|
'[ffmpeg] Subtitle file for %s is already in the requested'
|
||||||
'format' % new_ext)
|
'format' % new_ext)
|
||||||
continue
|
continue
|
||||||
|
old_file = subtitles_filename(filename, lang, ext)
|
||||||
|
sub_filenames.append(old_file)
|
||||||
new_file = subtitles_filename(filename, lang, new_ext)
|
new_file = subtitles_filename(filename, lang, new_ext)
|
||||||
|
|
||||||
if ext == 'dfxp' or ext == 'ttml':
|
if ext == 'dfxp' or ext == 'ttml':
|
||||||
@@ -493,7 +500,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
'You have requested to convert dfxp (TTML) subtitles into another format, '
|
'You have requested to convert dfxp (TTML) subtitles into another format, '
|
||||||
'which results in style information loss')
|
'which results in style information loss')
|
||||||
|
|
||||||
dfxp_file = subtitles_filename(filename, lang, ext)
|
dfxp_file = old_file
|
||||||
srt_file = subtitles_filename(filename, lang, 'srt')
|
srt_file = subtitles_filename(filename, lang, 'srt')
|
||||||
|
|
||||||
with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
|
with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
|
||||||
@@ -511,9 +518,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
if new_ext == 'srt':
|
if new_ext == 'srt':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self.run_ffmpeg(
|
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
|
||||||
subtitles_filename(filename, lang, ext),
|
|
||||||
new_file, ['-f', new_format])
|
|
||||||
|
|
||||||
with io.open(new_file, 'rt', encoding='utf-8') as f:
|
with io.open(new_file, 'rt', encoding='utf-8') as f:
|
||||||
subs[lang] = {
|
subs[lang] = {
|
||||||
@@ -521,4 +526,4 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
'data': f.read(),
|
'data': f.read(),
|
||||||
}
|
}
|
||||||
|
|
||||||
return [], info
|
return sub_filenames, info
|
||||||
|
@@ -1828,9 +1828,11 @@ def mimetype2ext(mt):
|
|||||||
_, _, res = mt.rpartition('/')
|
_, _, res = mt.rpartition('/')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'x-ms-wmv': 'wmv',
|
'3gpp': '3gp',
|
||||||
'x-mp4-fragmented': 'mp4',
|
|
||||||
'ttml+xml': 'ttml',
|
'ttml+xml': 'ttml',
|
||||||
|
'x-flv': 'flv',
|
||||||
|
'x-mp4-fragmented': 'mp4',
|
||||||
|
'x-ms-wmv': 'wmv',
|
||||||
}.get(res, res)
|
}.get(res, res)
|
||||||
|
|
||||||
|
|
||||||
@@ -2015,20 +2017,27 @@ def dfxp2srt(dfxp_data):
|
|||||||
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
||||||
})
|
})
|
||||||
|
|
||||||
|
class TTMLPElementParser(object):
|
||||||
|
out = ''
|
||||||
|
|
||||||
|
def start(self, tag, attrib):
|
||||||
|
if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
||||||
|
self.out += '\n'
|
||||||
|
|
||||||
|
def end(self, tag):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def data(self, data):
|
||||||
|
self.out += data
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
return self.out.strip()
|
||||||
|
|
||||||
def parse_node(node):
|
def parse_node(node):
|
||||||
str_or_empty = functools.partial(str_or_none, default='')
|
target = TTMLPElementParser()
|
||||||
|
parser = xml.etree.ElementTree.XMLParser(target=target)
|
||||||
out = str_or_empty(node.text)
|
parser.feed(xml.etree.ElementTree.tostring(node))
|
||||||
|
return parser.close()
|
||||||
for child in node:
|
|
||||||
if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
|
||||||
out += '\n' + str_or_empty(child.tail)
|
|
||||||
elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
|
|
||||||
out += str_or_empty(parse_node(child))
|
|
||||||
else:
|
|
||||||
out += str_or_empty(xml.etree.ElementTree.tostring(child))
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
||||||
out = []
|
out = []
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.01.23'
|
__version__ = '2016.02.04'
|
||||||
|
Reference in New Issue
Block a user