Compare commits
145 Commits
2013.11.06
...
2013.11.22
Author | SHA1 | Date | |
---|---|---|---|
a87b0615aa | |||
d7386f6276 | |||
081640940e | |||
7012b23c94 | |||
d3b30148ed | |||
9f79463803 | |||
d35dc6d3b5 | |||
50123be421 | |||
3f8ced5144 | |||
00ea0f11eb | |||
0b63aed8df | |||
15c3adbb16 | |||
f143a42fe6 | |||
241650c7ff | |||
bfe7439a20 | |||
cffa6aa107 | |||
02e4ebbbad | |||
ab009f59ef | |||
0980426559 | |||
b1c9c66936 | |||
a6a173c2fd | |||
2bb683c201 | |||
64bb5187f5 | |||
9e4f50a8ae | |||
0190eecc00 | |||
ca872a4c0b | |||
f2e87ef4fa | |||
0ad97bbc05 | |||
c4864091a1 | |||
9a98a466b3 | |||
f99e0f1ed6 | |||
d323bcb152 | |||
da6a795fdb | |||
c5edcde21f | |||
15ff3c831e | |||
100959a6d9 | |||
0a120f74b2 | |||
8f05351984 | |||
4eb92208a3 | |||
71791f414c | |||
f3682997d7 | |||
cc13cc0251 | |||
86bd5f2ca9 | |||
8694c60000 | |||
9d1538182f | |||
5904088811 | |||
69545c2aff | |||
495da337ae | |||
34b3afc7be | |||
00373a4c5d | |||
cb7dfeeac4 | |||
efd6c574a2 | |||
4113e6ab56 | |||
9a942a4671 | |||
9906d397a0 | |||
ae8f787141 | |||
a81b4d5c8f | |||
887c6acdf2 | |||
83aa529330 | |||
96b31b6533 | |||
fccd377198 | |||
73c566695f | |||
63b7b7224a | |||
ce80c8b8ee | |||
749febf4d1 | |||
bdde425cbe | |||
746f491f82 | |||
1672647ade | |||
90b6bbc38c | |||
ce02ed60f2 | |||
1e5b9a95fd | |||
1d699755e0 | |||
ddf49c6344 | |||
ba3881dffd | |||
d1c252048b | |||
eab2724138 | |||
21ea3e06c9 | |||
52d703d3d1 | |||
ce152341a1 | |||
f058e34011 | |||
b5349e8721 | |||
7150858d49 | |||
91c7271aab | |||
aa13b2dffd | |||
fc2ef392be | |||
463a908705 | |||
d24ffe1cfa | |||
78fb87b283 | |||
ab2d524780 | |||
85d61685f1 | |||
b9643eed7c | |||
feee2ecfa9 | |||
a25a5cfeec | |||
0e145dd541 | |||
9f9be844fc | |||
e3b9ab5e18 | |||
c66d2baa9c | |||
08bc37cdd0 | |||
9771cceb2c | |||
ca715127a2 | |||
ea7a7af1d4 | |||
880e1c529d | |||
dcbb45803f | |||
80b9bbce86 | |||
d37936386f | |||
c3a3028f9f | |||
6c5ad80cdc | |||
b5bdc2699a | |||
384b98cd8f | |||
eb9b5bffef | |||
0bd59f3723 | |||
8b8cbd8f6d | |||
72b18c5d34 | |||
eb0a839866 | |||
1777d5a952 | |||
d4b7da84c3 | |||
801dbbdffd | |||
0ed05a1d2d | |||
1008bebade | |||
ae84f879d7 | |||
be6dfd1b49 | |||
231516b6c9 | |||
fb53d58dcf | |||
2a9e9b210b | |||
897d6cc43a | |||
f470c6c812 | |||
566d4e0425 | |||
81be02d2f9 | |||
c2b6a482d5 | |||
12c167c881 | |||
20aafee7fa | |||
be07375b66 | |||
dd5bcdc4c9 | |||
6161d17579 | |||
4ac5306ae7 | |||
b1a80ec1a9 | |||
5137ebac0b | |||
a8eeb0597b | |||
4ed3e51080 | |||
38fcd4597a | |||
38db46794f | |||
a9a3876d55 | |||
1f343eaabb | |||
0a43ddf320 | |||
31366066bd |
@ -92,12 +92,14 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||||
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
|
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
|
||||||
when it is present in output filename template or
|
when it is present in output filename template or
|
||||||
--autonumber option is given
|
--auto-number option is given
|
||||||
--restrict-filenames Restrict filenames to only ASCII characters, and
|
--restrict-filenames Restrict filenames to only ASCII characters, and
|
||||||
avoid "&" and spaces in filenames
|
avoid "&" and spaces in filenames
|
||||||
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
||||||
-w, --no-overwrites do not overwrite files
|
-w, --no-overwrites do not overwrite files
|
||||||
-c, --continue resume partially downloaded files
|
-c, --continue force resume of partially downloaded files. By
|
||||||
|
default, youtube-dl will resume downloads if
|
||||||
|
possible.
|
||||||
--no-continue do not resume partially downloaded files (restart
|
--no-continue do not resume partially downloaded files (restart
|
||||||
from beginning)
|
from beginning)
|
||||||
--cookies FILE file to read cookies from and dump cookie jar in
|
--cookies FILE file to read cookies from and dump cookie jar in
|
||||||
@ -121,6 +123,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--get-description simulate, quiet but print video description
|
--get-description simulate, quiet but print video description
|
||||||
--get-filename simulate, quiet but print output filename
|
--get-filename simulate, quiet but print output filename
|
||||||
--get-format simulate, quiet but print output format
|
--get-format simulate, quiet but print output format
|
||||||
|
-j, --dump-json simulate, quiet but print JSON information
|
||||||
--newline output progress bar as new lines
|
--newline output progress bar as new lines
|
||||||
--no-progress do not print progress bar
|
--no-progress do not print progress bar
|
||||||
--console-title display progress in console titlebar
|
--console-title display progress in console titlebar
|
||||||
|
2
setup.py
2
setup.py
@ -48,7 +48,7 @@ else:
|
|||||||
'data_files': [ # Installing system-wide would require sudo...
|
'data_files': [ # Installing system-wide would require sudo...
|
||||||
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
||||||
('share/doc/youtube_dl', ['README.txt']),
|
('share/doc/youtube_dl', ['README.txt']),
|
||||||
('share/man/man1/', ['youtube-dl.1'])
|
('share/man/man1', ['youtube-dl.1'])
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
if setuptools_available:
|
if setuptools_available:
|
||||||
|
@ -24,7 +24,7 @@ def _download_restricted(url, filename, age):
|
|||||||
}
|
}
|
||||||
ydl = YoutubeDL(params)
|
ydl = YoutubeDL(params)
|
||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
json_filename = filename + '.info.json'
|
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
||||||
try_rm(json_filename)
|
try_rm(json_filename)
|
||||||
ydl.download([url])
|
ydl.download([url])
|
||||||
res = os.path.exists(json_filename)
|
res = os.path.exists(json_filename)
|
||||||
|
@ -1,70 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
# Allow direct execution
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import unittest
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup, md5
|
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import DailymotionIE
|
|
||||||
|
|
||||||
class TestDailymotionSubtitles(unittest.TestCase):
|
|
||||||
def setUp(self):
|
|
||||||
self.DL = FakeYDL()
|
|
||||||
self.url = 'http://www.dailymotion.com/video/xczg00'
|
|
||||||
def getInfoDict(self):
|
|
||||||
IE = DailymotionIE(self.DL)
|
|
||||||
info_dict = IE.extract(self.url)
|
|
||||||
return info_dict
|
|
||||||
def getSubtitles(self):
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
return info_dict['subtitles']
|
|
||||||
def test_no_writesubtitles(self):
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
def test_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
|
||||||
def test_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['fr']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
|
||||||
def test_allsubtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(len(subtitles.keys()), 5)
|
|
||||||
def test_list_subtitles(self):
|
|
||||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
def test_automatic_captions(self):
|
|
||||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslang'] = ['en']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(len(subtitles.keys()) == 0)
|
|
||||||
def test_nosubtitles(self):
|
|
||||||
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
|
||||||
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(len(subtitles), 0)
|
|
||||||
def test_multiple_langs(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['es', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
@ -103,7 +103,7 @@ def generator(test_case):
|
|||||||
tc_filename = get_tc_filename(tc)
|
tc_filename = get_tc_filename(tc)
|
||||||
try_rm(tc_filename)
|
try_rm(tc_filename)
|
||||||
try_rm(tc_filename + '.part')
|
try_rm(tc_filename + '.part')
|
||||||
try_rm(tc_filename + '.info.json')
|
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
|
||||||
try_rm_tcs_files()
|
try_rm_tcs_files()
|
||||||
try:
|
try:
|
||||||
try_num = 1
|
try_num = 1
|
||||||
@ -130,11 +130,12 @@ def generator(test_case):
|
|||||||
if not test_case.get('params', {}).get('skip_download', False):
|
if not test_case.get('params', {}).get('skip_download', False):
|
||||||
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
||||||
self.assertTrue(tc_filename in finished_hook_called)
|
self.assertTrue(tc_filename in finished_hook_called)
|
||||||
self.assertTrue(os.path.exists(tc_filename + '.info.json'))
|
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
||||||
|
self.assertTrue(os.path.exists(info_json_fn))
|
||||||
if 'md5' in tc:
|
if 'md5' in tc:
|
||||||
md5_for_file = _file_md5(tc_filename)
|
md5_for_file = _file_md5(tc_filename)
|
||||||
self.assertEqual(md5_for_file, tc['md5'])
|
self.assertEqual(md5_for_file, tc['md5'])
|
||||||
with io.open(tc_filename + '.info.json', encoding='utf-8') as infof:
|
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
for (info_field, expected) in tc.get('info_dict', {}).items():
|
||||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||||
|
@ -17,10 +17,12 @@ from youtube_dl.extractor import (
|
|||||||
DailymotionUserIE,
|
DailymotionUserIE,
|
||||||
VimeoChannelIE,
|
VimeoChannelIE,
|
||||||
UstreamChannelIE,
|
UstreamChannelIE,
|
||||||
|
SoundcloudSetIE,
|
||||||
SoundcloudUserIE,
|
SoundcloudUserIE,
|
||||||
LivestreamIE,
|
LivestreamIE,
|
||||||
NHLVideocenterIE,
|
NHLVideocenterIE,
|
||||||
BambuserChannelIE,
|
BambuserChannelIE,
|
||||||
|
BandcampAlbumIE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -61,6 +63,14 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['id'], u'5124905')
|
self.assertEqual(result['id'], u'5124905')
|
||||||
self.assertTrue(len(result['entries']) >= 11)
|
self.assertTrue(len(result['entries']) >= 11)
|
||||||
|
|
||||||
|
def test_soundcloud_set(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = SoundcloudSetIE(dl)
|
||||||
|
result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'The Royal Concept EP')
|
||||||
|
self.assertTrue(len(result['entries']) >= 6)
|
||||||
|
|
||||||
def test_soundcloud_user(self):
|
def test_soundcloud_user(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = SoundcloudUserIE(dl)
|
ie = SoundcloudUserIE(dl)
|
||||||
@ -92,7 +102,15 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
result = ie.extract('http://bambuser.com/channel/pixelversity')
|
result = ie.extract('http://bambuser.com/channel/pixelversity')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['title'], u'pixelversity')
|
self.assertEqual(result['title'], u'pixelversity')
|
||||||
self.assertTrue(len(result['entries']) >= 66)
|
self.assertTrue(len(result['entries']) >= 60)
|
||||||
|
|
||||||
|
def test_bandcamp_album(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = BandcampAlbumIE(dl)
|
||||||
|
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'Nightmare Night EP')
|
||||||
|
self.assertTrue(len(result['entries']) >= 4)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
211
test/test_subtitles.py
Normal file
211
test/test_subtitles.py
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from test.helper import FakeYDL, global_setup, md5
|
||||||
|
global_setup()
|
||||||
|
|
||||||
|
|
||||||
|
from youtube_dl.extractor import (
|
||||||
|
YoutubeIE,
|
||||||
|
DailymotionIE,
|
||||||
|
TEDIE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BaseTestSubtitles(unittest.TestCase):
|
||||||
|
url = None
|
||||||
|
IE = None
|
||||||
|
def setUp(self):
|
||||||
|
self.DL = FakeYDL()
|
||||||
|
self.ie = self.IE(self.DL)
|
||||||
|
|
||||||
|
def getInfoDict(self):
|
||||||
|
info_dict = self.ie.extract(self.url)
|
||||||
|
return info_dict
|
||||||
|
|
||||||
|
def getSubtitles(self):
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
return info_dict['subtitles']
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'QRS8MkLhQmM'
|
||||||
|
IE = YoutubeIE
|
||||||
|
|
||||||
|
def getSubtitles(self):
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
return info_dict[0]['subtitles']
|
||||||
|
|
||||||
|
def test_youtube_no_writesubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = False
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(subtitles, None)
|
||||||
|
|
||||||
|
def test_youtube_subtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||||
|
|
||||||
|
def test_youtube_subtitles_lang(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['it']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||||
|
|
||||||
|
def test_youtube_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles.keys()), 13)
|
||||||
|
|
||||||
|
def test_youtube_subtitles_sbv_format(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitlesformat'] = 'sbv'
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||||
|
|
||||||
|
def test_youtube_subtitles_vtt_format(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitlesformat'] = 'vtt'
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
|
||||||
|
|
||||||
|
def test_youtube_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_youtube_automatic_captions(self):
|
||||||
|
self.url = '8YoUxe5ncPo'
|
||||||
|
self.DL.params['writeautomaticsub'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['it']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertTrue(subtitles['it'] is not None)
|
||||||
|
|
||||||
|
def test_youtube_nosubtitles(self):
|
||||||
|
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
||||||
|
self.url = 'sAjKT8FhjI8'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles), 0)
|
||||||
|
|
||||||
|
def test_youtube_multiple_langs(self):
|
||||||
|
self.url = 'QRS8MkLhQmM'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
langs = ['it', 'fr', 'de']
|
||||||
|
self.DL.params['subtitleslangs'] = langs
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
for lang in langs:
|
||||||
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
|
class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.dailymotion.com/video/xczg00'
|
||||||
|
IE = DailymotionIE
|
||||||
|
|
||||||
|
def test_no_writesubtitles(self):
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(subtitles, None)
|
||||||
|
|
||||||
|
def test_subtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
||||||
|
|
||||||
|
def test_subtitles_lang(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['fr']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles.keys()), 5)
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_automatic_captions(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writeautomaticsub'] = True
|
||||||
|
self.DL.params['subtitleslang'] = ['en']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertTrue(len(subtitles.keys()) == 0)
|
||||||
|
|
||||||
|
def test_nosubtitles(self):
|
||||||
|
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
||||||
|
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles), 0)
|
||||||
|
|
||||||
|
def test_multiple_langs(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
langs = ['es', 'fr', 'de']
|
||||||
|
self.DL.params['subtitleslangs'] = langs
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
for lang in langs:
|
||||||
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTedSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
||||||
|
IE = TEDIE
|
||||||
|
|
||||||
|
def test_no_writesubtitles(self):
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(subtitles, None)
|
||||||
|
|
||||||
|
def test_subtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')
|
||||||
|
|
||||||
|
def test_subtitles_lang(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['fr']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles.keys()), 28)
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_automatic_captions(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writeautomaticsub'] = True
|
||||||
|
self.DL.params['subtitleslang'] = ['en']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertTrue(len(subtitles.keys()) == 0)
|
||||||
|
|
||||||
|
def test_multiple_langs(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
langs = ['es', 'fr', 'de']
|
||||||
|
self.DL.params['subtitleslangs'] = langs
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
for lang in langs:
|
||||||
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -24,6 +24,8 @@ from youtube_dl.utils import (
|
|||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
shell_quote,
|
||||||
|
encodeFilename,
|
||||||
)
|
)
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
@ -170,6 +172,10 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(res_url, url)
|
self.assertEqual(res_url, url)
|
||||||
self.assertEqual(res_data, None)
|
self.assertEqual(res_data, None)
|
||||||
|
|
||||||
|
def test_shell_quote(self):
|
||||||
|
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
|
||||||
|
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -31,7 +31,7 @@ params = get_params({
|
|||||||
|
|
||||||
|
|
||||||
TEST_ID = 'BaW_jenozKc'
|
TEST_ID = 'BaW_jenozKc'
|
||||||
INFO_JSON_FILE = TEST_ID + '.mp4.info.json'
|
INFO_JSON_FILE = TEST_ID + '.info.json'
|
||||||
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
||||||
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
def test_youtube_playlist(self):
|
def test_youtube_playlist(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['title'], 'ytdl test PL')
|
self.assertEqual(result['title'], 'ytdl test PL')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||||
@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
def test_issue_673(self):
|
def test_issue_673(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('PLBB231211A4F62143')[0]
|
result = ie.extract('PLBB231211A4F62143')
|
||||||
self.assertTrue(len(result['entries']) > 25)
|
self.assertTrue(len(result['entries']) > 25)
|
||||||
|
|
||||||
def test_youtube_playlist_long(self):
|
def test_youtube_playlist_long(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertTrue(len(result['entries']) >= 799)
|
self.assertTrue(len(result['entries']) >= 799)
|
||||||
|
|
||||||
@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
#651
|
#651
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||||
@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
def test_youtube_playlist_empty(self):
|
def test_youtube_playlist_empty(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(len(result['entries']), 0)
|
self.assertEqual(len(result['entries']), 0)
|
||||||
|
|
||||||
@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
# TODO find a > 100 (paginating?) videos course
|
# TODO find a > 100 (paginating?) videos course
|
||||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
|
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
@ -84,22 +84,22 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeChannelIE(dl)
|
ie = YoutubeChannelIE(dl)
|
||||||
#test paginated channel
|
#test paginated channel
|
||||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
|
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
||||||
self.assertTrue(len(result['entries']) > 90)
|
self.assertTrue(len(result['entries']) > 90)
|
||||||
#test autogenerated channel
|
#test autogenerated channel
|
||||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
|
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||||
self.assertTrue(len(result['entries']) >= 18)
|
self.assertTrue(len(result['entries']) >= 18)
|
||||||
|
|
||||||
def test_youtube_user(self):
|
def test_youtube_user(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeUserIE(dl)
|
ie = YoutubeUserIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
|
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
||||||
self.assertTrue(len(result['entries']) >= 320)
|
self.assertTrue(len(result['entries']) >= 320)
|
||||||
|
|
||||||
def test_youtube_safe_search(self):
|
def test_youtube_safe_search(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
|
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
|
||||||
self.assertEqual(len(result['entries']), 2)
|
self.assertEqual(len(result['entries']), 2)
|
||||||
|
|
||||||
def test_youtube_show(self):
|
def test_youtube_show(self):
|
||||||
|
@ -1,95 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
# Allow direct execution
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import unittest
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup, md5
|
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import YoutubeIE
|
|
||||||
|
|
||||||
|
|
||||||
class TestYoutubeSubtitles(unittest.TestCase):
|
|
||||||
def setUp(self):
|
|
||||||
self.DL = FakeYDL()
|
|
||||||
self.url = 'QRS8MkLhQmM'
|
|
||||||
|
|
||||||
def getInfoDict(self):
|
|
||||||
IE = YoutubeIE(self.DL)
|
|
||||||
info_dict = IE.extract(self.url)
|
|
||||||
return info_dict
|
|
||||||
|
|
||||||
def getSubtitles(self):
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
return info_dict[0]['subtitles']
|
|
||||||
|
|
||||||
def test_youtube_no_writesubtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = False
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
|
|
||||||
def test_youtube_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
|
||||||
|
|
||||||
def test_youtube_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['it']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
|
||||||
|
|
||||||
def test_youtube_allsubtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(len(subtitles.keys()), 13)
|
|
||||||
|
|
||||||
def test_youtube_subtitles_sbv_format(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitlesformat'] = 'sbv'
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
|
||||||
|
|
||||||
def test_youtube_subtitles_vtt_format(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitlesformat'] = 'vtt'
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
|
|
||||||
|
|
||||||
def test_youtube_list_subtitles(self):
|
|
||||||
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_youtube_automatic_captions(self):
|
|
||||||
self.url = '8YoUxe5ncPo'
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['it']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(subtitles['it'] is not None)
|
|
||||||
|
|
||||||
def test_youtube_nosubtitles(self):
|
|
||||||
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
|
||||||
self.url = 'sAjKT8FhjI8'
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(len(subtitles), 0)
|
|
||||||
|
|
||||||
def test_youtube_multiple_langs(self):
|
|
||||||
self.url = 'QRS8MkLhQmM'
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['it', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
@ -5,9 +5,6 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|
||||||
if os.name == 'nt':
|
|
||||||
import ctypes
|
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
@ -151,16 +148,8 @@ class FileDownloader(object):
|
|||||||
def to_stderr(self, message):
|
def to_stderr(self, message):
|
||||||
self.ydl.to_screen(message)
|
self.ydl.to_screen(message)
|
||||||
|
|
||||||
def to_cons_title(self, message):
|
def to_console_title(self, message):
|
||||||
"""Set console/terminal window title to message."""
|
self.ydl.to_console_title(message)
|
||||||
if not self.params.get('consoletitle', False):
|
|
||||||
return
|
|
||||||
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
|
||||||
# c_wchar_p() might not be necessary if `message` is
|
|
||||||
# already of type unicode()
|
|
||||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
|
||||||
elif 'TERM' in os.environ:
|
|
||||||
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
|
|
||||||
|
|
||||||
def trouble(self, *args, **kargs):
|
def trouble(self, *args, **kargs):
|
||||||
self.ydl.trouble(*args, **kargs)
|
self.ydl.trouble(*args, **kargs)
|
||||||
@ -234,8 +223,14 @@ class FileDownloader(object):
|
|||||||
if self.params.get('noprogress', False):
|
if self.params.get('noprogress', False):
|
||||||
return
|
return
|
||||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
||||||
eta_str = self.format_eta(eta)
|
if eta is not None:
|
||||||
percent_str = self.format_percent(percent)
|
eta_str = self.format_eta(eta)
|
||||||
|
else:
|
||||||
|
eta_str = 'Unknown ETA'
|
||||||
|
if percent is not None:
|
||||||
|
percent_str = self.format_percent(percent)
|
||||||
|
else:
|
||||||
|
percent_str = 'Unknown %'
|
||||||
speed_str = self.format_speed(speed)
|
speed_str = self.format_speed(speed)
|
||||||
if self.params.get('progress_with_newline', False):
|
if self.params.get('progress_with_newline', False):
|
||||||
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
||||||
@ -243,7 +238,7 @@ class FileDownloader(object):
|
|||||||
else:
|
else:
|
||||||
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
|
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
|
||||||
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
|
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
|
||||||
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
|
self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
|
||||||
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
|
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
|
||||||
|
|
||||||
def report_resuming_byte(self, resume_len):
|
def report_resuming_byte(self, resume_len):
|
||||||
@ -274,7 +269,7 @@ class FileDownloader(object):
|
|||||||
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
|
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
|
||||||
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
||||||
|
|
||||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
|
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
test = self.params.get('test', False)
|
test = self.params.get('test', False)
|
||||||
@ -301,6 +296,8 @@ class FileDownloader(object):
|
|||||||
basic_args += ['--tcUrl', url]
|
basic_args += ['--tcUrl', url]
|
||||||
if test:
|
if test:
|
||||||
basic_args += ['--stop', '1']
|
basic_args += ['--stop', '1']
|
||||||
|
if live:
|
||||||
|
basic_args += ['--live']
|
||||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||||
if self.params.get('verbose', False):
|
if self.params.get('verbose', False):
|
||||||
try:
|
try:
|
||||||
@ -373,15 +370,20 @@ class FileDownloader(object):
|
|||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
|
||||||
args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
|
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||||
# Check for ffmpeg first
|
'-bsf:a', 'aac_adtstoasc', tmpfilename]
|
||||||
try:
|
|
||||||
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
|
||||||
except (OSError, IOError):
|
|
||||||
self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
|
|
||||||
return False
|
|
||||||
|
|
||||||
retval = subprocess.call(args)
|
for program in ['avconv', 'ffmpeg']:
|
||||||
|
try:
|
||||||
|
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||||
|
break
|
||||||
|
except (OSError, IOError):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||||
|
cmd = [program] + args
|
||||||
|
|
||||||
|
retval = subprocess.call(cmd)
|
||||||
if retval == 0:
|
if retval == 0:
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||||
@ -418,7 +420,8 @@ class FileDownloader(object):
|
|||||||
info_dict.get('player_url', None),
|
info_dict.get('player_url', None),
|
||||||
info_dict.get('page_url', None),
|
info_dict.get('page_url', None),
|
||||||
info_dict.get('play_path', None),
|
info_dict.get('play_path', None),
|
||||||
info_dict.get('tc_url', None))
|
info_dict.get('tc_url', None),
|
||||||
|
info_dict.get('rtmp_live', False))
|
||||||
|
|
||||||
# Attempt to download using mplayer
|
# Attempt to download using mplayer
|
||||||
if url.startswith('mms') or url.startswith('rtsp'):
|
if url.startswith('mms') or url.startswith('rtsp'):
|
||||||
@ -557,12 +560,11 @@ class FileDownloader(object):
|
|||||||
# Progress message
|
# Progress message
|
||||||
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||||
if data_len is None:
|
if data_len is None:
|
||||||
self.report_progress('Unknown %', data_len_str, speed, 'Unknown ETA')
|
eta = percent = None
|
||||||
eta = None
|
|
||||||
else:
|
else:
|
||||||
percent = self.calc_percent(byte_counter, data_len)
|
percent = self.calc_percent(byte_counter, data_len)
|
||||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||||
self.report_progress(percent, data_len_str, speed, eta)
|
self.report_progress(percent, data_len_str, speed, eta)
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': byte_counter,
|
'downloaded_bytes': byte_counter,
|
||||||
|
@ -501,7 +501,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
|||||||
|
|
||||||
options = ['-c', 'copy']
|
options = ['-c', 'copy']
|
||||||
for (name, value) in metadata.items():
|
for (name, value) in metadata.items():
|
||||||
options.extend(['-metadata', '%s="%s"' % (name, value)])
|
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||||
options.extend(['-f', ext])
|
options.extend(['-f', ext])
|
||||||
|
|
||||||
self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
|
self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
|
||||||
|
@ -5,6 +5,7 @@ from __future__ import absolute_import
|
|||||||
|
|
||||||
import errno
|
import errno
|
||||||
import io
|
import io
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
@ -13,7 +14,34 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from .utils import *
|
if os.name == 'nt':
|
||||||
|
import ctypes
|
||||||
|
|
||||||
|
from .utils import (
|
||||||
|
compat_http_client,
|
||||||
|
compat_print,
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_error,
|
||||||
|
compat_urllib_request,
|
||||||
|
ContentTooShortError,
|
||||||
|
date_from_str,
|
||||||
|
DateRange,
|
||||||
|
determine_ext,
|
||||||
|
DownloadError,
|
||||||
|
encodeFilename,
|
||||||
|
ExtractorError,
|
||||||
|
locked_file,
|
||||||
|
MaxDownloadsReached,
|
||||||
|
PostProcessingError,
|
||||||
|
preferredencoding,
|
||||||
|
SameFileError,
|
||||||
|
sanitize_filename,
|
||||||
|
subtitles_filename,
|
||||||
|
takewhile_inclusive,
|
||||||
|
UnavailableVideoError,
|
||||||
|
write_json_file,
|
||||||
|
write_string,
|
||||||
|
)
|
||||||
from .extractor import get_info_extractor, gen_extractors
|
from .extractor import get_info_extractor, gen_extractors
|
||||||
from .FileDownloader import FileDownloader
|
from .FileDownloader import FileDownloader
|
||||||
|
|
||||||
@ -57,6 +85,7 @@ class YoutubeDL(object):
|
|||||||
forcethumbnail: Force printing thumbnail URL.
|
forcethumbnail: Force printing thumbnail URL.
|
||||||
forcedescription: Force printing description.
|
forcedescription: Force printing description.
|
||||||
forcefilename: Force printing final filename.
|
forcefilename: Force printing final filename.
|
||||||
|
forcejson: Force printing info_dict as JSON.
|
||||||
simulate: Do not download the video files.
|
simulate: Do not download the video files.
|
||||||
format: Video format code.
|
format: Video format code.
|
||||||
format_limit: Highest quality format to try.
|
format_limit: Highest quality format to try.
|
||||||
@ -176,6 +205,37 @@ class YoutubeDL(object):
|
|||||||
output = output.encode(preferredencoding())
|
output = output.encode(preferredencoding())
|
||||||
sys.stderr.write(output)
|
sys.stderr.write(output)
|
||||||
|
|
||||||
|
def to_console_title(self, message):
|
||||||
|
if not self.params.get('consoletitle', False):
|
||||||
|
return
|
||||||
|
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
||||||
|
# c_wchar_p() might not be necessary if `message` is
|
||||||
|
# already of type unicode()
|
||||||
|
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||||
|
elif 'TERM' in os.environ:
|
||||||
|
write_string(u'\033]0;%s\007' % message, self._screen_file)
|
||||||
|
|
||||||
|
def save_console_title(self):
|
||||||
|
if not self.params.get('consoletitle', False):
|
||||||
|
return
|
||||||
|
if 'TERM' in os.environ:
|
||||||
|
# Save the title on stack
|
||||||
|
write_string(u'\033[22;0t', self._screen_file)
|
||||||
|
|
||||||
|
def restore_console_title(self):
|
||||||
|
if not self.params.get('consoletitle', False):
|
||||||
|
return
|
||||||
|
if 'TERM' in os.environ:
|
||||||
|
# Restore the title from stack
|
||||||
|
write_string(u'\033[23;0t', self._screen_file)
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
self.save_console_title()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *args):
|
||||||
|
self.restore_console_title()
|
||||||
|
|
||||||
def fixed_template(self):
|
def fixed_template(self):
|
||||||
"""Checks if the output template is fixed."""
|
"""Checks if the output template is fixed."""
|
||||||
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
|
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
|
||||||
@ -254,7 +314,7 @@ class YoutubeDL(object):
|
|||||||
"""Report file has already been fully downloaded."""
|
"""Report file has already been fully downloaded."""
|
||||||
try:
|
try:
|
||||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||||
except (UnicodeEncodeError) as err:
|
except UnicodeEncodeError:
|
||||||
self.to_screen(u'[download] The file has already been downloaded')
|
self.to_screen(u'[download] The file has already been downloaded')
|
||||||
|
|
||||||
def increment_downloads(self):
|
def increment_downloads(self):
|
||||||
@ -295,15 +355,17 @@ class YoutubeDL(object):
|
|||||||
def _match_entry(self, info_dict):
|
def _match_entry(self, info_dict):
|
||||||
""" Returns None iff the file should be downloaded """
|
""" Returns None iff the file should be downloaded """
|
||||||
|
|
||||||
title = info_dict['title']
|
if 'title' in info_dict:
|
||||||
matchtitle = self.params.get('matchtitle', False)
|
# This can happen when we're just evaluating the playlist
|
||||||
if matchtitle:
|
title = info_dict['title']
|
||||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
matchtitle = self.params.get('matchtitle', False)
|
||||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
if matchtitle:
|
||||||
rejecttitle = self.params.get('rejecttitle', False)
|
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||||
if rejecttitle:
|
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
rejecttitle = self.params.get('rejecttitle', False)
|
||||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
if rejecttitle:
|
||||||
|
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||||
|
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||||
date = info_dict.get('upload_date', None)
|
date = info_dict.get('upload_date', None)
|
||||||
if date is not None:
|
if date is not None:
|
||||||
dateRange = self.params.get('daterange', DateRange())
|
dateRange = self.params.get('daterange', DateRange())
|
||||||
@ -314,8 +376,8 @@ class YoutubeDL(object):
|
|||||||
if age_limit < info_dict.get('age_limit', 0):
|
if age_limit < info_dict.get('age_limit', 0):
|
||||||
return u'Skipping "' + title + '" because it is age restricted'
|
return u'Skipping "' + title + '" because it is age restricted'
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return (u'%(title)s has already been recorded in archive'
|
return (u'%s has already been recorded in archive'
|
||||||
% info_dict)
|
% info_dict.get('title', info_dict.get('id', u'video')))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -385,7 +447,7 @@ class YoutubeDL(object):
|
|||||||
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
||||||
if result_type == 'video':
|
if result_type == 'video':
|
||||||
self.add_extra_info(ie_result, extra_info)
|
self.add_extra_info(ie_result, extra_info)
|
||||||
return self.process_video_result(ie_result)
|
return self.process_video_result(ie_result, download=download)
|
||||||
elif result_type == 'url':
|
elif result_type == 'url':
|
||||||
# We have to add extra_info to the results because it may be
|
# We have to add extra_info to the results because it may be
|
||||||
# contained in a playlist
|
# contained in a playlist
|
||||||
@ -394,7 +456,7 @@ class YoutubeDL(object):
|
|||||||
ie_key=ie_result.get('ie_key'),
|
ie_key=ie_result.get('ie_key'),
|
||||||
extra_info=extra_info)
|
extra_info=extra_info)
|
||||||
elif result_type == 'playlist':
|
elif result_type == 'playlist':
|
||||||
self.add_extra_info(ie_result, extra_info)
|
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||||
@ -424,6 +486,12 @@ class YoutubeDL(object):
|
|||||||
'webpage_url': ie_result['webpage_url'],
|
'webpage_url': ie_result['webpage_url'],
|
||||||
'extractor_key': ie_result['extractor_key'],
|
'extractor_key': ie_result['extractor_key'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reason = self._match_entry(entry)
|
||||||
|
if reason is not None:
|
||||||
|
self.to_screen(u'[download] ' + reason)
|
||||||
|
continue
|
||||||
|
|
||||||
entry_result = self.process_ie_result(entry,
|
entry_result = self.process_ie_result(entry,
|
||||||
download=download,
|
download=download,
|
||||||
extra_info=extra)
|
extra_info=extra)
|
||||||
@ -579,7 +647,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
# Forced printings
|
# Forced printings
|
||||||
if self.params.get('forcetitle', False):
|
if self.params.get('forcetitle', False):
|
||||||
compat_print(info_dict['title'])
|
compat_print(info_dict['fulltitle'])
|
||||||
if self.params.get('forceid', False):
|
if self.params.get('forceid', False):
|
||||||
compat_print(info_dict['id'])
|
compat_print(info_dict['id'])
|
||||||
if self.params.get('forceurl', False):
|
if self.params.get('forceurl', False):
|
||||||
@ -593,6 +661,8 @@ class YoutubeDL(object):
|
|||||||
compat_print(filename)
|
compat_print(filename)
|
||||||
if self.params.get('forceformat', False):
|
if self.params.get('forceformat', False):
|
||||||
compat_print(info_dict['format'])
|
compat_print(info_dict['format'])
|
||||||
|
if self.params.get('forcejson', False):
|
||||||
|
compat_print(json.dumps(info_dict))
|
||||||
|
|
||||||
# Do nothing else if in simulate mode
|
# Do nothing else if in simulate mode
|
||||||
if self.params.get('simulate', False):
|
if self.params.get('simulate', False):
|
||||||
@ -640,7 +710,7 @@ class YoutubeDL(object):
|
|||||||
# subtitles download errors are already managed as troubles in relevant IE
|
# subtitles download errors are already managed as troubles in relevant IE
|
||||||
# that way it will silently go on when used with unsupporting IE
|
# that way it will silently go on when used with unsupporting IE
|
||||||
subtitles = info_dict['subtitles']
|
subtitles = info_dict['subtitles']
|
||||||
sub_format = self.params.get('subtitlesformat')
|
sub_format = self.params.get('subtitlesformat', 'srt')
|
||||||
for sub_lang in subtitles.keys():
|
for sub_lang in subtitles.keys():
|
||||||
sub = subtitles[sub_lang]
|
sub = subtitles[sub_lang]
|
||||||
if sub is None:
|
if sub is None:
|
||||||
@ -655,7 +725,7 @@ class YoutubeDL(object):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writeinfojson', False):
|
if self.params.get('writeinfojson', False):
|
||||||
infofn = filename + u'.info.json'
|
infofn = os.path.splitext(filename)[0] + u'.info.json'
|
||||||
self.report_writeinfojson(infofn)
|
self.report_writeinfojson(infofn)
|
||||||
try:
|
try:
|
||||||
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
||||||
@ -748,7 +818,16 @@ class YoutubeDL(object):
|
|||||||
fn = self.params.get('download_archive')
|
fn = self.params.get('download_archive')
|
||||||
if fn is None:
|
if fn is None:
|
||||||
return False
|
return False
|
||||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
extractor = info_dict.get('extractor_id')
|
||||||
|
if extractor is None:
|
||||||
|
if 'id' in info_dict:
|
||||||
|
extractor = info_dict.get('ie_key') # key in a playlist
|
||||||
|
if extractor is None:
|
||||||
|
return False # Incomplete video information
|
||||||
|
# Future-proof against any change in case
|
||||||
|
# and backwards compatibility with prior versions
|
||||||
|
extractor = extractor.lower()
|
||||||
|
vid_id = extractor + u' ' + info_dict['id']
|
||||||
try:
|
try:
|
||||||
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
||||||
for line in archive_file:
|
for line in archive_file:
|
||||||
@ -781,20 +860,42 @@ class YoutubeDL(object):
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
def list_formats(self, info_dict):
|
def list_formats(self, info_dict):
|
||||||
|
def format_note(fdict):
|
||||||
|
if fdict.get('format_note') is not None:
|
||||||
|
return fdict['format_note']
|
||||||
|
res = u''
|
||||||
|
if fdict.get('vcodec') is not None:
|
||||||
|
res += u'%-5s' % fdict['vcodec']
|
||||||
|
elif fdict.get('vbr') is not None:
|
||||||
|
res += u'video'
|
||||||
|
if fdict.get('vbr') is not None:
|
||||||
|
res += u'@%4dk' % fdict['vbr']
|
||||||
|
if fdict.get('acodec') is not None:
|
||||||
|
if res:
|
||||||
|
res += u', '
|
||||||
|
res += u'%-5s' % fdict['acodec']
|
||||||
|
elif fdict.get('abr') is not None:
|
||||||
|
if res:
|
||||||
|
res += u', '
|
||||||
|
res += 'audio'
|
||||||
|
if fdict.get('abr') is not None:
|
||||||
|
res += u'@%3dk' % fdict['abr']
|
||||||
|
return res
|
||||||
|
|
||||||
def line(format):
|
def line(format):
|
||||||
return (u'%-15s%-10s%-12s%s' % (
|
return (u'%-20s%-10s%-12s%s' % (
|
||||||
format['format_id'],
|
format['format_id'],
|
||||||
format['ext'],
|
format['ext'],
|
||||||
self.format_resolution(format),
|
self.format_resolution(format),
|
||||||
format.get('format_note', ''),
|
format_note(format),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
formats = info_dict.get('formats', [info_dict])
|
formats = info_dict.get('formats', [info_dict])
|
||||||
formats_s = list(map(line, formats))
|
formats_s = list(map(line, formats))
|
||||||
if len(formats) > 1:
|
if len(formats) > 1:
|
||||||
formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)'
|
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
|
||||||
formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)'
|
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
|
||||||
|
|
||||||
header_line = line({
|
header_line = line({
|
||||||
'format_id': u'format code', 'ext': u'extension',
|
'format_id': u'format code', 'ext': u'extension',
|
||||||
|
@ -32,6 +32,9 @@ __authors__ = (
|
|||||||
'Ismael Mejía',
|
'Ismael Mejía',
|
||||||
'Steffan \'Ruirize\' James',
|
'Steffan \'Ruirize\' James',
|
||||||
'Andras Elso',
|
'Andras Elso',
|
||||||
|
'Jelle van der Waa',
|
||||||
|
'Marcin Cieślak',
|
||||||
|
'Anton Larionov',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
@ -304,6 +307,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
verbosity.add_option('--get-format',
|
verbosity.add_option('--get-format',
|
||||||
action='store_true', dest='getformat',
|
action='store_true', dest='getformat',
|
||||||
help='simulate, quiet but print output format', default=False)
|
help='simulate, quiet but print output format', default=False)
|
||||||
|
verbosity.add_option('-j', '--dump-json',
|
||||||
|
action='store_true', dest='dumpjson',
|
||||||
|
help='simulate, quiet but print JSON information', default=False)
|
||||||
verbosity.add_option('--newline',
|
verbosity.add_option('--newline',
|
||||||
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
||||||
verbosity.add_option('--no-progress',
|
verbosity.add_option('--no-progress',
|
||||||
@ -349,7 +355,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||||
filesystem.add_option('--autonumber-size',
|
filesystem.add_option('--autonumber-size',
|
||||||
dest='autonumber_size', metavar='NUMBER',
|
dest='autonumber_size', metavar='NUMBER',
|
||||||
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
|
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
|
||||||
filesystem.add_option('--restrict-filenames',
|
filesystem.add_option('--restrict-filenames',
|
||||||
action='store_true', dest='restrictfilenames',
|
action='store_true', dest='restrictfilenames',
|
||||||
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
||||||
@ -358,7 +364,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
filesystem.add_option('-w', '--no-overwrites',
|
filesystem.add_option('-w', '--no-overwrites',
|
||||||
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
||||||
filesystem.add_option('-c', '--continue',
|
filesystem.add_option('-c', '--continue',
|
||||||
action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
|
action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
|
||||||
filesystem.add_option('--no-continue',
|
filesystem.add_option('--no-continue',
|
||||||
action='store_false', dest='continue_dl',
|
action='store_false', dest='continue_dl',
|
||||||
help='do not resume partially downloaded files (restart from beginning)')
|
help='do not resume partially downloaded files (restart from beginning)')
|
||||||
@ -601,13 +607,12 @@ def _real_main(argv=None):
|
|||||||
u' file! Use "%%(ext)s" instead of %r' %
|
u' file! Use "%%(ext)s" instead of %r' %
|
||||||
determine_ext(outtmpl, u''))
|
determine_ext(outtmpl, u''))
|
||||||
|
|
||||||
# YoutubeDL
|
ydl_opts = {
|
||||||
ydl = YoutubeDL({
|
|
||||||
'usenetrc': opts.usenetrc,
|
'usenetrc': opts.usenetrc,
|
||||||
'username': opts.username,
|
'username': opts.username,
|
||||||
'password': opts.password,
|
'password': opts.password,
|
||||||
'videopassword': opts.videopassword,
|
'videopassword': opts.videopassword,
|
||||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
||||||
'forceurl': opts.geturl,
|
'forceurl': opts.geturl,
|
||||||
'forcetitle': opts.gettitle,
|
'forcetitle': opts.gettitle,
|
||||||
'forceid': opts.getid,
|
'forceid': opts.getid,
|
||||||
@ -615,8 +620,9 @@ def _real_main(argv=None):
|
|||||||
'forcedescription': opts.getdescription,
|
'forcedescription': opts.getdescription,
|
||||||
'forcefilename': opts.getfilename,
|
'forcefilename': opts.getfilename,
|
||||||
'forceformat': opts.getformat,
|
'forceformat': opts.getformat,
|
||||||
|
'forcejson': opts.dumpjson,
|
||||||
'simulate': opts.simulate,
|
'simulate': opts.simulate,
|
||||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
||||||
'format': opts.format,
|
'format': opts.format,
|
||||||
'format_limit': opts.format_limit,
|
'format_limit': opts.format_limit,
|
||||||
'listformats': opts.listformats,
|
'listformats': opts.listformats,
|
||||||
@ -665,61 +671,63 @@ def _real_main(argv=None):
|
|||||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||||
'age_limit': opts.age_limit,
|
'age_limit': opts.age_limit,
|
||||||
'download_archive': opts.download_archive,
|
'download_archive': opts.download_archive,
|
||||||
})
|
}
|
||||||
|
|
||||||
if opts.verbose:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
if opts.verbose:
|
||||||
try:
|
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||||
sp = subprocess.Popen(
|
|
||||||
['git', 'rev-parse', '--short', 'HEAD'],
|
|
||||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
|
||||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
out, err = sp.communicate()
|
|
||||||
out = out.decode().strip()
|
|
||||||
if re.match('[0-9a-f]+', out):
|
|
||||||
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
|
||||||
except:
|
|
||||||
try:
|
try:
|
||||||
sys.exc_clear()
|
sp = subprocess.Popen(
|
||||||
|
['git', 'rev-parse', '--short', 'HEAD'],
|
||||||
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||||
|
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
out, err = sp.communicate()
|
||||||
|
out = out.decode().strip()
|
||||||
|
if re.match('[0-9a-f]+', out):
|
||||||
|
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
||||||
except:
|
except:
|
||||||
pass
|
try:
|
||||||
write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
|
sys.exc_clear()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
write_string(u'[debug] Python version %s - %s' %
|
||||||
|
(platform.python_version(), platform_name()) + u'\n')
|
||||||
|
|
||||||
proxy_map = {}
|
proxy_map = {}
|
||||||
for handler in opener.handlers:
|
for handler in opener.handlers:
|
||||||
if hasattr(handler, 'proxies'):
|
if hasattr(handler, 'proxies'):
|
||||||
proxy_map.update(handler.proxies)
|
proxy_map.update(handler.proxies)
|
||||||
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
||||||
|
|
||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
|
|
||||||
# PostProcessors
|
# PostProcessors
|
||||||
# Add the metadata pp first, the other pps will copy it
|
# Add the metadata pp first, the other pps will copy it
|
||||||
if opts.addmetadata:
|
if opts.addmetadata:
|
||||||
ydl.add_post_processor(FFmpegMetadataPP())
|
ydl.add_post_processor(FFmpegMetadataPP())
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||||
if opts.recodevideo:
|
if opts.recodevideo:
|
||||||
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||||
if opts.embedsubtitles:
|
if opts.embedsubtitles:
|
||||||
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
||||||
|
|
||||||
# Update version
|
# Update version
|
||||||
if opts.update_self:
|
if opts.update_self:
|
||||||
update_self(ydl.to_screen, opts.verbose)
|
update_self(ydl.to_screen, opts.verbose)
|
||||||
|
|
||||||
# Maybe do nothing
|
# Maybe do nothing
|
||||||
if len(all_urls) < 1:
|
if len(all_urls) < 1:
|
||||||
if not opts.update_self:
|
if not opts.update_self:
|
||||||
parser.error(u'you must provide at least one URL')
|
parser.error(u'you must provide at least one URL')
|
||||||
else:
|
else:
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
retcode = ydl.download(all_urls)
|
retcode = ydl.download(all_urls)
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
ydl.to_screen(u'--max-download limit reached, aborting.')
|
ydl.to_screen(u'--max-download limit reached, aborting.')
|
||||||
retcode = 101
|
retcode = 101
|
||||||
|
|
||||||
# Dump cookie jar if requested
|
# Dump cookie jar if requested
|
||||||
if opts.cookiefile is not None:
|
if opts.cookiefile is not None:
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from .appletrailers import AppleTrailersIE
|
from .appletrailers import AppleTrailersIE
|
||||||
from .addanime import AddAnimeIE
|
from .addanime import AddAnimeIE
|
||||||
|
from .anitube import AnitubeIE
|
||||||
from .archiveorg import ArchiveOrgIE
|
from .archiveorg import ArchiveOrgIE
|
||||||
from .ard import ARDIE
|
from .ard import ARDIE
|
||||||
from .arte import (
|
from .arte import (
|
||||||
@ -10,7 +11,7 @@ from .arte import (
|
|||||||
)
|
)
|
||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
@ -25,6 +26,7 @@ from .comedycentral import ComedyCentralIE
|
|||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
from .cspan import CSpanIE
|
from .cspan import CSpanIE
|
||||||
|
from .d8 import D8IE
|
||||||
from .dailymotion import (
|
from .dailymotion import (
|
||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
DailymotionPlaylistIE,
|
DailymotionPlaylistIE,
|
||||||
@ -57,6 +59,7 @@ from .francetv import (
|
|||||||
)
|
)
|
||||||
from .freesound import FreesoundIE
|
from .freesound import FreesoundIE
|
||||||
from .funnyordie import FunnyOrDieIE
|
from .funnyordie import FunnyOrDieIE
|
||||||
|
from .gamekings import GamekingsIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gametrailers import GametrailersIE
|
from .gametrailers import GametrailersIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
@ -79,7 +82,7 @@ from .keezmovies import KeezMoviesIE
|
|||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import LivestreamIE
|
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
from .mit import TechTVMITIE, MITIE
|
from .mit import TechTVMITIE, MITIE
|
||||||
@ -115,25 +118,31 @@ from .slashdot import SlashdotIE
|
|||||||
from .slideshare import SlideshareIE
|
from .slideshare import SlideshareIE
|
||||||
from .sohu import SohuIE
|
from .sohu import SohuIE
|
||||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
|
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
|
||||||
from .southparkstudios import SouthParkStudiosIE
|
from .southparkstudios import (
|
||||||
|
SouthParkStudiosIE,
|
||||||
|
SouthparkDeIE,
|
||||||
|
)
|
||||||
from .space import SpaceIE
|
from .space import SpaceIE
|
||||||
from .spankwire import SpankwireIE
|
from .spankwire import SpankwireIE
|
||||||
from .spiegel import SpiegelIE
|
from .spiegel import SpiegelIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .statigram import StatigramIE
|
from .statigram import StatigramIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
|
from .streamcloud import StreamcloudIE
|
||||||
from .sztvhu import SztvHuIE
|
from .sztvhu import SztvHuIE
|
||||||
from .teamcoco import TeamcocoIE
|
from .teamcoco import TeamcocoIE
|
||||||
from .techtalks import TechTalksIE
|
from .techtalks import TechTalksIE
|
||||||
from .ted import TEDIE
|
from .ted import TEDIE
|
||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
|
from .toutv import TouTvIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
from .tudou import TudouIE
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
|
from .tvp import TvpIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .ustream import UstreamIE, UstreamChannelIE
|
from .ustream import UstreamIE, UstreamChannelIE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
|
55
youtube_dl/extractor/anitube.py
Normal file
55
youtube_dl/extractor/anitube.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class AnitubeIE(InfoExtractor):
|
||||||
|
IE_NAME = u'anitube.se'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.anitube.se/video/36621',
|
||||||
|
u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
|
||||||
|
u'file': u'36621.mp4',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'36621',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Recorder to Randoseru 01',
|
||||||
|
},
|
||||||
|
u'skip': u'Blocked in the US',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
||||||
|
webpage, u'key')
|
||||||
|
|
||||||
|
webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
||||||
|
key)
|
||||||
|
config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
|
||||||
|
|
||||||
|
video_title = config_xml.find('title').text
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
video_url = config_xml.find('file')
|
||||||
|
if video_url is not None:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'sd',
|
||||||
|
'url': video_url.text,
|
||||||
|
})
|
||||||
|
video_url = config_xml.find('filehd')
|
||||||
|
if video_url is not None:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'hd',
|
||||||
|
'url': video_url.text,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_title,
|
||||||
|
'formats': formats
|
||||||
|
}
|
@ -10,6 +10,7 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
|
compat_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
# There are different sources of video in arte.tv, the extraction process
|
# There are different sources of video in arte.tv, the extraction process
|
||||||
@ -68,7 +69,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
lang = mobj.group('lang')
|
lang = mobj.group('lang')
|
||||||
return self._extract_liveweb(url, name, lang)
|
return self._extract_liveweb(url, name, lang)
|
||||||
|
|
||||||
if re.search(self._LIVE_URL, video_id) is not None:
|
if re.search(self._LIVE_URL, url) is not None:
|
||||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||||
# self.extractLiveStream(url)
|
# self.extractLiveStream(url)
|
||||||
# return
|
# return
|
||||||
@ -114,7 +115,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
event_doc = config_doc.find('event')
|
event_doc = config_doc.find('event')
|
||||||
url_node = event_doc.find('video').find('urlHd')
|
url_node = event_doc.find('video').find('urlHd')
|
||||||
if url_node is None:
|
if url_node is None:
|
||||||
url_node = video_doc.find('urlSd')
|
url_node = event_doc.find('urlSd')
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'title': event_doc.find('name%s' % lang.capitalize()).text,
|
'title': event_doc.find('name%s' % lang.capitalize()).text,
|
||||||
@ -181,20 +182,30 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
formats = all_formats
|
formats = all_formats
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(u'The formats list is empty')
|
raise ExtractorError(u'The formats list is empty')
|
||||||
# We order the formats by quality
|
|
||||||
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
||||||
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
def sort_key(f):
|
||||||
|
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
||||||
else:
|
else:
|
||||||
sort_key = lambda f: int(f.get('height',-1))
|
def sort_key(f):
|
||||||
|
return (
|
||||||
|
# Sort first by quality
|
||||||
|
int(f.get('height',-1)),
|
||||||
|
int(f.get('bitrate',-1)),
|
||||||
|
# The original version with subtitles has lower relevance
|
||||||
|
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
|
||||||
|
# The version with sourds/mal subtitles has also lower relevance
|
||||||
|
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
|
||||||
|
)
|
||||||
formats = sorted(formats, key=sort_key)
|
formats = sorted(formats, key=sort_key)
|
||||||
# Prefer videos without subtitles in the same language
|
|
||||||
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
|
|
||||||
# Pick the best quality
|
|
||||||
def _format(format_info):
|
def _format(format_info):
|
||||||
quality = format_info['quality']
|
quality = ''
|
||||||
m_quality = re.match(r'\w*? - (\d*)p', quality)
|
height = format_info.get('height')
|
||||||
if m_quality is not None:
|
if height is not None:
|
||||||
quality = m_quality.group(1)
|
quality = compat_str(height)
|
||||||
|
bitrate = format_info.get('bitrate')
|
||||||
|
if bitrate is not None:
|
||||||
|
quality += '-%d' % bitrate
|
||||||
if format_info.get('versionCode') is not None:
|
if format_info.get('versionCode') is not None:
|
||||||
format_id = u'%s-%s' % (quality, format_info['versionCode'])
|
format_id = u'%s-%s' % (quality, format_info['versionCode'])
|
||||||
else:
|
else:
|
||||||
@ -203,7 +214,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'format_note': format_info.get('versionLibelle'),
|
'format_note': format_info.get('versionLibelle'),
|
||||||
'width': format_info.get('width'),
|
'width': format_info.get('width'),
|
||||||
'height': format_info.get('height'),
|
'height': height,
|
||||||
}
|
}
|
||||||
if format_info['mediaType'] == u'rtmp':
|
if format_info['mediaType'] == u'rtmp':
|
||||||
info['url'] = format_info['streamer']
|
info['url'] = format_info['streamer']
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
import os.path
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_urlparse,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
class AUEngineIE(InfoExtractor):
|
class AUEngineIE(InfoExtractor):
|
||||||
@ -25,22 +25,25 @@ class AUEngineIE(InfoExtractor):
|
|||||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
|
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
|
||||||
webpage, u'title')
|
webpage, u'title')
|
||||||
title = title.strip()
|
title = title.strip()
|
||||||
links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
|
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
||||||
links = [compat_urllib_parse.unquote(l) for l in links]
|
links = map(compat_urllib_parse.unquote, links)
|
||||||
|
|
||||||
|
thumbnail = None
|
||||||
|
video_url = None
|
||||||
for link in links:
|
for link in links:
|
||||||
root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path)
|
if link.endswith('.png'):
|
||||||
if pathext == '.png':
|
|
||||||
thumbnail = link
|
thumbnail = link
|
||||||
elif pathext == '.mp4':
|
elif '/videos/' in link:
|
||||||
url = link
|
video_url = link
|
||||||
ext = pathext
|
if not video_url:
|
||||||
|
raise ExtractorError(u'Could not find video URL')
|
||||||
|
ext = u'.' + determine_ext(video_url)
|
||||||
if ext == title[-len(ext):]:
|
if ext == title[-len(ext):]:
|
||||||
title = title[:-len(ext)]
|
title = title[:-len(ext)]
|
||||||
ext = ext[1:]
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': url,
|
'url': video_url,
|
||||||
'ext': ext,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}]
|
}
|
||||||
|
@ -15,7 +15,8 @@ class BambuserIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://bambuser.com/v/4050584',
|
u'url': u'http://bambuser.com/v/4050584',
|
||||||
u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
|
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
|
||||||
|
#u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'id': u'4050584',
|
u'id': u'4050584',
|
||||||
u'ext': u'flv',
|
u'ext': u'flv',
|
||||||
|
@ -3,13 +3,16 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
|
IE_NAME = u'Bandcamp'
|
||||||
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||||
u'file': u'1812978515.mp3',
|
u'file': u'1812978515.mp3',
|
||||||
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
|
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
|
||||||
@ -17,7 +20,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
||||||
},
|
},
|
||||||
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -26,6 +29,24 @@ class BandcampIE(InfoExtractor):
|
|||||||
# We get the link to the free download page
|
# We get the link to the free download page
|
||||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||||
if m_download is None:
|
if m_download is None:
|
||||||
|
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||||
|
if m_trackinfo:
|
||||||
|
json_code = m_trackinfo.group(1)
|
||||||
|
data = json.loads(json_code)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for d in data:
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'format_id',
|
||||||
|
'url': format_url,
|
||||||
|
'ext': format_id.partition('-')[0]
|
||||||
|
} for format_id, format_url in sorted(d['file'].items())]
|
||||||
|
return {
|
||||||
|
'id': compat_str(d['id']),
|
||||||
|
'title': d['title'],
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
else:
|
||||||
raise ExtractorError(u'No free songs found')
|
raise ExtractorError(u'No free songs found')
|
||||||
|
|
||||||
download_link = m_download.group(1)
|
download_link = m_download.group(1)
|
||||||
@ -61,3 +82,49 @@ class BandcampIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
return [track_info]
|
return [track_info]
|
||||||
|
|
||||||
|
|
||||||
|
class BandcampAlbumIE(InfoExtractor):
|
||||||
|
IE_NAME = u'Bandcamp:album'
|
||||||
|
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
|
u'playlist': [
|
||||||
|
{
|
||||||
|
u'file': u'1353101989.mp3',
|
||||||
|
u'md5': u'39bc1eded3476e927c724321ddf116cf',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Intro',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'file': u'38097443.mp3',
|
||||||
|
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Kero One - Keep It Alive (Blazo remix)',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
],
|
||||||
|
u'params': {
|
||||||
|
u'playlistend': 2
|
||||||
|
},
|
||||||
|
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
title = mobj.group('title')
|
||||||
|
webpage = self._download_webpage(url, title)
|
||||||
|
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||||
|
if not tracks_paths:
|
||||||
|
raise ExtractorError(u'The page doesn\'t contain any track')
|
||||||
|
entries = [
|
||||||
|
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||||
|
for t_path in tracks_paths]
|
||||||
|
title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'title': title,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
@ -10,10 +10,12 @@ from ..utils import (
|
|||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urllib_request,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BrightcoveIE(InfoExtractor):
|
class BrightcoveIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
|
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
|
||||||
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
||||||
@ -80,6 +82,9 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
|
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
|
||||||
if videoPlayer is not None:
|
if videoPlayer is not None:
|
||||||
params['@videoPlayer'] = videoPlayer.attrib['value']
|
params['@videoPlayer'] = videoPlayer.attrib['value']
|
||||||
|
linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
|
||||||
|
if linkBase is not None:
|
||||||
|
params['linkBaseURL'] = linkBase.attrib['value']
|
||||||
data = compat_urllib_parse.urlencode(params)
|
data = compat_urllib_parse.urlencode(params)
|
||||||
return cls._FEDERATED_URL_TEMPLATE % data
|
return cls._FEDERATED_URL_TEMPLATE % data
|
||||||
|
|
||||||
@ -107,14 +112,18 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
|
|
||||||
videoPlayer = query.get('@videoPlayer')
|
videoPlayer = query.get('@videoPlayer')
|
||||||
if videoPlayer:
|
if videoPlayer:
|
||||||
return self._get_video_info(videoPlayer[0], query_str)
|
return self._get_video_info(videoPlayer[0], query_str, query)
|
||||||
else:
|
else:
|
||||||
player_key = query['playerKey']
|
player_key = query['playerKey']
|
||||||
return self._get_playlist_info(player_key[0])
|
return self._get_playlist_info(player_key[0])
|
||||||
|
|
||||||
def _get_video_info(self, video_id, query):
|
def _get_video_info(self, video_id, query_str, query):
|
||||||
request_url = self._FEDERATED_URL_TEMPLATE % query
|
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
||||||
webpage = self._download_webpage(request_url, video_id)
|
req = compat_urllib_request.Request(request_url)
|
||||||
|
linkBase = query.get('linkBaseURL')
|
||||||
|
if linkBase is not None:
|
||||||
|
req.add_header('Referer', linkBase[0])
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
|
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
|
||||||
|
@ -5,6 +5,7 @@ import xml.etree.ElementTree
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
class CanalplusIE(InfoExtractor):
|
class CanalplusIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
||||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
||||||
@ -25,7 +26,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.groupdict().get('id')
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
webpage = self._download_webpage(url, mobj.group('path'))
|
webpage = self._download_webpage(url, mobj.group('path'))
|
||||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
||||||
|
@ -65,6 +65,7 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': url,
|
'url': url,
|
||||||
'play_path': 'mp4:' + sd_file,
|
'play_path': 'mp4:' + sd_file,
|
||||||
|
'rtmp_live': True, # workaround
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format': 'sd',
|
'format': 'sd',
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
@ -72,6 +73,7 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': url,
|
'url': url,
|
||||||
'play_path': 'mp4:' + hd_file,
|
'play_path': 'mp4:' + hd_file,
|
||||||
|
'rtmp_live': True, # workaround
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format': 'hd',
|
'format': 'hd',
|
||||||
'format_id': 'hd',
|
'format_id': 'hd',
|
||||||
|
@ -6,7 +6,7 @@ from ..utils import determine_ext
|
|||||||
|
|
||||||
|
|
||||||
class CNNIE(InfoExtractor):
|
class CNNIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
|
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
|
||||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
|
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -71,10 +71,8 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||||
try:
|
try:
|
||||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
|
||||||
node_id = media_node.attrib['url']
|
|
||||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||||
except IndexError as err:
|
except IndexError:
|
||||||
raise ExtractorError(u'Invalid manifest file')
|
raise ExtractorError(u'Invalid manifest file')
|
||||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||||
|
@ -71,6 +71,10 @@ class InfoExtractor(object):
|
|||||||
("3D" or "DASH video")
|
("3D" or "DASH video")
|
||||||
* width Width of the video, if known
|
* width Width of the video, if known
|
||||||
* height Height of the video, if known
|
* height Height of the video, if known
|
||||||
|
* abr Average audio bitrate in KBit/s
|
||||||
|
* acodec Name of the audio codec in use
|
||||||
|
* vbr Average video bitrate in KBit/s
|
||||||
|
* vcodec Name of the video codec in use
|
||||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||||
should allow to get the same result again. (It will be set
|
should allow to get the same result again. (It will be set
|
||||||
by YoutubeDL if it's missing)
|
by YoutubeDL if it's missing)
|
||||||
@ -225,12 +229,14 @@ class InfoExtractor(object):
|
|||||||
self.to_screen(u'Logging in')
|
self.to_screen(u'Logging in')
|
||||||
|
|
||||||
#Methods for following #608
|
#Methods for following #608
|
||||||
def url_result(self, url, ie=None):
|
def url_result(self, url, ie=None, video_id=None):
|
||||||
"""Returns a url that points to a page that should be processed"""
|
"""Returns a url that points to a page that should be processed"""
|
||||||
#TODO: ie should be the class used for getting the info
|
#TODO: ie should be the class used for getting the info
|
||||||
video_info = {'_type': 'url',
|
video_info = {'_type': 'url',
|
||||||
'url': url,
|
'url': url,
|
||||||
'ie_key': ie}
|
'ie_key': ie}
|
||||||
|
if video_id is not None:
|
||||||
|
video_info['id'] = video_id
|
||||||
return video_info
|
return video_info
|
||||||
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
||||||
"""Returns a playlist"""
|
"""Returns a playlist"""
|
||||||
@ -315,13 +321,21 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
# Helper functions for extracting OpenGraph info
|
# Helper functions for extracting OpenGraph info
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regex(prop):
|
def _og_regexes(prop):
|
||||||
return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
|
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
|
||||||
|
property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
|
||||||
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
|
return [
|
||||||
|
template % (property_re, content_re),
|
||||||
|
template % (content_re, property_re),
|
||||||
|
]
|
||||||
|
|
||||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||||
if name is None:
|
if name is None:
|
||||||
name = 'OpenGraph %s' % prop
|
name = 'OpenGraph %s' % prop
|
||||||
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
|
escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
|
||||||
|
if escaped is None:
|
||||||
|
return None
|
||||||
return unescapeHTML(escaped)
|
return unescapeHTML(escaped)
|
||||||
|
|
||||||
def _og_search_thumbnail(self, html, **kargs):
|
def _og_search_thumbnail(self, html, **kargs):
|
||||||
@ -334,10 +348,21 @@ class InfoExtractor(object):
|
|||||||
return self._og_search_property('title', html, **kargs)
|
return self._og_search_property('title', html, **kargs)
|
||||||
|
|
||||||
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
||||||
regexes = [self._og_regex('video')]
|
regexes = self._og_regexes('video')
|
||||||
if secure: regexes.insert(0, self._og_regex('video:secure_url'))
|
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
||||||
return self._html_search_regex(regexes, html, name, **kargs)
|
return self._html_search_regex(regexes, html, name, **kargs)
|
||||||
|
|
||||||
|
def _html_search_meta(self, name, html, display_name=None):
|
||||||
|
if display_name is None:
|
||||||
|
display_name = name
|
||||||
|
return self._html_search_regex(
|
||||||
|
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
|
||||||
|
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
||||||
|
html, display_name, fatal=False)
|
||||||
|
|
||||||
|
def _dc_search_uploader(self, html):
|
||||||
|
return self._html_search_meta('dc.creator', html, 'uploader')
|
||||||
|
|
||||||
def _rta_search(self, html):
|
def _rta_search(self, html):
|
||||||
# See http://www.rtalabel.org/index.php?content=howtofaq#single
|
# See http://www.rtalabel.org/index.php?content=howtofaq#single
|
||||||
if re.search(r'(?ix)<meta\s+name="rating"\s+'
|
if re.search(r'(?ix)<meta\s+name="rating"\s+'
|
||||||
@ -346,6 +371,23 @@ class InfoExtractor(object):
|
|||||||
return 18
|
return 18
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
def _media_rating_search(self, html):
|
||||||
|
# See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
|
||||||
|
rating = self._html_search_meta('rating', html)
|
||||||
|
|
||||||
|
if not rating:
|
||||||
|
return None
|
||||||
|
|
||||||
|
RATING_TABLE = {
|
||||||
|
'safe for kids': 0,
|
||||||
|
'general': 8,
|
||||||
|
'14 years': 14,
|
||||||
|
'mature': 17,
|
||||||
|
'restricted': 19,
|
||||||
|
}
|
||||||
|
return RATING_TABLE.get(rating.lower(), None)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
22
youtube_dl/extractor/d8.py
Normal file
22
youtube_dl/extractor/d8.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from .canalplus import CanalplusIE
|
||||||
|
|
||||||
|
|
||||||
|
class D8IE(CanalplusIE):
|
||||||
|
_VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'
|
||||||
|
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'
|
||||||
|
IE_NAME = u'd8.tv'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
|
||||||
|
u'file': u'966289.flv',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Campagne intime - Documentaire exceptionnel',
|
||||||
|
u'description': u'md5:d2643b799fb190846ae09c61e59a859f',
|
||||||
|
u'upload_date': u'20131108',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
@ -141,9 +141,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
raise ExtractorError(u'Unable to extract video URL')
|
raise ExtractorError(u'Unable to extract video URL')
|
||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id)
|
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
self._list_available_subtitles(video_id)
|
self._list_available_subtitles(video_id, webpage)
|
||||||
return
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@ -157,7 +157,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
sub_list = self._download_webpage(
|
sub_list = self._download_webpage(
|
||||||
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
||||||
@ -186,7 +186,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
webpage = self._download_webpage(request,
|
webpage = self._download_webpage(request,
|
||||||
id, u'Downloading page %s' % pagenum)
|
id, u'Downloading page %s' % pagenum)
|
||||||
|
|
||||||
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
|
playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
|
||||||
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
|
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import itertools
|
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
@ -33,5 +33,5 @@ class EitbIE(InfoExtractor):
|
|||||||
raise ExtractorError(u'Could not extract the Brightcove url')
|
raise ExtractorError(u'Could not extract the Brightcove url')
|
||||||
# The BrightcoveExperience object doesn't contain the video id, we set
|
# The BrightcoveExperience object doesn't contain the video id, we set
|
||||||
# it manually
|
# it manually
|
||||||
bc_url += '&%40videoPlayer={}'.format(chapter_id)
|
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
|
||||||
return self.url_result(bc_url, BrightcoveIE.ie_key())
|
return self.url_result(bc_url, BrightcoveIE.ie_key())
|
||||||
|
@ -11,11 +11,11 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class EscapistIE(InfoExtractor):
|
class EscapistIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||||
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
|
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
|
||||||
u'md5': u'c6793dbda81388f4264c1ba18684a74d',
|
u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||||
u"uploader": u"the-escapist-presents",
|
u"uploader": u"the-escapist-presents",
|
||||||
@ -25,50 +25,60 @@ class EscapistIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
showName = mobj.group('showname')
|
showName = mobj.group('showname')
|
||||||
videoId = mobj.group('episode')
|
videoId = mobj.group('episode')
|
||||||
|
|
||||||
self.report_extraction(videoId)
|
self.report_extraction(videoId)
|
||||||
webpage = self._download_webpage(url, videoId)
|
webpage = self._download_webpage(url, videoId)
|
||||||
|
|
||||||
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
|
videoDesc = self._html_search_regex(
|
||||||
|
r'<meta name="description" content="([^"]*)"',
|
||||||
webpage, u'description', fatal=False)
|
webpage, u'description', fatal=False)
|
||||||
|
|
||||||
playerUrl = self._og_search_video_url(webpage, name='player url')
|
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
|
||||||
|
|
||||||
title = self._html_search_regex('<meta name="title" content="([^"]*)"',
|
title = self._html_search_regex(
|
||||||
webpage, u'player url').split(' : ')[-1]
|
r'<meta name="title" content="([^"]*)"',
|
||||||
|
webpage, u'title').split(' : ')[-1]
|
||||||
|
|
||||||
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
|
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
|
||||||
configUrl = compat_urllib_parse.unquote(configUrl)
|
configUrl = compat_urllib_parse.unquote(configUrl)
|
||||||
|
|
||||||
configJSON = self._download_webpage(configUrl, videoId,
|
formats = []
|
||||||
u'Downloading configuration',
|
|
||||||
u'unable to download configuration')
|
|
||||||
|
|
||||||
# Technically, it's JavaScript, not JSON
|
def _add_format(name, cfgurl):
|
||||||
configJSON = configJSON.replace("'", '"')
|
configJSON = self._download_webpage(
|
||||||
|
cfgurl, videoId,
|
||||||
|
u'Downloading ' + name + ' configuration',
|
||||||
|
u'Unable to download ' + name + ' configuration')
|
||||||
|
|
||||||
|
# Technically, it's JavaScript, not JSON
|
||||||
|
configJSON = configJSON.replace("'", '"')
|
||||||
|
|
||||||
|
try:
|
||||||
|
config = json.loads(configJSON)
|
||||||
|
except (ValueError,) as err:
|
||||||
|
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
|
||||||
|
playlist = config['playlist']
|
||||||
|
formats.append({
|
||||||
|
'url': playlist[1]['url'],
|
||||||
|
'format_id': name,
|
||||||
|
})
|
||||||
|
|
||||||
|
_add_format(u'normal', configUrl)
|
||||||
|
hq_url = (configUrl +
|
||||||
|
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
|
||||||
try:
|
try:
|
||||||
config = json.loads(configJSON)
|
_add_format(u'hq', hq_url)
|
||||||
except (ValueError,) as err:
|
except ExtractorError:
|
||||||
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
|
pass # That's fine, we'll just use normal quality
|
||||||
|
|
||||||
playlist = config['playlist']
|
return {
|
||||||
videoUrl = playlist[1]['url']
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': videoId,
|
'id': videoId,
|
||||||
'url': videoUrl,
|
'formats': formats,
|
||||||
'uploader': showName,
|
'uploader': showName,
|
||||||
'upload_date': None,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': 'mp4',
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': videoDesc,
|
'description': videoDesc,
|
||||||
'player_url': playerUrl,
|
'player_url': playerUrl,
|
||||||
}
|
}
|
||||||
|
|
||||||
return [info]
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import json
|
import json
|
||||||
import netrc
|
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
|
@ -39,7 +39,6 @@ class FKTVIE(InfoExtractor):
|
|||||||
for i, _ in enumerate(files, 1):
|
for i, _ in enumerate(files, 1):
|
||||||
video_id = '%04d%d' % (episode, i)
|
video_id = '%04d%d' % (episode, i)
|
||||||
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
|
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
|
||||||
video_title = 'Fernsehkritik %d.%d' % (episode, i)
|
|
||||||
videos.append({
|
videos.append({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
38
youtube_dl/extractor/gamekings.py
Normal file
38
youtube_dl/extractor/gamekings.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class GamekingsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||||
|
_TEST = {
|
||||||
|
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
|
||||||
|
u'file': u'20130811.mp4',
|
||||||
|
# MD5 is flaky, seems to change regularly
|
||||||
|
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
|
||||||
|
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
name = mobj.group('name')
|
||||||
|
webpage = self._download_webpage(url, name)
|
||||||
|
video_url = self._og_search_video_url(webpage)
|
||||||
|
|
||||||
|
video = re.search(r'[0-9]+', video_url)
|
||||||
|
video_id = video.group(0)
|
||||||
|
|
||||||
|
# Todo: add medium format
|
||||||
|
video_url = video_url.replace(video_id, 'large/' + video_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'url': video_url,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
}
|
@ -24,7 +24,7 @@ class GameSpotIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
page_id = video_id = mobj.group('page_id')
|
page_id = mobj.group('page_id')
|
||||||
webpage = self._download_webpage(url, page_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
|
data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
|
||||||
data_video = json.loads(unescapeHTML(data_video_json))
|
data_video = json.loads(unescapeHTML(data_video_json))
|
||||||
|
@ -55,15 +55,17 @@ class GenericIE(InfoExtractor):
|
|||||||
u'skip': u'There is a limit of 200 free downloads / month for the test song',
|
u'skip': u'There is a limit of 200 free downloads / month for the test song',
|
||||||
},
|
},
|
||||||
# embedded brightcove video
|
# embedded brightcove video
|
||||||
|
# it also tests brightcove videos that need to set the 'Referer' in the
|
||||||
|
# http requests
|
||||||
{
|
{
|
||||||
u'add_ie': ['Brightcove'],
|
u'add_ie': ['Brightcove'],
|
||||||
u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics',
|
u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'id': u'2365799484001',
|
u'id': u'2765128793001',
|
||||||
u'ext': u'mp4',
|
u'ext': u'mp4',
|
||||||
u'title': u'Bubble Simulation',
|
u'title': u'Le cours de bourse : l’analyse technique',
|
||||||
u'description': u'A visualization from a new computer model of foam behavior.',
|
u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9',
|
||||||
u'uploader': u'Scientific American',
|
u'uploader': u'BFM BUSINESS',
|
||||||
},
|
},
|
||||||
u'params': {
|
u'params': {
|
||||||
u'skip_download': True,
|
u'skip_download': True,
|
||||||
@ -160,6 +162,16 @@ class GenericIE(InfoExtractor):
|
|||||||
raise ExtractorError(u'Failed to download URL: %s' % url)
|
raise ExtractorError(u'Failed to download URL: %s' % url)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
|
# it's tempting to parse this further, but you would
|
||||||
|
# have to take into account all the variations like
|
||||||
|
# Video Title - Site Name
|
||||||
|
# Site Name | Video Title
|
||||||
|
# Video Title - Tagline | Site Name
|
||||||
|
# and so on and so forth; it's just not practical
|
||||||
|
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
||||||
|
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
||||||
|
|
||||||
# Look for BrightCove:
|
# Look for BrightCove:
|
||||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||||
if bc_url is not None:
|
if bc_url is not None:
|
||||||
@ -175,17 +187,20 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(surl, 'Vimeo')
|
return self.url_result(surl, 'Vimeo')
|
||||||
|
|
||||||
# Look for embedded YouTube player
|
# Look for embedded YouTube player
|
||||||
mobj = re.search(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?youtube.com/embed/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
|
||||||
if mobj:
|
if matches:
|
||||||
surl = unescapeHTML(mobj.group(u'url'))
|
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
||||||
return self.url_result(surl, 'Youtube')
|
for tuppl in matches]
|
||||||
|
return self.playlist_result(
|
||||||
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
# Look for Bandcamp pages with custom domain
|
# Look for Bandcamp pages with custom domain
|
||||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
burl = unescapeHTML(mobj.group(1))
|
burl = unescapeHTML(mobj.group(1))
|
||||||
return self.url_result(burl, 'Bandcamp')
|
# Don't set the extractor because it can be a track url or an album
|
||||||
|
return self.url_result(burl)
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
@ -224,15 +239,6 @@ class GenericIE(InfoExtractor):
|
|||||||
video_extension = os.path.splitext(video_id)[1][1:]
|
video_extension = os.path.splitext(video_id)[1][1:]
|
||||||
video_id = os.path.splitext(video_id)[0]
|
video_id = os.path.splitext(video_id)[0]
|
||||||
|
|
||||||
# it's tempting to parse this further, but you would
|
|
||||||
# have to take into account all the variations like
|
|
||||||
# Video Title - Site Name
|
|
||||||
# Site Name | Video Title
|
|
||||||
# Video Title - Tagline | Site Name
|
|
||||||
# and so on and so forth; it's just not practical
|
|
||||||
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
|
||||||
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
|
||||||
|
|
||||||
# video uploader is domain name
|
# video uploader is domain name
|
||||||
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
||||||
url, u'video uploader')
|
url, u'video uploader')
|
||||||
|
@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||||
u'file': u'390161.mp4',
|
u'file': u'390161.mp4',
|
||||||
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
|
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
||||||
u"title": u"How to Tie a Square Knot Properly"
|
u"title": u"How to Tie a Square Knot Properly"
|
||||||
|
@ -22,7 +22,7 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
title = re.match(self._VALID_URL, url).group(1)
|
title = mobj.group(1)
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
xml_link = self._html_search_regex(
|
xml_link = self._html_search_regex(
|
||||||
r'<param name="flashvars" value="config=(.*?)" />',
|
r'<param name="flashvars" value="config=(.*?)" />',
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
import re
|
import re
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
class KankanIE(InfoExtractor):
|
class KankanIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
|
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
|
||||||
@ -30,7 +32,10 @@ class KankanIE(InfoExtractor):
|
|||||||
video_id, u'Downloading video url info')
|
video_id, u'Downloading video url info')
|
||||||
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
|
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
|
||||||
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
|
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
|
||||||
video_url = 'http://%s%s' % (ip, path)
|
param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
|
||||||
|
param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
|
||||||
|
key = _md5('xl_mp43651' + param1 + param2)
|
||||||
|
video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -1,16 +1,17 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
get_meta_content,
|
xpath_with_ns,
|
||||||
ExtractorError,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LivestreamIE(InfoExtractor):
|
class LivestreamIE(InfoExtractor):
|
||||||
|
IE_NAME = u'livestream'
|
||||||
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
||||||
@ -54,3 +55,44 @@ class LivestreamIE(InfoExtractor):
|
|||||||
info = json.loads(self._download_webpage(api_url, video_id,
|
info = json.loads(self._download_webpage(api_url, video_id,
|
||||||
u'Downloading video info'))
|
u'Downloading video info'))
|
||||||
return self._extract_video_info(info)
|
return self._extract_video_info(info)
|
||||||
|
|
||||||
|
|
||||||
|
# The original version of Livestream uses a different system
|
||||||
|
class LivestreamOriginalIE(InfoExtractor):
|
||||||
|
IE_NAME = u'livestream:original'
|
||||||
|
_VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||||
|
u'ext': u'flv',
|
||||||
|
u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
user = mobj.group('user')
|
||||||
|
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
||||||
|
|
||||||
|
api_response = self._download_webpage(api_url, video_id)
|
||||||
|
info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
|
||||||
|
item = info.find('channel').find('item')
|
||||||
|
ns = {'media': 'http://search.yahoo.com/mrss'}
|
||||||
|
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
||||||
|
# Remove the extension and number from the path (like 1.jpg)
|
||||||
|
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': item.find('title').text,
|
||||||
|
'url': 'rtmp://extondemand.livestream.com/ondemand',
|
||||||
|
'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
|
||||||
|
'ext': 'flv',
|
||||||
|
'thumbnail': thumbnail_url,
|
||||||
|
}
|
||||||
|
@ -48,7 +48,7 @@ class MTVIE(InfoExtractor):
|
|||||||
def _transform_rtmp_url(rtmp_video_url):
|
def _transform_rtmp_url(rtmp_video_url):
|
||||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
||||||
if not m:
|
if not m:
|
||||||
raise ExtractorError(u'Cannot transform RTMP url')
|
return rtmp_video_url
|
||||||
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
||||||
return base + m.group('finalid')
|
return base + m.group('finalid')
|
||||||
|
|
||||||
@ -59,7 +59,6 @@ class MTVIE(InfoExtractor):
|
|||||||
if '/error_country_block.swf' in metadataXml:
|
if '/error_country_block.swf' in metadataXml:
|
||||||
raise ExtractorError(u'This video is not available from your country.', expected=True)
|
raise ExtractorError(u'This video is not available from your country.', expected=True)
|
||||||
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
|
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
|
||||||
renditions = mdoc.findall('.//rendition')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for rendition in mdoc.findall('.//rendition'):
|
for rendition in mdoc.findall('.//rendition'):
|
||||||
|
@ -72,7 +72,7 @@ class NHLIE(NHLBaseInfoExtractor):
|
|||||||
|
|
||||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||||
IE_NAME = u'nhl.com:videocenter'
|
IE_NAME = u'nhl.com:videocenter'
|
||||||
IE_DESC = u'Download the first 12 videos from a videocenter category'
|
IE_DESC = u'NHL videocenter category'
|
||||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
|
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -6,7 +6,6 @@ from ..utils import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
unescapeHTML,
|
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_decrypt_text
|
aes_decrypt_text
|
||||||
|
@ -8,7 +8,9 @@ class RedTubeIE(InfoExtractor):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.redtube.com/66418',
|
u'url': u'http://www.redtube.com/66418',
|
||||||
u'file': u'66418.mp4',
|
u'file': u'66418.mp4',
|
||||||
u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
# md5 varies from time to time, as in
|
||||||
|
# https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
|
||||||
|
#u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Sucked on a toilet",
|
u"title": u"Sucked on a toilet",
|
||||||
u"age_limit": 18,
|
u"age_limit": 18,
|
||||||
|
@ -62,18 +62,6 @@ class RTLnowIE(InfoExtractor):
|
|||||||
u'skip_download': True,
|
u'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
u'url': u'http://www.rtlnitronow.de/recht-ordnung/stadtpolizei-frankfurt-gerichtsvollzieher-leipzig.php?film_id=129679&player=1&season=1',
|
|
||||||
u'file': u'129679.flv',
|
|
||||||
u'info_dict': {
|
|
||||||
u'upload_date': u'20131016',
|
|
||||||
u'title': u'Recht & Ordnung - Stadtpolizei Frankfurt/ Gerichtsvollzieher...',
|
|
||||||
u'description': u'Stadtpolizei Frankfurt/ Gerichtsvollzieher Leipzig',
|
|
||||||
},
|
|
||||||
u'params': {
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
|
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
|
||||||
u'file': u'124903.flv',
|
u'file': u'124903.flv',
|
||||||
|
@ -29,19 +29,37 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
IE_NAME = u'soundcloud'
|
IE_NAME = u'soundcloud'
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
{
|
||||||
u'file': u'62986583.mp3',
|
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||||
u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
|
u'file': u'62986583.mp3',
|
||||||
u'info_dict': {
|
u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
|
||||||
u"upload_date": u"20121011",
|
u'info_dict': {
|
||||||
u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
|
u"upload_date": u"20121011",
|
||||||
u"uploader": u"E.T. ExTerrestrial Music",
|
u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
|
||||||
u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
|
u"uploader": u"E.T. ExTerrestrial Music",
|
||||||
}
|
u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
# not streamable song
|
||||||
|
{
|
||||||
|
u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'47127627',
|
||||||
|
u'ext': u'mp3',
|
||||||
|
u'title': u'Goldrushed',
|
||||||
|
u'uploader': u'The Royal Concept',
|
||||||
|
u'upload_date': u'20120521',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||||
|
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
@ -56,24 +74,48 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
|
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
|
||||||
|
|
||||||
def _extract_info_dict(self, info, full_title=None, quiet=False):
|
def _extract_info_dict(self, info, full_title=None, quiet=False):
|
||||||
video_id = info['id']
|
track_id = compat_str(info['id'])
|
||||||
name = full_title or video_id
|
name = full_title or track_id
|
||||||
if quiet == False:
|
if quiet == False:
|
||||||
self.report_extraction(name)
|
self.report_extraction(name)
|
||||||
|
|
||||||
thumbnail = info['artwork_url']
|
thumbnail = info['artwork_url']
|
||||||
if thumbnail is not None:
|
if thumbnail is not None:
|
||||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||||
return {
|
result = {
|
||||||
'id': info['id'],
|
'id': track_id,
|
||||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
|
||||||
'uploader': info['user']['username'],
|
'uploader': info['user']['username'],
|
||||||
'upload_date': unified_strdate(info['created_at']),
|
'upload_date': unified_strdate(info['created_at']),
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'ext': u'mp3',
|
'ext': info.get('original_format', u'mp3'),
|
||||||
'description': info['description'],
|
'description': info['description'],
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
if info.get('downloadable', False):
|
||||||
|
# We can build a direct link to the song
|
||||||
|
result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
|
||||||
|
else:
|
||||||
|
# We have to retrieve the url
|
||||||
|
stream_json = self._download_webpage(
|
||||||
|
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
|
||||||
|
track_id, u'Downloading track url')
|
||||||
|
# There should be only one entry in the dictionary
|
||||||
|
key, stream_url = list(json.loads(stream_json).items())[0]
|
||||||
|
if key.startswith(u'http'):
|
||||||
|
result['url'] = stream_url
|
||||||
|
elif key.startswith(u'rtmp'):
|
||||||
|
# The url doesn't have an rtmp app, we have to extract the playpath
|
||||||
|
url, path = stream_url.split('mp3:', 1)
|
||||||
|
result.update({
|
||||||
|
'url': url,
|
||||||
|
'play_path': 'mp3:' + path,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
# We fallback to the stream_url in the original info, this
|
||||||
|
# cannot be always used, sometimes it can give an HTTP 404 error
|
||||||
|
result['url'] = info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||||
@ -106,70 +148,8 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
class SoundcloudSetIE(SoundcloudIE):
|
class SoundcloudSetIE(SoundcloudIE):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
||||||
IE_NAME = u'soundcloud:set'
|
IE_NAME = u'soundcloud:set'
|
||||||
_TEST = {
|
# it's in tests/test_playlists.py
|
||||||
u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
|
_TESTS = []
|
||||||
u"playlist": [
|
|
||||||
{
|
|
||||||
u"file":"30510138.mp3",
|
|
||||||
u"md5":"f9136bf103901728f29e419d2c70f55d",
|
|
||||||
u"info_dict": {
|
|
||||||
u"upload_date": u"20111213",
|
|
||||||
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
|
|
||||||
u"uploader": u"The Royal Concept",
|
|
||||||
u"title": u"D-D-Dance"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
u"file":"47127625.mp3",
|
|
||||||
u"md5":"09b6758a018470570f8fd423c9453dd8",
|
|
||||||
u"info_dict": {
|
|
||||||
u"upload_date": u"20120521",
|
|
||||||
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
|
|
||||||
u"uploader": u"The Royal Concept",
|
|
||||||
u"title": u"The Royal Concept - Gimme Twice"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
u"file":"47127627.mp3",
|
|
||||||
u"md5":"154abd4e418cea19c3b901f1e1306d9c",
|
|
||||||
u"info_dict": {
|
|
||||||
u"upload_date": u"20120521",
|
|
||||||
u"uploader": u"The Royal Concept",
|
|
||||||
u"title": u"Goldrushed"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
u"file":"47127629.mp3",
|
|
||||||
u"md5":"2f5471edc79ad3f33a683153e96a79c1",
|
|
||||||
u"info_dict": {
|
|
||||||
u"upload_date": u"20120521",
|
|
||||||
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
|
|
||||||
u"uploader": u"The Royal Concept",
|
|
||||||
u"title": u"In the End"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
u"file":"47127631.mp3",
|
|
||||||
u"md5":"f9ba87aa940af7213f98949254f1c6e2",
|
|
||||||
u"info_dict": {
|
|
||||||
u"upload_date": u"20120521",
|
|
||||||
u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
|
|
||||||
u"uploader": u"The Royal Concept",
|
|
||||||
u"title": u"Knocked Up"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
u"file":"75206121.mp3",
|
|
||||||
u"md5":"f9d1fe9406717e302980c30de4af9353",
|
|
||||||
u"info_dict": {
|
|
||||||
u"upload_date": u"20130116",
|
|
||||||
u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ",
|
|
||||||
u"uploader": u"The Royal Concept",
|
|
||||||
u"title": u"World On Fire"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -188,7 +168,6 @@ class SoundcloudSetIE(SoundcloudIE):
|
|||||||
resolv_url = self._resolv_url(url)
|
resolv_url = self._resolv_url(url)
|
||||||
info_json = self._download_webpage(resolv_url, full_title)
|
info_json = self._download_webpage(resolv_url, full_title)
|
||||||
|
|
||||||
videos = []
|
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
if 'errors' in info:
|
if 'errors' in info:
|
||||||
for err in info['errors']:
|
for err in info['errors']:
|
||||||
@ -208,7 +187,7 @@ class SoundcloudUserIE(SoundcloudIE):
|
|||||||
IE_NAME = u'soundcloud:user'
|
IE_NAME = u'soundcloud:user'
|
||||||
|
|
||||||
# it's in tests/test_playlists.py
|
# it's in tests/test_playlists.py
|
||||||
_TEST = None
|
_TESTS = []
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -5,21 +5,19 @@ from .mtv import MTVIE, _media_xml_tag
|
|||||||
|
|
||||||
class SouthParkStudiosIE(MTVIE):
|
class SouthParkStudiosIE(MTVIE):
|
||||||
IE_NAME = u'southparkstudios.com'
|
IE_NAME = u'southparkstudios.com'
|
||||||
_VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)'
|
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||||
|
|
||||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||||
|
|
||||||
_TEST = {
|
# Overwrite MTVIE properties we don't want
|
||||||
|
_TESTS = [{
|
||||||
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
||||||
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
|
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'Bat Daded',
|
u'title': u'Bat Daded',
|
||||||
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
# Overwrite MTVIE properties we don't want
|
|
||||||
_TESTS = []
|
|
||||||
|
|
||||||
def _get_thumbnail_url(self, uri, itemdoc):
|
def _get_thumbnail_url(self, uri, itemdoc):
|
||||||
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||||
@ -31,8 +29,23 @@ class SouthParkStudiosIE(MTVIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
url = u'http://www.' + mobj.group(u'url')
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
|
mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
|
||||||
webpage, u'mgid')
|
webpage, u'mgid')
|
||||||
return self._get_videos_info(mgid)
|
return self._get_videos_info(mgid)
|
||||||
|
|
||||||
|
class SouthparkDeIE(SouthParkStudiosIE):
|
||||||
|
IE_NAME = u'southpark.de'
|
||||||
|
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
||||||
|
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
|
||||||
|
u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'The Government Won\'t Respect My Privacy',
|
||||||
|
u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
@ -6,7 +6,6 @@ from ..utils import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
unescapeHTML,
|
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_decrypt_text
|
aes_decrypt_text
|
||||||
@ -36,11 +35,12 @@ class SpankwireIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
|
video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
|
||||||
video_uploader = self._html_search_regex(r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
|
video_uploader = self._html_search_regex(
|
||||||
thumbnail = self._html_search_regex(r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
|
r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
|
||||||
description = self._html_search_regex(r'>\s*Description:</div>\s*<[^>]*>([^<]+)', webpage, u'description', fatal=False)
|
thumbnail = self._html_search_regex(
|
||||||
if len(description) == 0:
|
r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
|
||||||
description = None
|
description = self._html_search_regex(
|
||||||
|
r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
|
||||||
|
|
||||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
|
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
|
||||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||||
|
@ -6,14 +6,22 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
class SpiegelIE(InfoExtractor):
|
class SpiegelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||||
u'file': u'1259285.mp4',
|
u'file': u'1259285.mp4',
|
||||||
u'md5': u'2c2754212136f35fb4b19767d242f66e',
|
u'md5': u'2c2754212136f35fb4b19767d242f66e',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
|
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||||
|
u'file': u'1309159.mp4',
|
||||||
|
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
@ -21,25 +29,38 @@ class SpiegelIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
|
video_title = self._html_search_regex(
|
||||||
webpage, u'title')
|
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
||||||
|
|
||||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
||||||
xml_code = self._download_webpage(xml_url, video_id,
|
xml_code = self._download_webpage(
|
||||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
xml_url, video_id,
|
||||||
|
note=u'Downloading XML', errnote=u'Failed to download XML')
|
||||||
|
|
||||||
idoc = xml.etree.ElementTree.fromstring(xml_code)
|
idoc = xml.etree.ElementTree.fromstring(xml_code)
|
||||||
last_type = idoc[-1]
|
|
||||||
filename = last_type.findall('./filename')[0].text
|
|
||||||
duration = float(last_type.findall('./duration')[0].text)
|
|
||||||
|
|
||||||
video_url = 'http://video2.spiegel.de/flash/' + filename
|
formats = [
|
||||||
video_ext = filename.rpartition('.')[2]
|
{
|
||||||
|
'format_id': n.tag.rpartition('type')[2],
|
||||||
|
'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
||||||
|
'width': int(n.find('./width').text),
|
||||||
|
'height': int(n.find('./height').text),
|
||||||
|
'abr': int(n.find('./audiobitrate').text),
|
||||||
|
'vbr': int(n.find('./videobitrate').text),
|
||||||
|
'vcodec': n.find('./codec').text,
|
||||||
|
'acodec': 'MP4A',
|
||||||
|
}
|
||||||
|
for n in list(idoc)
|
||||||
|
# Blacklist type 6, it's extremely LQ and not available on the same server
|
||||||
|
if n.tag.startswith('type') and n.tag != 'type6'
|
||||||
|
]
|
||||||
|
formats.sort(key=lambda f: f['vbr'])
|
||||||
|
duration = float(idoc[0].findall('./duration')[0].text)
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'ext': video_ext,
|
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
return [info]
|
return info
|
||||||
|
65
youtube_dl/extractor/streamcloud.py
Normal file
65
youtube_dl/extractor/streamcloud.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class StreamcloudIE(InfoExtractor):
|
||||||
|
IE_NAME = u'streamcloud.eu'
|
||||||
|
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
|
||||||
|
u'file': u'skp9j99s4bpz.mp4',
|
||||||
|
u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'youtube-dl test video \'/\\ ä ↭',
|
||||||
|
u'duration': 9,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
orig_webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
fields = re.findall(r'''(?x)<input\s+
|
||||||
|
type="(?:hidden|submit)"\s+
|
||||||
|
name="([^"]+)"\s+
|
||||||
|
(?:id="[^"]+"\s+)?
|
||||||
|
value="([^"]*)"
|
||||||
|
''', orig_webpage)
|
||||||
|
post = compat_urllib_parse.urlencode(fields)
|
||||||
|
|
||||||
|
self.to_screen('%s: Waiting for timeout' % video_id)
|
||||||
|
time.sleep(12)
|
||||||
|
headers = {
|
||||||
|
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||||
|
}
|
||||||
|
req = compat_urllib_request.Request(url, post, headers)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
req, video_id, note=u'Downloading video page ...')
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<h1[^>]*>([^<]+)<', webpage, u'title')
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'file:\s*"([^"]+)"', webpage, u'video URL')
|
||||||
|
duration_str = self._search_regex(
|
||||||
|
r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
|
||||||
|
duration = None if duration_str is None else int(duration_str)
|
||||||
|
thumbnail = self._search_regex(
|
||||||
|
r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
'duration': duration,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||||||
return any([self._downloader.params.get('writesubtitles', False),
|
return any([self._downloader.params.get('writesubtitles', False),
|
||||||
self._downloader.params.get('writeautomaticsub')])
|
self._downloader.params.get('writeautomaticsub')])
|
||||||
|
|
||||||
def _list_available_subtitles(self, video_id, webpage=None):
|
def _list_available_subtitles(self, video_id, webpage):
|
||||||
""" outputs the available subtitles for the video """
|
""" outputs the available subtitles for the video """
|
||||||
sub_lang_list = self._get_available_subtitles(video_id)
|
sub_lang_list = self._get_available_subtitles(video_id, webpage)
|
||||||
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
|
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
|
||||||
sub_lang = ",".join(list(sub_lang_list.keys()))
|
sub_lang = ",".join(list(sub_lang_list.keys()))
|
||||||
self.to_screen(u'%s: Available subtitles for video: %s' %
|
self.to_screen(u'%s: Available subtitles for video: %s' %
|
||||||
@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||||||
self.to_screen(u'%s: Available automatic captions for video: %s' %
|
self.to_screen(u'%s: Available automatic captions for video: %s' %
|
||||||
(video_id, auto_lang))
|
(video_id, auto_lang))
|
||||||
|
|
||||||
def extract_subtitles(self, video_id, video_webpage=None):
|
def extract_subtitles(self, video_id, webpage):
|
||||||
"""
|
"""
|
||||||
returns {sub_lang: sub} ,{} if subtitles not found or None if the
|
returns {sub_lang: sub} ,{} if subtitles not found or None if the
|
||||||
subtitles aren't requested.
|
subtitles aren't requested.
|
||||||
@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||||||
return None
|
return None
|
||||||
available_subs_list = {}
|
available_subs_list = {}
|
||||||
if self._downloader.params.get('writeautomaticsub', False):
|
if self._downloader.params.get('writeautomaticsub', False):
|
||||||
available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
|
available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
|
||||||
if self._downloader.params.get('writesubtitles', False):
|
if self._downloader.params.get('writesubtitles', False):
|
||||||
available_subs_list.update(self._get_available_subtitles(video_id))
|
available_subs_list.update(self._get_available_subtitles(video_id, webpage))
|
||||||
|
|
||||||
if not available_subs_list: # error, it didn't get the available subtitles
|
if not available_subs_list: # error, it didn't get the available subtitles
|
||||||
return {}
|
return {}
|
||||||
@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||||||
return
|
return
|
||||||
return sub
|
return sub
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
"""
|
"""
|
||||||
returns {sub_lang: url} or {} if not available
|
returns {sub_lang: url} or {} if not available
|
||||||
Must be redefined by the subclasses
|
Must be redefined by the subclasses
|
||||||
|
@ -15,7 +15,8 @@ class SztvHuIE(InfoExtractor):
|
|||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
|
u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
|
||||||
u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
|
u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
|
||||||
}
|
},
|
||||||
|
u'skip': u'Service temporarily disabled as of 2013-11-20'
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -60,7 +60,7 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
return -1
|
return -1
|
||||||
formats.sort(key=sort_key)
|
formats.sort(key=sort_key)
|
||||||
if not formats:
|
if not formats:
|
||||||
raise RegexNotFoundError(u'Unable to extract video URL')
|
raise ExtractorError(u'Unable to extract video URL')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -1,10 +1,13 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
RegexNotFoundError,
|
||||||
|
)
|
||||||
|
|
||||||
class TEDIE(InfoExtractor):
|
class TEDIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL=r'''http://www\.ted\.com/
|
_VALID_URL=r'''http://www\.ted\.com/
|
||||||
(
|
(
|
||||||
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
|
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
|
||||||
@ -32,33 +35,32 @@ class TEDIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m=re.match(self._VALID_URL, url, re.VERBOSE)
|
m=re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
if m.group('type_talk'):
|
if m.group('type_talk'):
|
||||||
return [self._talk_info(url)]
|
return self._talk_info(url)
|
||||||
else :
|
else :
|
||||||
playlist_id=m.group('playlist_id')
|
playlist_id=m.group('playlist_id')
|
||||||
name=m.group('name')
|
name=m.group('name')
|
||||||
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
|
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
|
||||||
return [self._playlist_videos_info(url,name,playlist_id)]
|
return [self._playlist_videos_info(url,name,playlist_id)]
|
||||||
|
|
||||||
def _playlist_videos_info(self,url,name,playlist_id=0):
|
|
||||||
|
def _playlist_videos_info(self, url, name, playlist_id):
|
||||||
'''Returns the videos of the playlist'''
|
'''Returns the videos of the playlist'''
|
||||||
video_RE=r'''
|
|
||||||
<li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
|
webpage = self._download_webpage(
|
||||||
([.\s]*?)data-playlist_item_id="(\d+)"
|
url, playlist_id, u'Downloading playlist webpage')
|
||||||
([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
|
matches = re.finditer(
|
||||||
'''
|
r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
|
||||||
video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
|
webpage)
|
||||||
webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
|
|
||||||
m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
|
|
||||||
m_names=re.finditer(video_name_RE,webpage)
|
|
||||||
|
|
||||||
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
|
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
|
||||||
webpage, 'playlist title')
|
webpage, 'playlist title')
|
||||||
|
|
||||||
playlist_entries = []
|
playlist_entries = [
|
||||||
for m_video, m_name in zip(m_videos,m_names):
|
self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
|
||||||
talk_url='http://www.ted.com%s' % m_name.group('talk_url')
|
for m in matches
|
||||||
playlist_entries.append(self.url_result(talk_url, 'TED'))
|
]
|
||||||
return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title)
|
return self.playlist_result(
|
||||||
|
playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
|
||||||
|
|
||||||
def _talk_info(self, url, video_id=0):
|
def _talk_info(self, url, video_id=0):
|
||||||
"""Return the video for the talk in the url"""
|
"""Return the video for the talk in the url"""
|
||||||
@ -81,16 +83,35 @@ class TEDIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'url': stream['file'],
|
'url': stream['file'],
|
||||||
'format': stream['id']
|
'format': stream['id']
|
||||||
} for stream in info['htmlStreams']]
|
} for stream in info['htmlStreams']]
|
||||||
info = {
|
|
||||||
'id': info['id'],
|
video_id = info['id']
|
||||||
|
|
||||||
|
# subtitles
|
||||||
|
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, webpage)
|
||||||
|
return
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'description': desc,
|
'description': desc,
|
||||||
|
'subtitles': video_subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: Remove when #980 has been merged
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
info.update(info['formats'][-1])
|
try:
|
||||||
|
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
|
||||||
return info
|
languages = re.findall(r'(?:<option value=")(\S+)"', options)
|
||||||
|
if languages:
|
||||||
|
sub_lang_list = {}
|
||||||
|
for l in languages:
|
||||||
|
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
||||||
|
sub_lang_list[l] = url
|
||||||
|
return sub_lang_list
|
||||||
|
except RegexNotFoundError:
|
||||||
|
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||||
|
return {}
|
||||||
|
74
youtube_dl/extractor/toutv.py
Normal file
74
youtube_dl/extractor/toutv.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TouTvIE(InfoExtractor):
|
||||||
|
IE_NAME = u'tou.tv'
|
||||||
|
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.tou.tv/30-vies/S04E41',
|
||||||
|
u'file': u'30-vies_S04E41.mp4',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'30 vies Saison 4 / Épisode 41',
|
||||||
|
u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||||
|
u'age_limit': 8,
|
||||||
|
u'uploader': u'Groupe des Nouveaux Médias',
|
||||||
|
u'duration': 1296,
|
||||||
|
u'upload_date': u'20131118',
|
||||||
|
u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
u'skip_download': True, # Requires rtmpdump
|
||||||
|
},
|
||||||
|
u'skip': 'Only available in Canada'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
mediaId = self._search_regex(
|
||||||
|
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
||||||
|
|
||||||
|
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||||
|
streams_webpage = self._download_webpage(
|
||||||
|
streams_url, video_id, note=u'Downloading stream list')
|
||||||
|
|
||||||
|
streams_doc = xml.etree.ElementTree.fromstring(
|
||||||
|
streams_webpage.encode('utf-8'))
|
||||||
|
video_url = next(n.text
|
||||||
|
for n in streams_doc.findall('.//choice/url')
|
||||||
|
if u'//ad.doubleclick' not in n.text)
|
||||||
|
if video_url.endswith('/Unavailable.flv'):
|
||||||
|
raise ExtractorError(
|
||||||
|
u'Access to this video is blocked from outside of Canada',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
duration_str = self._html_search_meta(
|
||||||
|
'video:duration', webpage, u'duration')
|
||||||
|
duration = int(duration_str) if duration_str else None
|
||||||
|
upload_date_str = self._html_search_meta(
|
||||||
|
'video:release_date', webpage, u'upload date')
|
||||||
|
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'url': video_url,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'uploader': self._dc_search_uploader(webpage),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'age_limit': self._media_rating_search(webpage),
|
||||||
|
'duration': duration,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'ext': 'mp4',
|
||||||
|
}
|
@ -5,8 +5,6 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
|
||||||
unescapeHTML,
|
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_decrypt_text
|
aes_decrypt_text
|
||||||
|
42
youtube_dl/extractor/tvp.py
Normal file
42
youtube_dl/extractor/tvp.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class TvpIE(InfoExtractor):
|
||||||
|
IE_NAME = u'tvp.pl'
|
||||||
|
_VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
|
||||||
|
u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
|
||||||
|
u'file': u'12878238.wmv',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'31.10.2013 - Odcinek 2',
|
||||||
|
u'description': u'31.10.2013 - Odcinek 2',
|
||||||
|
},
|
||||||
|
u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
json_url = 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id
|
||||||
|
json_params = self._download_webpage(
|
||||||
|
json_url, video_id, u"Downloading video metadata")
|
||||||
|
|
||||||
|
params = json.loads(json_params)
|
||||||
|
self.report_extraction(video_id)
|
||||||
|
video_url = params['video_url']
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage, fatal=True)
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'ext': 'wmv',
|
||||||
|
'url': video_url,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
}
|
@ -78,12 +78,13 @@ class VevoIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||||
format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' %
|
|
||||||
m.groupdict())
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'format_id': u'SMIL_' + m.group('cbr'),
|
'format_id': u'SMIL_' + m.group('cbr'),
|
||||||
'format_note': format_note,
|
'vcodec': m.group('vcodec'),
|
||||||
|
'acodec': m.group('acodec'),
|
||||||
|
'vbr': int(m.group('vbr')),
|
||||||
|
'abr': int(m.group('abr')),
|
||||||
'ext': m.group('ext'),
|
'ext': m.group('ext'),
|
||||||
'width': int(m.group('width')),
|
'width': int(m.group('width')),
|
||||||
'height': int(m.group('height')),
|
'height': int(m.group('height')),
|
||||||
|
@ -24,12 +24,16 @@ class VideoPremiumIE(InfoExtractor):
|
|||||||
webpage_url = 'http://videopremium.tv/' + video_id
|
webpage_url = 'http://videopremium.tv/' + video_id
|
||||||
webpage = self._download_webpage(webpage_url, video_id)
|
webpage = self._download_webpage(webpage_url, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
if re.match(r"^<html><head><script[^>]*>window.location\s*=", webpage):
|
||||||
|
# Download again, we need a cookie
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
webpage_url, video_id,
|
||||||
|
note=u'Downloading webpage again (with cookie)')
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h2(?:.*?)>\s*(.+?)\s*<',
|
video_title = self._html_search_regex(
|
||||||
webpage, u'video title')
|
r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, u'video title')
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
|
'url': "rtmp://e%d.md.iplay.md/play" % random.randint(1, 16),
|
||||||
'play_path': "mp4:%s.f4v" % video_id,
|
'play_path': "mp4:%s.f4v" % video_id,
|
||||||
@ -37,4 +41,4 @@ class VideoPremiumIE(InfoExtractor):
|
|||||||
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
'player_url': "http://videopremium.tv/uplayer/uppod.swf",
|
||||||
'ext': 'f4v',
|
'ext': 'f4v',
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
}]
|
}
|
@ -151,7 +151,7 @@ class VimeoIE(InfoExtractor):
|
|||||||
config = json.loads(config_json)
|
config = json.loads(config_json)
|
||||||
except RegexNotFoundError:
|
except RegexNotFoundError:
|
||||||
# For pro videos or player.vimeo.com urls
|
# For pro videos or player.vimeo.com urls
|
||||||
config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
|
config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'],
|
||||||
webpage, u'info section', flags=re.DOTALL)
|
webpage, u'info section', flags=re.DOTALL)
|
||||||
config = json.loads(config)
|
config = json.loads(config)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -27,7 +27,7 @@ class VineIE(InfoExtractor):
|
|||||||
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
|
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
|
||||||
webpage, u'video URL')
|
webpage, u'video URL')
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'<div class="user">.*?<h2>(.+?)</h2>',
|
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
|
||||||
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
|
@ -9,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class XNXXIE(InfoExtractor):
|
class XNXXIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
|
_VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
|
||||||
VIDEO_URL_RE = r'flv_url=(.*?)&'
|
VIDEO_URL_RE = r'flv_url=(.*?)&'
|
||||||
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
|
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
|
||||||
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
|
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
|
||||||
|
@ -5,7 +5,6 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
class XTubeIE(InfoExtractor):
|
class XTubeIE(InfoExtractor):
|
||||||
|
@ -139,10 +139,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com'
|
IE_DESC = u'YouTube.com'
|
||||||
_VALID_URL = r"""^
|
_VALID_URL = r"""(?x)^
|
||||||
(
|
(
|
||||||
(?:https?://)? # http(s):// (optional)
|
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
||||||
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
|
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
||||||
tube\.majestyc\.net/|
|
tube\.majestyc\.net/|
|
||||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
@ -363,6 +363,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
u"uploader_id": u"justintimberlakeVEVO"
|
u"uploader_id": u"justintimberlakeVEVO"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
|
||||||
|
u"file": u"yZIXLfi8CZQ.mp4",
|
||||||
|
u"note": u"Embed-only video (#1746)",
|
||||||
|
u"info_dict": {
|
||||||
|
u"upload_date": u"20120608",
|
||||||
|
u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
|
||||||
|
u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
|
||||||
|
u"uploader": u"SET India",
|
||||||
|
u"uploader_id": u"setindia"
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -370,7 +382,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
"""Receives a URL and returns True if suitable for this IE."""
|
||||||
if YoutubePlaylistIE.suitable(url): return False
|
if YoutubePlaylistIE.suitable(url): return False
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
return re.match(cls._VALID_URL, url) is not None
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(YoutubeIE, self).__init__(*args, **kwargs)
|
super(YoutubeIE, self).__init__(*args, **kwargs)
|
||||||
@ -1019,6 +1031,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
"""Turn the encrypted s field into a working signature"""
|
"""Turn the encrypted s field into a working signature"""
|
||||||
|
|
||||||
if player_url is not None:
|
if player_url is not None:
|
||||||
|
if player_url.startswith(u'//'):
|
||||||
|
player_url = u'https:' + player_url
|
||||||
try:
|
try:
|
||||||
player_id = (player_url, len(s))
|
player_id = (player_url, len(s))
|
||||||
if player_id not in self._player_cache:
|
if player_id not in self._player_cache:
|
||||||
@ -1082,7 +1096,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
|
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
sub_list = self._download_webpage(
|
sub_list = self._download_webpage(
|
||||||
'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
||||||
@ -1098,7 +1112,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
params = compat_urllib_parse.urlencode({
|
params = compat_urllib_parse.urlencode({
|
||||||
'lang': lang,
|
'lang': lang,
|
||||||
'v': video_id,
|
'v': video_id,
|
||||||
'fmt': self._downloader.params.get('subtitlesformat'),
|
'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
|
||||||
'name': l[0].encode('utf-8'),
|
'name': l[0].encode('utf-8'),
|
||||||
})
|
})
|
||||||
url = u'http://www.youtube.com/api/timedtext?' + params
|
url = u'http://www.youtube.com/api/timedtext?' + params
|
||||||
@ -1111,7 +1125,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
def _get_available_automatic_caption(self, video_id, webpage):
|
def _get_available_automatic_caption(self, video_id, webpage):
|
||||||
"""We need the webpage for getting the captions url, pass it as an
|
"""We need the webpage for getting the captions url, pass it as an
|
||||||
argument to speed up the process."""
|
argument to speed up the process."""
|
||||||
sub_format = self._downloader.params.get('subtitlesformat')
|
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||||
self.to_screen(u'%s: Looking for automatic captions' % video_id)
|
self.to_screen(u'%s: Looking for automatic captions' % video_id)
|
||||||
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
|
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
|
||||||
err_msg = u'Couldn\'t find automatic captions for %s' % video_id
|
err_msg = u'Couldn\'t find automatic captions for %s' % video_id
|
||||||
@ -1270,7 +1284,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||||
# this can be viewed without login into Youtube
|
# this can be viewed without login into Youtube
|
||||||
data = compat_urllib_parse.urlencode({'video_id': video_id,
|
data = compat_urllib_parse.urlencode({'video_id': video_id,
|
||||||
'el': 'embedded',
|
'el': 'player_embedded',
|
||||||
'gl': 'US',
|
'gl': 'US',
|
||||||
'hl': 'en',
|
'hl': 'en',
|
||||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||||
@ -1299,6 +1313,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
raise ExtractorError(u'"token" parameter not in video info for unknown reason')
|
raise ExtractorError(u'"token" parameter not in video info for unknown reason')
|
||||||
|
|
||||||
|
if 'view_count' in video_info:
|
||||||
|
view_count = int(video_info['view_count'][0])
|
||||||
|
else:
|
||||||
|
view_count = None
|
||||||
|
|
||||||
# Check for "rental" videos
|
# Check for "rental" videos
|
||||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||||
raise ExtractorError(u'"rental" videos not supported')
|
raise ExtractorError(u'"rental" videos not supported')
|
||||||
@ -1487,10 +1506,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'age_limit': 18 if age_gate else 0,
|
'age_limit': 18 if age_gate else 0,
|
||||||
'annotations': video_annotations,
|
'annotations': video_annotations,
|
||||||
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
|
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
|
||||||
|
'view_count': view_count,
|
||||||
})
|
})
|
||||||
return results
|
return results
|
||||||
|
|
||||||
class YoutubePlaylistIE(InfoExtractor):
|
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com playlists'
|
IE_DESC = u'YouTube.com playlists'
|
||||||
_VALID_URL = r"""(?:
|
_VALID_URL = r"""(?:
|
||||||
(?:https?://)?
|
(?:https?://)?
|
||||||
@ -1506,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
|
|||||||
|
|
|
|
||||||
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
|
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
|
||||||
)"""
|
)"""
|
||||||
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
|
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
||||||
_MAX_RESULTS = 50
|
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||||
|
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -1515,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor):
|
|||||||
"""Receives a URL and returns True if suitable for this IE."""
|
"""Receives a URL and returns True if suitable for this IE."""
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract playlist id
|
# Extract playlist id
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
@ -1528,45 +1552,28 @@ class YoutubePlaylistIE(InfoExtractor):
|
|||||||
video_id = query_dict['v'][0]
|
video_id = query_dict['v'][0]
|
||||||
if self._downloader.params.get('noplaylist'):
|
if self._downloader.params.get('noplaylist'):
|
||||||
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
||||||
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
|
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
else:
|
else:
|
||||||
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||||
|
|
||||||
# Download playlist videos from API
|
# Extract the video ids from the playlist pages
|
||||||
videos = []
|
ids = []
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
start_index = self._MAX_RESULTS * (page_num - 1) + 1
|
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
||||||
if start_index >= 1000:
|
|
||||||
self._downloader.report_warning(u'Max number of results reached')
|
|
||||||
break
|
|
||||||
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
|
|
||||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
||||||
|
# The ids are duplicated
|
||||||
|
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
|
||||||
|
ids.extend(new_ids)
|
||||||
|
|
||||||
try:
|
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
||||||
response = json.loads(page)
|
|
||||||
except ValueError as err:
|
|
||||||
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
|
|
||||||
|
|
||||||
if 'feed' not in response:
|
|
||||||
raise ExtractorError(u'Got a malformed response from YouTube API')
|
|
||||||
playlist_title = response['feed']['title']['$t']
|
|
||||||
if 'entry' not in response['feed']:
|
|
||||||
# Number of videos is a multiple of self._MAX_RESULTS
|
|
||||||
break
|
break
|
||||||
|
|
||||||
for entry in response['feed']['entry']:
|
playlist_title = self._og_search_title(page)
|
||||||
index = entry['yt$position']['$t']
|
|
||||||
if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
|
|
||||||
videos.append((
|
|
||||||
index,
|
|
||||||
'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
|
|
||||||
))
|
|
||||||
|
|
||||||
videos = [v[1] for v in sorted(videos)]
|
url_results = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
|
for video_id in ids]
|
||||||
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
|
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||||
return [self.playlist_result(url_results, playlist_id, playlist_title)]
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeChannelIE(InfoExtractor):
|
class YoutubeChannelIE(InfoExtractor):
|
||||||
@ -1592,26 +1599,37 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
# Download channel page
|
# Download channel page
|
||||||
channel_id = mobj.group(1)
|
channel_id = mobj.group(1)
|
||||||
video_ids = []
|
video_ids = []
|
||||||
|
url = 'https://www.youtube.com/channel/%s/videos' % channel_id
|
||||||
|
channel_page = self._download_webpage(url, channel_id)
|
||||||
|
if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
|
||||||
|
autogenerated = True
|
||||||
|
else:
|
||||||
|
autogenerated = False
|
||||||
|
|
||||||
# Download all channel pages using the json-based channel_ajax query
|
if autogenerated:
|
||||||
for pagenum in itertools.count(1):
|
# The videos are contained in a single page
|
||||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
# the ajax pages can't be used, they are empty
|
||||||
page = self._download_webpage(url, channel_id,
|
video_ids = self.extract_videos_from_page(channel_page)
|
||||||
u'Downloading page #%s' % pagenum)
|
else:
|
||||||
|
# Download all channel pages using the json-based channel_ajax query
|
||||||
page = json.loads(page)
|
for pagenum in itertools.count(1):
|
||||||
|
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
page = self._download_webpage(url, channel_id,
|
||||||
video_ids.extend(ids_in_page)
|
u'Downloading page #%s' % pagenum)
|
||||||
|
|
||||||
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
page = json.loads(page)
|
||||||
break
|
|
||||||
|
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||||
|
video_ids.extend(ids_in_page)
|
||||||
|
|
||||||
|
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
||||||
|
break
|
||||||
|
|
||||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||||
|
|
||||||
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
|
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
|
for video_id in video_ids]
|
||||||
return [self.playlist_result(url_entries, channel_id)]
|
return self.playlist_result(url_entries, channel_id)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeUserIE(InfoExtractor):
|
class YoutubeUserIE(InfoExtractor):
|
||||||
@ -1675,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):
|
|||||||
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
||||||
break
|
break
|
||||||
|
|
||||||
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
|
url_results = [
|
||||||
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
|
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
return [self.playlist_result(url_results, playlist_title = username)]
|
for video_id in video_ids]
|
||||||
|
return self.playlist_result(url_results, playlist_title=username)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchIE(SearchInfoExtractor):
|
class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com searches'
|
IE_DESC = u'YouTube.com searches'
|
||||||
@ -1718,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
|||||||
|
|
||||||
if len(video_ids) > n:
|
if len(video_ids) > n:
|
||||||
video_ids = video_ids[:n]
|
video_ids = video_ids[:n]
|
||||||
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
|
videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
|
for video_id in video_ids]
|
||||||
return self.playlist_result(videos, query)
|
return self.playlist_result(videos, query)
|
||||||
|
|
||||||
class YoutubeSearchDateIE(YoutubeSearchIE):
|
class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||||
@ -1778,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
feed_html = info['feed_html']
|
feed_html = info['feed_html']
|
||||||
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
||||||
ids = orderedSet(m.group(1) for m in m_ids)
|
ids = orderedSet(m.group(1) for m in m_ids)
|
||||||
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
feed_entries.extend(
|
||||||
|
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
|
for video_id in ids)
|
||||||
if info['paging'] is None:
|
if info['paging'] is None:
|
||||||
break
|
break
|
||||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||||
|
@ -53,7 +53,7 @@ class ZDFIE(InfoExtractor):
|
|||||||
video_id,
|
video_id,
|
||||||
u'Get stream URL')
|
u'Get stream URL')
|
||||||
|
|
||||||
MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
|
#MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
|
||||||
RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
|
RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
|
||||||
|
|
||||||
mobj = re.search(self._MEDIA_STREAM, media_link)
|
mobj = re.search(self._MEDIA_STREAM, media_link)
|
||||||
|
@ -2,11 +2,15 @@ import io
|
|||||||
import json
|
import json
|
||||||
import traceback
|
import traceback
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from zipimport import zipimporter
|
from zipimport import zipimporter
|
||||||
|
|
||||||
from .utils import *
|
from .utils import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
|
|
||||||
def rsa_verify(message, signature, key):
|
def rsa_verify(message, signature, key):
|
||||||
@ -37,6 +41,7 @@ def rsa_verify(message, signature, key):
|
|||||||
if signature != sha256(message).digest(): return False
|
if signature != sha256(message).digest(): return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def update_self(to_screen, verbose):
|
def update_self(to_screen, verbose):
|
||||||
"""Update the program file with the latest version from the repository"""
|
"""Update the program file with the latest version from the repository"""
|
||||||
|
|
||||||
@ -78,6 +83,13 @@ def update_self(to_screen, verbose):
|
|||||||
return
|
return
|
||||||
|
|
||||||
version_id = versions_info['latest']
|
version_id = versions_info['latest']
|
||||||
|
|
||||||
|
def version_tuple(version_str):
|
||||||
|
return tuple(map(int, version_str.split('.')))
|
||||||
|
if version_tuple(__version__) >= version_tuple(version_str):
|
||||||
|
to_screen(u'youtube-dl is up to date (%s)' % __version__)
|
||||||
|
return
|
||||||
|
|
||||||
to_screen(u'Updating to version ' + version_id + '...')
|
to_screen(u'Updating to version ' + version_id + '...')
|
||||||
version = versions_info['versions'][version_id]
|
version = versions_info['versions'][version_id]
|
||||||
|
|
||||||
@ -105,7 +117,7 @@ def update_self(to_screen, verbose):
|
|||||||
urlh = compat_urllib_request.urlopen(version['exe'][0])
|
urlh = compat_urllib_request.urlopen(version['exe'][0])
|
||||||
newcontent = urlh.read()
|
newcontent = urlh.read()
|
||||||
urlh.close()
|
urlh.close()
|
||||||
except (IOError, OSError) as err:
|
except (IOError, OSError):
|
||||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||||
to_screen(u'ERROR: unable to download latest version')
|
to_screen(u'ERROR: unable to download latest version')
|
||||||
return
|
return
|
||||||
@ -118,7 +130,7 @@ def update_self(to_screen, verbose):
|
|||||||
try:
|
try:
|
||||||
with open(exe + '.new', 'wb') as outf:
|
with open(exe + '.new', 'wb') as outf:
|
||||||
outf.write(newcontent)
|
outf.write(newcontent)
|
||||||
except (IOError, OSError) as err:
|
except (IOError, OSError):
|
||||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||||
to_screen(u'ERROR: unable to write the new version')
|
to_screen(u'ERROR: unable to write the new version')
|
||||||
return
|
return
|
||||||
@ -137,7 +149,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
|
|||||||
|
|
||||||
subprocess.Popen([bat]) # Continues to run in the background
|
subprocess.Popen([bat]) # Continues to run in the background
|
||||||
return # Do not show premature success messages
|
return # Do not show premature success messages
|
||||||
except (IOError, OSError) as err:
|
except (IOError, OSError):
|
||||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||||
to_screen(u'ERROR: unable to overwrite current version')
|
to_screen(u'ERROR: unable to overwrite current version')
|
||||||
return
|
return
|
||||||
@ -148,7 +160,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
|
|||||||
urlh = compat_urllib_request.urlopen(version['bin'][0])
|
urlh = compat_urllib_request.urlopen(version['bin'][0])
|
||||||
newcontent = urlh.read()
|
newcontent = urlh.read()
|
||||||
urlh.close()
|
urlh.close()
|
||||||
except (IOError, OSError) as err:
|
except (IOError, OSError):
|
||||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||||
to_screen(u'ERROR: unable to download latest version')
|
to_screen(u'ERROR: unable to download latest version')
|
||||||
return
|
return
|
||||||
@ -161,7 +173,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
|
|||||||
try:
|
try:
|
||||||
with open(filename, 'wb') as outf:
|
with open(filename, 'wb') as outf:
|
||||||
outf.write(newcontent)
|
outf.write(newcontent)
|
||||||
except (IOError, OSError) as err:
|
except (IOError, OSError):
|
||||||
if verbose: to_screen(compat_str(traceback.format_exc()))
|
if verbose: to_screen(compat_str(traceback.format_exc()))
|
||||||
to_screen(u'ERROR: unable to overwrite current version')
|
to_screen(u'ERROR: unable to overwrite current version')
|
||||||
return
|
return
|
||||||
|
@ -734,6 +734,8 @@ def unified_strdate(date_str):
|
|||||||
'%Y/%m/%d %H:%M:%S',
|
'%Y/%m/%d %H:%M:%S',
|
||||||
'%d.%m.%Y %H:%M',
|
'%d.%m.%Y %H:%M',
|
||||||
'%Y-%m-%dT%H:%M:%SZ',
|
'%Y-%m-%dT%H:%M:%SZ',
|
||||||
|
'%Y-%m-%dT%H:%M:%S.%fZ',
|
||||||
|
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
||||||
'%Y-%m-%dT%H:%M:%S',
|
'%Y-%m-%dT%H:%M:%S',
|
||||||
]
|
]
|
||||||
for expression in format_expressions:
|
for expression in format_expressions:
|
||||||
@ -949,7 +951,16 @@ class locked_file(object):
|
|||||||
|
|
||||||
|
|
||||||
def shell_quote(args):
|
def shell_quote(args):
|
||||||
return ' '.join(map(pipes.quote, args))
|
quoted_args = []
|
||||||
|
encoding = sys.getfilesystemencoding()
|
||||||
|
if encoding is None:
|
||||||
|
encoding = 'utf-8'
|
||||||
|
for a in args:
|
||||||
|
if isinstance(a, bytes):
|
||||||
|
# We may get a filename encoded with 'encodeFilename'
|
||||||
|
a = a.decode(encoding)
|
||||||
|
quoted_args.append(pipes.quote(a))
|
||||||
|
return u' '.join(quoted_args)
|
||||||
|
|
||||||
|
|
||||||
def takewhile_inclusive(pred, seq):
|
def takewhile_inclusive(pred, seq):
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.11.06.1'
|
__version__ = '2013.11.22.2'
|
||||||
|
Reference in New Issue
Block a user