Compare commits
140 Commits
2014.04.04
...
2014.04.21
Author | SHA1 | Date | |
---|---|---|---|
|
4086f11929 | ||
|
478c2c6193 | ||
|
d2d6481afb | ||
|
43acb120f3 | ||
|
e8f2025edf | ||
|
a4eb9578af | ||
|
fa35cdad02 | ||
|
d1b9c912a4 | ||
|
edec83a025 | ||
|
c0a7c60815 | ||
|
117a7d1944 | ||
|
a40e0dd434 | ||
|
188b086dd9 | ||
|
1f27d2c0e1 | ||
|
7560096db5 | ||
|
282cb9c7ba | ||
|
3a9d6790ad | ||
|
0610a3e0b2 | ||
|
7f9c31df88 | ||
|
3fa6b6e293 | ||
|
3c50b99ab4 | ||
|
52fadd5fb2 | ||
|
5367fe7f4d | ||
|
427588f6e7 | ||
|
51745be312 | ||
|
d7f1e7c88f | ||
|
4145a257be | ||
|
525dc9809e | ||
|
1bf3210816 | ||
|
e6c6d10d99 | ||
|
f270256e06 | ||
|
f401c6f69f | ||
|
b075d25bed | ||
|
3d1bb6b4dd | ||
|
1db2666916 | ||
|
8f5c0218d8 | ||
|
d7666dff82 | ||
|
2d4c98dbd1 | ||
|
fd50bf623c | ||
|
d360a14678 | ||
|
d0f2ab6969 | ||
|
de906ef543 | ||
|
2fb3deeca1 | ||
|
66398056f1 | ||
|
77477fa4c9 | ||
|
a169e18ce1 | ||
|
381640e3ac | ||
|
37e3410137 | ||
|
97b5196960 | ||
|
6a4f3528c8 | ||
|
b9c76aa1a9 | ||
|
0d3070d364 | ||
|
7753cadbfa | ||
|
3950450342 | ||
|
c82b1fdad6 | ||
|
b0fb63abe8 | ||
|
3ab34c603e | ||
|
7d6413341a | ||
|
140012d0f6 | ||
|
4be9f8c814 | ||
|
5c802bac37 | ||
|
6c30ff756a | ||
|
62749e4708 | ||
|
6b7dee4b38 | ||
|
ef2041eb4e | ||
|
29e3e682af | ||
|
f983c44199 | ||
|
e4db19511a | ||
|
c47d21da80 | ||
|
269aecd0c0 | ||
|
aafddb2b0a | ||
|
6262ac8ac5 | ||
|
89938c719e | ||
|
ec0fafbb19 | ||
|
a5863bdf33 | ||
|
b58ddb32ba | ||
|
b9e12a8140 | ||
|
104aa7388a | ||
|
c3855d28b0 | ||
|
734f90bb41 | ||
|
91a6addeeb | ||
|
9afb76c5ad | ||
|
dfb2cb5cfd | ||
|
650d688d10 | ||
|
0ba77818f3 | ||
|
09baa7da7e | ||
|
85e787f51d | ||
|
2a9e1e453a | ||
|
ee1e199685 | ||
|
17c5a00774 | ||
|
15c0e8e7b2 | ||
|
cca37fba48 | ||
|
9d0993ec4a | ||
|
342f33bf9e | ||
|
7cd3bc5f99 | ||
|
931055e6cb | ||
|
d0e4cf82f1 | ||
|
6f88df2c57 | ||
|
4479bf2762 | ||
|
1ff7c0f7d8 | ||
|
610e47c87e | ||
|
50f566076f | ||
|
92810ff497 | ||
|
60ccc59a1c | ||
|
91745595d3 | ||
|
d6e40507d0 | ||
|
deed48b472 | ||
|
e4d41bfca5 | ||
|
a355b70f27 | ||
|
f8514f6186 | ||
|
e09b8fcd9d | ||
|
7d1b527ff9 | ||
|
f943c7b622 | ||
|
676eb3f2dd | ||
|
98b7cf1ace | ||
|
c465afd736 | ||
|
b84d6e7fc4 | ||
|
2efd5d78c1 | ||
|
c8edf47b3a | ||
|
3b4c26a428 | ||
|
1525148114 | ||
|
9e0c5791c1 | ||
|
29a1ab2afc | ||
|
fa387d2d99 | ||
|
6d0d573eca | ||
|
bb799e811b | ||
|
04ee53eca1 | ||
|
659eb98a53 | ||
|
ca6aada48e | ||
|
43df5a7e71 | ||
|
88f1c6de7b | ||
|
65a40ab82b | ||
|
4b9cced103 | ||
|
5c38625259 | ||
|
6344fa04bb | ||
|
e3ced9ed61 | ||
|
784763c565 | ||
|
39c68260c0 | ||
|
149254d0d5 | ||
|
0c14e2fbe3 |
@@ -250,6 +250,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
default
|
default
|
||||||
--embed-subs embed subtitles in the video (only for mp4
|
--embed-subs embed subtitles in the video (only for mp4
|
||||||
videos)
|
videos)
|
||||||
|
--embed-thumbnail embed thumbnail in the audio as cover art
|
||||||
--add-metadata write metadata to the video file
|
--add-metadata write metadata to the video file
|
||||||
--xattrs write metadata to the video file's xattrs
|
--xattrs write metadata to the video file's xattrs
|
||||||
(using dublin core and xdg standards)
|
(using dublin core and xdg standards)
|
||||||
|
@@ -74,13 +74,19 @@ class FakeYDL(YoutubeDL):
|
|||||||
old_report_warning(message)
|
old_report_warning(message)
|
||||||
self.report_warning = types.MethodType(report_warning, self)
|
self.report_warning = types.MethodType(report_warning, self)
|
||||||
|
|
||||||
def gettestcases():
|
|
||||||
|
def gettestcases(include_onlymatching=False):
|
||||||
for ie in youtube_dl.extractor.gen_extractors():
|
for ie in youtube_dl.extractor.gen_extractors():
|
||||||
t = getattr(ie, '_TEST', None)
|
t = getattr(ie, '_TEST', None)
|
||||||
if t:
|
if t:
|
||||||
t['name'] = type(ie).__name__[:-len('IE')]
|
assert not hasattr(ie, '_TESTS'), \
|
||||||
yield t
|
'%s has _TEST and _TESTS' % type(ie).__name__
|
||||||
for t in getattr(ie, '_TESTS', []):
|
tests = [t]
|
||||||
|
else:
|
||||||
|
tests = getattr(ie, '_TESTS', [])
|
||||||
|
for t in tests:
|
||||||
|
if not include_onlymatching and getattr(t, 'only_matching', False):
|
||||||
|
continue
|
||||||
t['name'] = type(ie).__name__[:-len('IE')]
|
t['name'] = type(ie).__name__[:-len('IE')]
|
||||||
yield t
|
yield t
|
||||||
|
|
||||||
|
@@ -49,6 +49,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||||
|
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
||||||
|
|
||||||
def test_youtube_channel_matching(self):
|
def test_youtube_channel_matching(self):
|
||||||
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
|
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
|
||||||
@@ -76,20 +77,20 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||||
|
|
||||||
def test_justin_tv_channelid_matching(self):
|
def test_justin_tv_channelid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv'))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv'))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv'))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv'))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv'))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv'))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/"))
|
self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/'))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/"))
|
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/'))
|
||||||
|
|
||||||
def test_justintv_videoid_matching(self):
|
def test_justintv_videoid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483"))
|
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
|
||||||
|
|
||||||
def test_justin_tv_chapterid_matching(self):
|
def test_justin_tv_chapterid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
def test_youtube_extract(self):
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
@@ -105,7 +106,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
for tc in gettestcases():
|
for tc in gettestcases(include_onlymatching=True):
|
||||||
url = tc['url']
|
url = tc['url']
|
||||||
for ie in ies:
|
for ie in ies:
|
||||||
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
||||||
@@ -156,6 +157,25 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch(
|
self.assertMatch(
|
||||||
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
||||||
['ComedyCentralShows'])
|
['ComedyCentralShows'])
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
|
||||||
|
def test_yahoo_https(self):
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/2701
|
||||||
|
self.assertMatch(
|
||||||
|
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
|
||||||
|
['Yahoo'])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -43,6 +43,7 @@ from youtube_dl.extractor import (
|
|||||||
XTubeUserIE,
|
XTubeUserIE,
|
||||||
InstagramUserIE,
|
InstagramUserIE,
|
||||||
CSpanIE,
|
CSpanIE,
|
||||||
|
AolIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -191,8 +192,8 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['id'], 'dezhurnyi_angel')
|
self.assertEqual(result['id'], 'dezhurnyi_angel')
|
||||||
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
|
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
|
||||||
self.assertTrue(len(result['entries']) >= 36)
|
self.assertTrue(len(result['entries']) >= 23)
|
||||||
|
|
||||||
def test_ivi_compilation_season(self):
|
def test_ivi_compilation_season(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = IviCompilationIE(dl)
|
ie = IviCompilationIE(dl)
|
||||||
@@ -200,7 +201,7 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
|
self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
|
||||||
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
|
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
|
||||||
self.assertTrue(len(result['entries']) >= 20)
|
self.assertTrue(len(result['entries']) >= 7)
|
||||||
|
|
||||||
def test_imdb_list(self):
|
def test_imdb_list(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
@@ -324,10 +325,19 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['id'], '342759')
|
self.assertEqual(result['id'], '342759')
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
result['title'], 'General Motors Ignition Switch Recall')
|
result['title'], 'General Motors Ignition Switch Recall')
|
||||||
self.assertEqual(len(result['entries']), 9)
|
|
||||||
whole_duration = sum(e['duration'] for e in result['entries'])
|
whole_duration = sum(e['duration'] for e in result['entries'])
|
||||||
self.assertEqual(whole_duration, 14855)
|
self.assertEqual(whole_duration, 14855)
|
||||||
|
|
||||||
|
def test_aol_playlist(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = AolIE(dl)
|
||||||
|
result = ie.extract(
|
||||||
|
'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], '152147')
|
||||||
|
self.assertEqual(
|
||||||
|
result['title'], 'Brace Yourself - Today\'s Weirdest News')
|
||||||
|
self.assertTrue(len(result['entries']) >= 10)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -38,6 +38,7 @@ from youtube_dl.utils import (
|
|||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
|
uppercase_escape,
|
||||||
)
|
)
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
@@ -279,6 +280,9 @@ class TestUtil(unittest.TestCase):
|
|||||||
d = json.loads(stripped)
|
d = json.loads(stripped)
|
||||||
self.assertEqual(d, [{"id": "532cb", "x": 3}])
|
self.assertEqual(d, [{"id": "532cb", "x": 3}])
|
||||||
|
|
||||||
|
def test_uppercase_escpae(self):
|
||||||
|
self.assertEqual(uppercase_escape(u'aä'), u'aä')
|
||||||
|
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
34
youtube_dl/YoutubeDL.py
Normal file → Executable file
34
youtube_dl/YoutubeDL.py
Normal file → Executable file
@@ -286,6 +286,9 @@ class YoutubeDL(object):
|
|||||||
"""Print message to stdout if not in quiet mode."""
|
"""Print message to stdout if not in quiet mode."""
|
||||||
return self.to_stdout(message, skip_eol, check_quiet=True)
|
return self.to_stdout(message, skip_eol, check_quiet=True)
|
||||||
|
|
||||||
|
def _write_string(self, s, out=None):
|
||||||
|
write_string(s, out=out, encoding=self.params.get('encoding'))
|
||||||
|
|
||||||
def to_stdout(self, message, skip_eol=False, check_quiet=False):
|
def to_stdout(self, message, skip_eol=False, check_quiet=False):
|
||||||
"""Print message to stdout if not in quiet mode."""
|
"""Print message to stdout if not in quiet mode."""
|
||||||
if self.params.get('logger'):
|
if self.params.get('logger'):
|
||||||
@@ -295,7 +298,7 @@ class YoutubeDL(object):
|
|||||||
terminator = ['\n', ''][skip_eol]
|
terminator = ['\n', ''][skip_eol]
|
||||||
output = message + terminator
|
output = message + terminator
|
||||||
|
|
||||||
write_string(output, self._screen_file)
|
self._write_string(output, self._screen_file)
|
||||||
|
|
||||||
def to_stderr(self, message):
|
def to_stderr(self, message):
|
||||||
"""Print message to stderr."""
|
"""Print message to stderr."""
|
||||||
@@ -305,7 +308,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
message = self._bidi_workaround(message)
|
message = self._bidi_workaround(message)
|
||||||
output = message + '\n'
|
output = message + '\n'
|
||||||
write_string(output, self._err_file)
|
self._write_string(output, self._err_file)
|
||||||
|
|
||||||
def to_console_title(self, message):
|
def to_console_title(self, message):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
@@ -315,21 +318,21 @@ class YoutubeDL(object):
|
|||||||
# already of type unicode()
|
# already of type unicode()
|
||||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||||
elif 'TERM' in os.environ:
|
elif 'TERM' in os.environ:
|
||||||
write_string('\033]0;%s\007' % message, self._screen_file)
|
self._write_string('\033]0;%s\007' % message, self._screen_file)
|
||||||
|
|
||||||
def save_console_title(self):
|
def save_console_title(self):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if 'TERM' in os.environ:
|
if 'TERM' in os.environ:
|
||||||
# Save the title on stack
|
# Save the title on stack
|
||||||
write_string('\033[22;0t', self._screen_file)
|
self._write_string('\033[22;0t', self._screen_file)
|
||||||
|
|
||||||
def restore_console_title(self):
|
def restore_console_title(self):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
return
|
return
|
||||||
if 'TERM' in os.environ:
|
if 'TERM' in os.environ:
|
||||||
# Restore the title from stack
|
# Restore the title from stack
|
||||||
write_string('\033[23;0t', self._screen_file)
|
self._write_string('\033[23;0t', self._screen_file)
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
self.save_console_title()
|
self.save_console_title()
|
||||||
@@ -933,7 +936,7 @@ class YoutubeDL(object):
|
|||||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||||
subfile.write(sub)
|
subfile.write(sub)
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error('Cannot write subtitles file ' + descfn)
|
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writeinfojson', False):
|
if self.params.get('writeinfojson', False):
|
||||||
@@ -1211,9 +1214,16 @@ class YoutubeDL(object):
|
|||||||
if not self.params.get('verbose'):
|
if not self.params.get('verbose'):
|
||||||
return
|
return
|
||||||
|
|
||||||
write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
|
write_string(
|
||||||
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
|
'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
|
||||||
write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
locale.getpreferredencoding(),
|
||||||
|
sys.getfilesystemencoding(),
|
||||||
|
sys.stdout.encoding,
|
||||||
|
self.get_encoding()),
|
||||||
|
encoding=None
|
||||||
|
)
|
||||||
|
|
||||||
|
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
||||||
try:
|
try:
|
||||||
sp = subprocess.Popen(
|
sp = subprocess.Popen(
|
||||||
['git', 'rev-parse', '--short', 'HEAD'],
|
['git', 'rev-parse', '--short', 'HEAD'],
|
||||||
@@ -1222,20 +1232,20 @@ class YoutubeDL(object):
|
|||||||
out, err = sp.communicate()
|
out, err = sp.communicate()
|
||||||
out = out.decode().strip()
|
out = out.decode().strip()
|
||||||
if re.match('[0-9a-f]+', out):
|
if re.match('[0-9a-f]+', out):
|
||||||
write_string('[debug] Git HEAD: ' + out + '\n')
|
self._write_string('[debug] Git HEAD: ' + out + '\n')
|
||||||
except:
|
except:
|
||||||
try:
|
try:
|
||||||
sys.exc_clear()
|
sys.exc_clear()
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
write_string('[debug] Python version %s - %s' %
|
self._write_string('[debug] Python version %s - %s' %
|
||||||
(platform.python_version(), platform_name()) + '\n')
|
(platform.python_version(), platform_name()) + '\n')
|
||||||
|
|
||||||
proxy_map = {}
|
proxy_map = {}
|
||||||
for handler in self._opener.handlers:
|
for handler in self._opener.handlers:
|
||||||
if hasattr(handler, 'proxies'):
|
if hasattr(handler, 'proxies'):
|
||||||
proxy_map.update(handler.proxies)
|
proxy_map.update(handler.proxies)
|
||||||
write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
|
self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
|
||||||
|
|
||||||
def _setup_opener(self):
|
def _setup_opener(self):
|
||||||
timeout_val = self.params.get('socket_timeout')
|
timeout_val = self.params.get('socket_timeout')
|
||||||
|
@@ -52,6 +52,7 @@ __authors__ = (
|
|||||||
'Juan C. Olivares',
|
'Juan C. Olivares',
|
||||||
'Mattias Harrysson',
|
'Mattias Harrysson',
|
||||||
'phaer',
|
'phaer',
|
||||||
|
'Sainyam Kapoor',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
@@ -91,6 +92,8 @@ from .extractor import gen_extractors
|
|||||||
from .version import __version__
|
from .version import __version__
|
||||||
from .YoutubeDL import YoutubeDL
|
from .YoutubeDL import YoutubeDL
|
||||||
from .postprocessor import (
|
from .postprocessor import (
|
||||||
|
AtomicParsleyPP,
|
||||||
|
FFmpegAudioFixPP,
|
||||||
FFmpegMetadataPP,
|
FFmpegMetadataPP,
|
||||||
FFmpegVideoConvertor,
|
FFmpegVideoConvertor,
|
||||||
FFmpegExtractAudioPP,
|
FFmpegExtractAudioPP,
|
||||||
@@ -242,7 +245,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
||||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--prefer-insecure', action='store_true', dest='prefer_insecure',
|
'--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||||
@@ -502,6 +505,8 @@ def parseOpts(overrideArguments=None):
|
|||||||
help='do not overwrite post-processed files; the post-processed files are overwritten by default')
|
help='do not overwrite post-processed files; the post-processed files are overwritten by default')
|
||||||
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
|
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
|
||||||
help='embed subtitles in the video (only for mp4 videos)')
|
help='embed subtitles in the video (only for mp4 videos)')
|
||||||
|
postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
|
||||||
|
help='embed thumbnail in the audio as cover art')
|
||||||
postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
|
postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
|
||||||
help='write metadata to the video file')
|
help='write metadata to the video file')
|
||||||
postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
|
postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
|
||||||
@@ -807,6 +812,10 @@ def _real_main(argv=None):
|
|||||||
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
||||||
if opts.xattrs:
|
if opts.xattrs:
|
||||||
ydl.add_post_processor(XAttrMetadataPP())
|
ydl.add_post_processor(XAttrMetadataPP())
|
||||||
|
if opts.embedthumbnail:
|
||||||
|
if not opts.addmetadata:
|
||||||
|
ydl.add_post_processor(FFmpegAudioFixPP())
|
||||||
|
ydl.add_post_processor(AtomicParsleyPP())
|
||||||
|
|
||||||
# Update version
|
# Update version
|
||||||
if opts.update_self:
|
if opts.update_self:
|
||||||
|
@@ -4,9 +4,10 @@ import sys
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
timeconvert,
|
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
timeconvert,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -173,7 +174,7 @@ class FileDownloader(object):
|
|||||||
return
|
return
|
||||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||||
except (IOError, OSError) as err:
|
except (IOError, OSError) as err:
|
||||||
self.report_error(u'unable to rename file: %s' % str(err))
|
self.report_error(u'unable to rename file: %s' % compat_str(err))
|
||||||
|
|
||||||
def try_utime(self, filename, last_modified_hdr):
|
def try_utime(self, filename, last_modified_hdr):
|
||||||
"""Try to set the last-modified time of the given file."""
|
"""Try to set the last-modified time of the given file."""
|
||||||
|
@@ -32,6 +32,7 @@ from .canal13cl import Canal13clIE
|
|||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
|
from .cbsnews import CBSNewsIE
|
||||||
from .ceskatelevize import CeskaTelevizeIE
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
from .chilloutzone import ChilloutzoneIE
|
from .chilloutzone import ChilloutzoneIE
|
||||||
@@ -39,6 +40,7 @@ from .cinemassacre import CinemassacreIE
|
|||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
from .clipsyndicate import ClipsyndicateIE
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
|
from .clubic import ClubicIE
|
||||||
from .cmt import CMTIE
|
from .cmt import CMTIE
|
||||||
from .cnet import CNETIE
|
from .cnet import CNETIE
|
||||||
from .cnn import (
|
from .cnn import (
|
||||||
@@ -62,6 +64,7 @@ from .dotsub import DotsubIE
|
|||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
from .discovery import DiscoveryIE
|
from .discovery import DiscoveryIE
|
||||||
|
from .divxstage import DivxStageIE
|
||||||
from .dropbox import DropboxIE
|
from .dropbox import DropboxIE
|
||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
@@ -156,6 +159,7 @@ from .mofosex import MofosexIE
|
|||||||
from .mooshare import MooshareIE
|
from .mooshare import MooshareIE
|
||||||
from .morningstar import MorningstarIE
|
from .morningstar import MorningstarIE
|
||||||
from .motorsport import MotorsportIE
|
from .motorsport import MotorsportIE
|
||||||
|
from .movshare import MovShareIE
|
||||||
from .mtv import (
|
from .mtv import (
|
||||||
MTVIE,
|
MTVIE,
|
||||||
MTVIggyIE,
|
MTVIggyIE,
|
||||||
@@ -178,6 +182,7 @@ from .nfb import NFBIE
|
|||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
|
from .noco import NocoIE
|
||||||
from .normalboots import NormalbootsIE
|
from .normalboots import NormalbootsIE
|
||||||
from .novamov import NovaMovIE
|
from .novamov import NovaMovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import NownessIE
|
||||||
@@ -205,6 +210,7 @@ from .rottentomatoes import RottenTomatoesIE
|
|||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
from .rtlnow import RTLnowIE
|
from .rtlnow import RTLnowIE
|
||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
|
from .rtve import RTVEALaCartaIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
@@ -247,6 +253,7 @@ from .tf1 import TF1IE
|
|||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
|
from .tlc import TlcIE, TlcDeIE
|
||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
from .toypics import ToypicsUserIE, ToypicsIE
|
from .toypics import ToypicsUserIE, ToypicsIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
@@ -276,6 +283,7 @@ from .videodetective import VideoDetectiveIE
|
|||||||
from .videolecturesnet import VideoLecturesNetIE
|
from .videolecturesnet import VideoLecturesNetIE
|
||||||
from .videofyme import VideofyMeIE
|
from .videofyme import VideofyMeIE
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
|
from .videoweed import VideoWeedIE
|
||||||
from .vimeo import (
|
from .vimeo import (
|
||||||
VimeoIE,
|
VimeoIE,
|
||||||
VimeoChannelIE,
|
VimeoChannelIE,
|
||||||
|
@@ -8,7 +8,18 @@ from .fivemin import FiveMinIE
|
|||||||
|
|
||||||
class AolIE(InfoExtractor):
|
class AolIE(InfoExtractor):
|
||||||
IE_NAME = 'on.aol.com'
|
IE_NAME = 'on.aol.com'
|
||||||
_VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
aol-video:|
|
||||||
|
http://on\.aol\.com/
|
||||||
|
(?:
|
||||||
|
video/.*-|
|
||||||
|
playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
|
||||||
|
)
|
||||||
|
)
|
||||||
|
(?P<id>[0-9]+)
|
||||||
|
(?:$|\?)
|
||||||
|
'''
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
|
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
|
||||||
@@ -24,5 +35,31 @@ class AolIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
self.to_screen('Downloading 5min.com video %s' % video_id)
|
|
||||||
|
playlist_id = mobj.group('playlist_id')
|
||||||
|
if playlist_id and not self._downloader.params.get('noplaylist'):
|
||||||
|
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
|
||||||
|
playlist_html = self._search_regex(
|
||||||
|
r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
|
||||||
|
'playlist HTML')
|
||||||
|
entries = [{
|
||||||
|
'_type': 'url',
|
||||||
|
'url': 'aol-video:%s' % m.group('id'),
|
||||||
|
'ie_key': 'Aol',
|
||||||
|
} for m in re.finditer(
|
||||||
|
r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
|
||||||
|
playlist_html)]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': playlist_id,
|
||||||
|
'display_id': mobj.group('playlist_display_id'),
|
||||||
|
'title': title,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
|
||||||
return FiveMinIE._build_result(video_id)
|
return FiveMinIE._build_result(video_id)
|
||||||
|
@@ -4,39 +4,72 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class BRIE(InfoExtractor):
|
class BRIE(InfoExtractor):
|
||||||
IE_DESC = "Bayerischer Rundfunk Mediathek"
|
IE_DESC = 'Bayerischer Rundfunk Mediathek'
|
||||||
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
|
_VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html'
|
||||||
_BASE_URL = "http://www.br.de"
|
_BASE_URL = 'http://www.br.de'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
|
||||||
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
|
||||||
"info_dict": {
|
'info_dict': {
|
||||||
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
|
'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
|
||||||
"ext": "mp4",
|
'ext': 'mp4',
|
||||||
"title": "Feiern und Verzichten",
|
'title': 'Feiern und Verzichten',
|
||||||
"description": "Anselm Grün: Feiern und Verzichten",
|
'description': 'Anselm Grün: Feiern und Verzichten',
|
||||||
"uploader": "BR/Birgit Baier",
|
'uploader': 'BR/Birgit Baier',
|
||||||
"upload_date": "20140301"
|
'upload_date': '20140301',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
|
'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html',
|
||||||
"md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
|
'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe',
|
||||||
"info_dict": {
|
'info_dict': {
|
||||||
"id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
|
'id': '2c060e69-3a27-4e13-b0f0-668fac17d812',
|
||||||
"ext": "mp4",
|
'ext': 'mp4',
|
||||||
"title": "Über den Pass",
|
'title': 'Über den Pass',
|
||||||
"description": "Die Eroberung der Alpen: Über den Pass",
|
'description': 'Die Eroberung der Alpen: Über den Pass',
|
||||||
"uploader": None,
|
|
||||||
"upload_date": None
|
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
|
||||||
|
'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
|
||||||
|
'ext': 'aac',
|
||||||
|
'title': '"Keine neuen Schulden im nächsten Jahr"',
|
||||||
|
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
|
||||||
|
'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Umweltbewusster Häuslebauer',
|
||||||
|
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
|
||||||
|
'md5': '23bca295f1650d698f94fc570977dae3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Folge 1 - Metaphysik',
|
||||||
|
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
|
||||||
|
'uploader': 'Eva Maria Steimle',
|
||||||
|
'upload_date': '20140117',
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -44,56 +77,63 @@ class BRIE(InfoExtractor):
|
|||||||
display_id = mobj.group('id')
|
display_id = mobj.group('id')
|
||||||
page = self._download_webpage(url, display_id)
|
page = self._download_webpage(url, display_id)
|
||||||
xml_url = self._search_regex(
|
xml_url = self._search_regex(
|
||||||
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
|
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
|
||||||
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
||||||
|
|
||||||
videos = []
|
medias = []
|
||||||
for xml_video in xml.findall("video"):
|
|
||||||
video = {
|
|
||||||
"id": xml_video.get("externalId"),
|
|
||||||
"title": xml_video.find("title").text,
|
|
||||||
"formats": self._extract_formats(xml_video.find("assets")),
|
|
||||||
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
|
||||||
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
|
||||||
"webpage_url": xml_video.find("permalink").text
|
|
||||||
}
|
|
||||||
if xml_video.find("author").text:
|
|
||||||
video["uploader"] = xml_video.find("author").text
|
|
||||||
if xml_video.find("broadcastDate").text:
|
|
||||||
video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
|
|
||||||
videos.append(video)
|
|
||||||
|
|
||||||
if len(videos) > 1:
|
for xml_media in xml.findall('video') + xml.findall('audio'):
|
||||||
|
media = {
|
||||||
|
'id': xml_media.get('externalId'),
|
||||||
|
'title': xml_media.find('title').text,
|
||||||
|
'formats': self._extract_formats(xml_media.find('assets')),
|
||||||
|
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
|
||||||
|
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
|
||||||
|
'webpage_url': xml_media.find('permalink').text
|
||||||
|
}
|
||||||
|
if xml_media.find('author').text:
|
||||||
|
media['uploader'] = xml_media.find('author').text
|
||||||
|
if xml_media.find('broadcastDate').text:
|
||||||
|
media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
|
||||||
|
medias.append(media)
|
||||||
|
|
||||||
|
if len(medias) > 1:
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
'found multiple videos; please '
|
'found multiple medias; please '
|
||||||
'report this with the video URL to http://yt-dl.org/bug')
|
'report this with the video URL to http://yt-dl.org/bug')
|
||||||
if not videos:
|
if not medias:
|
||||||
raise ExtractorError('No video entries found')
|
raise ExtractorError('No media entries found')
|
||||||
return videos[0]
|
return medias[0]
|
||||||
|
|
||||||
def _extract_formats(self, assets):
|
def _extract_formats(self, assets):
|
||||||
|
|
||||||
|
def text_or_none(asset, tag):
|
||||||
|
elem = asset.find(tag)
|
||||||
|
return None if elem is None else elem.text
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
"url": asset.find("downloadUrl").text,
|
'url': text_or_none(asset, 'downloadUrl'),
|
||||||
"ext": asset.find("mediaType").text,
|
'ext': text_or_none(asset, 'mediaType'),
|
||||||
"format_id": asset.get("type"),
|
'format_id': asset.get('type'),
|
||||||
"width": int(asset.find("frameWidth").text),
|
'width': int_or_none(text_or_none(asset, 'frameWidth')),
|
||||||
"height": int(asset.find("frameHeight").text),
|
'height': int_or_none(text_or_none(asset, 'frameHeight')),
|
||||||
"tbr": int(asset.find("bitrateVideo").text),
|
'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
|
||||||
"abr": int(asset.find("bitrateAudio").text),
|
'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
|
||||||
"vcodec": asset.find("codecVideo").text,
|
'vcodec': text_or_none(asset, 'codecVideo'),
|
||||||
"container": asset.find("mediaType").text,
|
'acodec': text_or_none(asset, 'codecAudio'),
|
||||||
"filesize": int(asset.find("size").text),
|
'container': text_or_none(asset, 'mediaType'),
|
||||||
} for asset in assets.findall("asset")
|
'filesize': int_or_none(text_or_none(asset, 'size')),
|
||||||
if asset.find("downloadUrl") is not None]
|
} for asset in assets.findall('asset')
|
||||||
|
if asset.find('downloadUrl') is not None]
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_thumbnails(self, variants):
|
def _extract_thumbnails(self, variants):
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
"url": self._BASE_URL + variant.find("url").text,
|
'url': self._BASE_URL + variant.find('url').text,
|
||||||
"width": int(variant.find("width").text),
|
'width': int_or_none(variant.find('width').text),
|
||||||
"height": int(variant.find("height").text),
|
'height': int_or_none(variant.find('height').text),
|
||||||
} for variant in variants.findall("variant")]
|
} for variant in variants.findall('variant')]
|
||||||
thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
|
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
|
||||||
return thumbnails
|
return thumbnails
|
||||||
|
@@ -27,9 +27,10 @@ class BreakIE(InfoExtractor):
|
|||||||
webpage, 'info json', flags=re.DOTALL)
|
webpage, 'info json', flags=re.DOTALL)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
video_url = info['videoUri']
|
video_url = info['videoUri']
|
||||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
youtube_id = info.get('youtubeId')
|
||||||
if m_youtube is not None:
|
if youtube_id:
|
||||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
return self.url_result(youtube_id, 'Youtube')
|
||||||
|
|
||||||
final_url = video_url + '?' + info['AuthToken']
|
final_url = video_url + '?' + info['AuthToken']
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -140,7 +140,11 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
|
|
||||||
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
||||||
if url_m:
|
if url_m:
|
||||||
return [unescapeHTML(url_m.group(1))]
|
url = unescapeHTML(url_m.group(1))
|
||||||
|
# Some sites don't add it, we can't download with this url, for example:
|
||||||
|
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
|
||||||
|
if 'playerKey' in url:
|
||||||
|
return [url]
|
||||||
|
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'''(?sx)<object
|
r'''(?sx)<object
|
||||||
|
@@ -4,9 +4,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import ExtractorError
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BYUtvIE(InfoExtractor):
|
class BYUtvIE(InfoExtractor):
|
||||||
@@ -16,7 +14,7 @@ class BYUtvIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'granite-flats-talking',
|
'id': 'granite-flats-talking',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f',
|
'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
|
||||||
'title': 'Talking',
|
'title': 'Talking',
|
||||||
'thumbnail': 're:^https?://.*promo.*'
|
'thumbnail': 're:^https?://.*promo.*'
|
||||||
},
|
},
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -8,46 +10,56 @@ from ..utils import unified_strdate
|
|||||||
class CanalplusIE(InfoExtractor):
|
class CanalplusIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
||||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
||||||
IE_NAME = u'canalplus.fr'
|
IE_NAME = 'canalplus.fr'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
|
'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
|
||||||
u'file': u'922470.flv',
|
'md5': '60c29434a416a83c15dae2587d47027d',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Zapping - 26/08/13',
|
'id': '922470',
|
||||||
u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
|
'ext': 'flv',
|
||||||
u'upload_date': u'20130826',
|
'title': 'Zapping - 26/08/13',
|
||||||
},
|
'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
|
||||||
u'params': {
|
'upload_date': '20130826',
|
||||||
u'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.groupdict().get('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
webpage = self._download_webpage(url, mobj.group('path'))
|
webpage = self._download_webpage(url, mobj.group('path'))
|
||||||
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
|
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, 'video id')
|
||||||
|
|
||||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||||
doc = self._download_xml(info_url,video_id,
|
doc = self._download_xml(info_url, video_id, 'Downloading video XML')
|
||||||
u'Downloading video info')
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
|
||||||
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
||||||
infos = video_info.find('INFOS')
|
|
||||||
media = video_info.find('MEDIA')
|
media = video_info.find('MEDIA')
|
||||||
formats = [media.find('VIDEOS/%s' % format)
|
infos = video_info.find('INFOS')
|
||||||
for format in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']]
|
|
||||||
video_url = [format.text for format in formats if format is not None][-1]
|
|
||||||
|
|
||||||
return {'id': video_id,
|
preferences = ['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']
|
||||||
'title': u'%s - %s' % (infos.find('TITRAGE/TITRE').text,
|
|
||||||
infos.find('TITRAGE/SOUS_TITRE').text),
|
formats = [
|
||||||
'url': video_url,
|
{
|
||||||
'ext': 'flv',
|
'url': fmt.text + '?hdcore=2.11.3' if fmt.tag == 'HDS' else fmt.text,
|
||||||
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
|
'format_id': fmt.tag,
|
||||||
'thumbnail': media.find('IMAGES/GRAND').text,
|
'ext': 'mp4' if fmt.tag == 'HLS' else 'flv',
|
||||||
'description': infos.find('DESCRIPTION').text,
|
'preference': preferences.index(fmt.tag) if fmt.tag in preferences else -1,
|
||||||
'view_count': int(infos.find('NB_VUES').text),
|
} for fmt in media.find('VIDEOS') if fmt.text
|
||||||
}
|
]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text,
|
||||||
|
infos.find('TITRAGE/SOUS_TITRE').text),
|
||||||
|
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
|
||||||
|
'thumbnail': media.find('IMAGES/GRAND').text,
|
||||||
|
'description': infos.find('DESCRIPTION').text,
|
||||||
|
'view_count': int(infos.find('NB_VUES').text),
|
||||||
|
'like_count': int(infos.find('NB_LIKES').text),
|
||||||
|
'comment_count': int(infos.find('NB_COMMENTS').text),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
87
youtube_dl/extractor/cbsnews.py
Normal file
87
youtube_dl/extractor/cbsnews.py
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class CBSNewsIE(InfoExtractor):
    """Extractor for news and video pages on cbsnews.com.

    The page embeds a JSON blob in either a ``data-video-info`` or a
    ``data-video-player-options`` HTML attribute; that blob carries the
    title, thumbnail, duration and one URI per delivery format.
    """

    IE_DESC = 'CBS News'
    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'

    _TESTS = [
        {
            'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
            'info_dict': {
                'id': 'tesla-and-spacex-elon-musks-industrial-empire',
                'ext': 'flv',
                'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
                'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
                'duration': 791,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
            'info_dict': {
                'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
                'ext': 'flv',
                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
                'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg',
                'duration': 205,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Download the page at *url* and return its info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The JSON lives inside a single-quoted HTML attribute.
        raw_info = self._html_search_regex(
            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
            webpage, 'video JSON info')
        video_info = json.loads(raw_info)

        # Some pages nest the metadata under an 'item' key, others do not.
        item = video_info
        if 'item' in video_info:
            item = video_info['item']

        formats = []
        for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
            uri = item.get('media' + format_id + 'URI')
            if uri:
                formats.append(self._build_format(format_id, uri))

        return {
            'id': video_id,
            'title': item.get('articleTitle') or item.get('hed'),
            'thumbnail': item.get('mediaImage') or item.get('thumbnail'),
            'duration': item.get('duration'),
            'formats': formats,
        }

    @staticmethod
    def _build_format(format_id, uri):
        """Return the format dict for one media URI of the given format id."""
        fmt = {
            'url': uri,
            'format_id': format_id,
        }
        if uri.startswith('rtmp'):
            # The play path is whatever follows the last '<break>' marker.
            fmt.update({
                'app': 'ondemand?auth=cbs',
                'play_path': 'mp4:' + uri.split('<break>')[-1],
                'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
                'page_url': 'http://www.cbsnews.com',
                'ext': 'flv',
            })
        elif uri.endswith('.m3u8'):
            fmt['ext'] = 'mp4'
        return fmt
|
58
youtube_dl/extractor/clubic.py
Normal file
58
youtube_dl/extractor/clubic.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
qualities,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ClubicIE(InfoExtractor):
    """Extractor for video pages on clubic.com.

    The numeric id at the end of the page URL is used to fetch the M6 web
    player page; the ``M6.Player.config`` JSON object embedded there
    supplies the title, description, poster and stream sources.
    """

    _VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'

    _TEST = {
        'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
        'md5': '1592b694ba586036efac1776b0b43cd3',
        'info_dict': {
            'id': '448474',
            'ext': 'mp4',
            'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
            'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clubic video page at *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
        player_page = self._download_webpage(player_url, video_id)

        # The config object is assigned on one line ending in ';'.
        config_json = self._search_regex(
            r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
            'configuration')
        config = json.loads(config_json)

        video_info = config['videoInfo']
        sources = config['sources']
        # Ranks 'hq' above 'sd' when the formats are sorted.
        quality_order = qualities(['sd', 'hq'])

        formats = [{
            'format_id': src['streamQuality'],
            'url': src['src'],
            'quality': quality_order(src['streamQuality']),
        } for src in sources]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_info['title'],
            'formats': formats,
            'description': clean_html(video_info.get('description')),
            'thumbnail': config.get('poster'),
        }
|
@@ -21,7 +21,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
||||||
'md5': '4167875aae411f903b751a21f357f1ee',
|
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|
||||||
|https?://(:www\.)?
|
|https?://(:www\.)?
|
||||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||||
(full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||||
(?P<clip>
|
(?P<clip>
|
||||||
(?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
|
(?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||||
)|
|
)|
|
||||||
|
@@ -251,7 +251,10 @@ class InfoExtractor(object):
|
|||||||
with open(filename, 'wb') as outf:
|
with open(filename, 'wb') as outf:
|
||||||
outf.write(webpage_bytes)
|
outf.write(webpage_bytes)
|
||||||
|
|
||||||
content = webpage_bytes.decode(encoding, 'replace')
|
try:
|
||||||
|
content = webpage_bytes.decode(encoding, 'replace')
|
||||||
|
except LookupError:
|
||||||
|
content = webpage_bytes.decode('utf-8', 'replace')
|
||||||
|
|
||||||
if (u'<title>Access to this site is blocked</title>' in content and
|
if (u'<title>Access to this site is blocked</title>' in content and
|
||||||
u'Websense' in content[:512]):
|
u'Websense' in content[:512]):
|
||||||
|
@@ -28,16 +28,18 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'glamour': 'Glamour',
|
'glamour': 'Glamour',
|
||||||
'wmagazine': 'W Magazine',
|
'wmagazine': 'W Magazine',
|
||||||
'vanityfair': 'Vanity Fair',
|
'vanityfair': 'Vanity Fair',
|
||||||
|
'cnevids': 'Condé Nast',
|
||||||
}
|
}
|
||||||
|
|
||||||
_VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
|
_VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||||
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
||||||
'file': '5171b343c2b4c00dd0c1ccb3.mp4',
|
|
||||||
'md5': '1921f713ed48aabd715691f774c451f7',
|
'md5': '1921f713ed48aabd715691f774c451f7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '5171b343c2b4c00dd0c1ccb3',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': '3D Printed Speakers Lit With LED',
|
'title': '3D Printed Speakers Lit With LED',
|
||||||
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
||||||
}
|
}
|
||||||
@@ -55,12 +57,16 @@ class CondeNastIE(InfoExtractor):
|
|||||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||||
return self.playlist_result(entries, playlist_title=title)
|
return self.playlist_result(entries, playlist_title=title)
|
||||||
|
|
||||||
def _extract_video(self, webpage):
|
def _extract_video(self, webpage, url_type):
|
||||||
description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
|
if url_type != 'embed':
|
||||||
r'<div class="video-post-content">(.+?)</div>',
|
description = self._html_search_regex(
|
||||||
],
|
[
|
||||||
webpage, 'description',
|
r'<div class="cne-video-description">(.+?)</div>',
|
||||||
fatal=False, flags=re.DOTALL)
|
r'<div class="video-post-content">(.+?)</div>',
|
||||||
|
],
|
||||||
|
webpage, 'description', fatal=False, flags=re.DOTALL)
|
||||||
|
else:
|
||||||
|
description = None
|
||||||
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
|
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
|
||||||
'player params', flags=re.DOTALL)
|
'player params', flags=re.DOTALL)
|
||||||
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
|
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
|
||||||
@@ -99,12 +105,12 @@ class CondeNastIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
site = mobj.group('site')
|
site = mobj.group('site')
|
||||||
url_type = mobj.group('type')
|
url_type = mobj.group('type')
|
||||||
id = mobj.group('id')
|
item_id = mobj.group('id')
|
||||||
|
|
||||||
self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
|
self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
|
||||||
webpage = self._download_webpage(url, id)
|
webpage = self._download_webpage(url, item_id)
|
||||||
|
|
||||||
if url_type == 'series':
|
if url_type == 'series':
|
||||||
return self._extract_series(url, webpage)
|
return self._extract_series(url, webpage)
|
||||||
else:
|
else:
|
||||||
return self._extract_video(webpage)
|
return self._extract_video(webpage, url_type)
|
||||||
|
@@ -8,8 +8,6 @@ from .subtitles import SubtitlesInfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_str,
|
compat_str,
|
||||||
get_element_by_attribute,
|
|
||||||
get_element_by_id,
|
|
||||||
orderedSet,
|
orderedSet,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -202,11 +200,12 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
playlist_id = mobj.group('id')
|
playlist_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
return {'_type': 'playlist',
|
return {
|
||||||
'id': playlist_id,
|
'_type': 'playlist',
|
||||||
'title': get_element_by_id(u'playlist_name', webpage),
|
'id': playlist_id,
|
||||||
'entries': self._extract_entries(playlist_id),
|
'title': self._og_search_title(webpage),
|
||||||
}
|
'entries': self._extract_entries(playlist_id),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||||
|
27
youtube_dl/extractor/divxstage.py
Normal file
27
youtube_dl/extractor/divxstage.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .novamov import NovaMovIE
|
||||||
|
|
||||||
|
|
||||||
|
class DivxStageIE(NovaMovIE):
    """Extractor for DivxStage, configured on top of NovaMovIE.

    Only class-level settings are overridden here (host pattern, canonical
    host and page regexes); all extraction logic is inherited.
    """

    IE_NAME = 'divxstage'
    IE_DESC = 'DivxStage'

    # Raw string: '\.' is not a valid escape sequence in a normal literal.
    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'divxstage\.(?:eu|net|ch|co|at|ag)'}

    _HOST = 'www.divxstage.eu'

    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
    _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
    _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'

    _TEST = {
        'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
        'md5': '63969f6eb26533a1968c4d325be63e72',
        'info_dict': {
            'id': '57f238e2e5e01',
            'ext': 'flv',
            'title': 'youtubedl test video',
            'description': 'This is a test video for youtubedl.',
        }
    }
|
@@ -1,4 +1,5 @@
|
|||||||
import os
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -8,18 +9,23 @@ from ..utils import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ExtremeTubeIE(InfoExtractor):
|
class ExtremeTubeIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||||
u'file': u'652431.mp4',
|
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
|
||||||
u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '652431',
|
||||||
u"title": u"Music Video 14 british euro brit european cumshots swallow",
|
'ext': 'mp4',
|
||||||
u"uploader": u"unknown",
|
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
||||||
u"age_limit": 18,
|
'uploader': 'unknown',
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.extremetube.com/gay/video/abcde-1234',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -30,11 +36,14 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
|
video_title = self._html_search_regex(
|
||||||
uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False)
|
r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
|
||||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url'))
|
uploader = self._html_search_regex(
|
||||||
|
r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
|
||||||
|
fatal=False)
|
||||||
|
video_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||||
|
r'video_url=(.+?)&', webpage, 'video_url'))
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
extension = os.path.splitext(path)[1][1:]
|
|
||||||
format = path.split('/')[5].split('_')[:2]
|
format = path.split('/')[5].split('_')[:2]
|
||||||
format = "-".join(format)
|
format = "-".join(format)
|
||||||
|
|
||||||
@@ -43,7 +52,6 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
'title': video_title,
|
'title': video_title,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': extension,
|
|
||||||
'format': format,
|
'format': format,
|
||||||
'format_id': format,
|
'format_id': format,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@@ -6,7 +6,6 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class FirstpostIE(InfoExtractor):
|
class FirstpostIE(InfoExtractor):
|
||||||
IE_NAME = 'Firstpost.com'
|
|
||||||
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
|
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@@ -16,7 +15,6 @@ class FirstpostIE(InfoExtractor):
|
|||||||
'id': '1025403',
|
'id': '1025403',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
|
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
|
||||||
'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -24,15 +22,26 @@ class FirstpostIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
data = self._download_xml(
|
||||||
video_url = self._html_search_regex(
|
'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
|
||||||
r'<div.*?name="div_video".*?flashvars="([^"]+)">',
|
'Downloading video XML')
|
||||||
webpage, 'video URL')
|
|
||||||
|
item = data.find('./playlist/item')
|
||||||
|
thumbnail = item.find('./image').text
|
||||||
|
title = item.find('./title').text
|
||||||
|
|
||||||
|
formats = [
|
||||||
|
{
|
||||||
|
'url': details.find('./file').text,
|
||||||
|
'format_id': details.find('./label').text.strip(),
|
||||||
|
'width': int(details.find('./width').text.strip()),
|
||||||
|
'height': int(details.find('./height').text.strip()),
|
||||||
|
} for details in item.findall('./source/file_details') if details.find('./file').text
|
||||||
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'title': title,
|
||||||
'title': self._og_search_title(webpage),
|
'thumbnail': thumbnail,
|
||||||
'description': self._og_search_description(webpage),
|
'formats': formats,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
}
|
}
|
||||||
|
@@ -5,6 +5,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urllib_parse,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -16,16 +17,28 @@ class FiveMinIE(InfoExtractor):
|
|||||||
(?P<id>\d+)
|
(?P<id>\d+)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
# From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
|
{
|
||||||
'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
|
# From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
|
||||||
'md5': '4f7b0b79bf1a470e5004f7112385941d',
|
'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
|
||||||
'info_dict': {
|
'md5': '4f7b0b79bf1a470e5004f7112385941d',
|
||||||
'id': '518013791',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': '518013791',
|
||||||
'title': 'iPad Mini with Retina Display Review',
|
'ext': 'mp4',
|
||||||
|
'title': 'iPad Mini with Retina Display Review',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
{
|
||||||
|
# From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
|
||||||
|
'url': '5min:518086247',
|
||||||
|
'md5': 'e539a9dd682c288ef5a498898009f69e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '518086247',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'How to Make a Next-Level Fruit Salad',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_result(cls, video_id):
|
def _build_result(cls, video_id):
|
||||||
@@ -34,9 +47,19 @@ class FiveMinIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
|
||||||
|
embed_page = self._download_webpage(embed_url, video_id,
|
||||||
|
'Downloading embed page')
|
||||||
|
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
|
||||||
|
query = compat_urllib_parse.urlencode({
|
||||||
|
'func': 'GetResults',
|
||||||
|
'playlist': video_id,
|
||||||
|
'sid': sid,
|
||||||
|
'isPlayerSeed': 'true',
|
||||||
|
'url': embed_url,
|
||||||
|
})
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
|
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
|
||||||
'playlist=%s&url=https' % video_id,
|
|
||||||
video_id)['binding'][0]
|
video_id)['binding'][0]
|
||||||
|
|
||||||
second_id = compat_str(int(video_id[:-2]) + 1)
|
second_id = compat_str(int(video_id[:-2]) + 1)
|
||||||
|
@@ -35,9 +35,10 @@ class GenericIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
||||||
'file': '13601338388002.mp4',
|
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
|
||||||
'md5': '6e15c93721d7ec9e9ca3fdbf07982cfd',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '13601338388002',
|
||||||
|
'ext': 'mp4',
|
||||||
'uploader': 'www.hodiho.fr',
|
'uploader': 'www.hodiho.fr',
|
||||||
'title': 'R\u00e9gis plante sa Jeep',
|
'title': 'R\u00e9gis plante sa Jeep',
|
||||||
}
|
}
|
||||||
@@ -46,8 +47,9 @@ class GenericIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'add_ie': ['Bandcamp'],
|
'add_ie': ['Bandcamp'],
|
||||||
'url': 'http://bronyrock.com/track/the-pony-mash',
|
'url': 'http://bronyrock.com/track/the-pony-mash',
|
||||||
'file': '3235767654.mp3',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '3235767654',
|
||||||
|
'ext': 'mp3',
|
||||||
'title': 'The Pony Mash',
|
'title': 'The Pony Mash',
|
||||||
'uploader': 'M_Pallante',
|
'uploader': 'M_Pallante',
|
||||||
},
|
},
|
||||||
@@ -73,9 +75,10 @@ class GenericIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
# https://github.com/rg3/youtube-dl/issues/2253
|
# https://github.com/rg3/youtube-dl/issues/2253
|
||||||
'url': 'http://bcove.me/i6nfkrc3',
|
'url': 'http://bcove.me/i6nfkrc3',
|
||||||
'file': '3101154703001.mp4',
|
|
||||||
'md5': '0ba9446db037002366bab3b3eb30c88c',
|
'md5': '0ba9446db037002366bab3b3eb30c88c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '3101154703001',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Still no power',
|
'title': 'Still no power',
|
||||||
'uploader': 'thestar.com',
|
'uploader': 'thestar.com',
|
||||||
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
|
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
|
||||||
@@ -184,6 +187,17 @@ class GenericIE(InfoExtractor):
|
|||||||
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
|
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# Embeded Ustream video
|
||||||
|
{
|
||||||
|
'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
|
||||||
|
'md5': '27b99cdb639c9b12a79bca876a073417',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '45734260',
|
||||||
|
'ext': 'flv',
|
||||||
|
'uploader': 'AU SPA: The NSA and Privacy',
|
||||||
|
'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
|
||||||
|
}
|
||||||
|
},
|
||||||
# nowvideo embed hidden behind percent encoding
|
# nowvideo embed hidden behind percent encoding
|
||||||
{
|
{
|
||||||
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
|
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
|
||||||
@@ -225,6 +239,16 @@ class GenericIE(InfoExtractor):
|
|||||||
'uploader_id': 'rbctv_2012_4',
|
'uploader_id': 'rbctv_2012_4',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Condé Nast embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.wired.com/2014/04/honda-asimo/',
|
||||||
|
'md5': 'ba0dfe966fa007657bd1443ee672db0f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '53501be369702d3275860000',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
def report_download_webpage(self, video_id):
|
||||||
@@ -471,6 +495,22 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj:
|
if mobj:
|
||||||
return self.url_result(mobj.group(1), 'BlipTV')
|
return self.url_result(mobj.group(1), 'BlipTV')
|
||||||
|
|
||||||
|
# Look for embedded condenast player
|
||||||
|
matches = re.findall(
|
||||||
|
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
|
||||||
|
webpage)
|
||||||
|
if matches:
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': [{
|
||||||
|
'_type': 'url',
|
||||||
|
'ie_key': 'CondeNast',
|
||||||
|
'url': ma,
|
||||||
|
} for ma in matches],
|
||||||
|
'title': video_title,
|
||||||
|
'id': video_id,
|
||||||
|
}
|
||||||
|
|
||||||
# Look for Bandcamp pages with custom domain
|
# Look for Bandcamp pages with custom domain
|
||||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
@@ -500,17 +540,18 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group(1), 'Mpora')
|
return self.url_result(mobj.group(1), 'Mpora')
|
||||||
|
|
||||||
# Look for embedded NovaMov player
|
# Look for embedded NovaMov-based player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
|
r'''(?x)<iframe[^>]+?src=(["\'])
|
||||||
|
(?P<url>http://(?:(?:embed|www)\.)?
|
||||||
|
(?:novamov\.com|
|
||||||
|
nowvideo\.(?:ch|sx|eu|at|ag|co)|
|
||||||
|
videoweed\.(?:es|com)|
|
||||||
|
movshare\.(?:net|sx|ag)|
|
||||||
|
divxstage\.(?:eu|net|ch|co|at|ag))
|
||||||
|
/embed\.php.+?)\1''', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'NovaMov')
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for embedded NowVideo player
|
|
||||||
mobj = re.search(
|
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
|
|
||||||
if mobj is not None:
|
|
||||||
return self.url_result(mobj.group('url'), 'NowVideo')
|
|
||||||
|
|
||||||
# Look for embedded Facebook player
|
# Look for embedded Facebook player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
@@ -556,6 +597,12 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'TED')
|
return self.url_result(mobj.group('url'), 'TED')
|
||||||
|
|
||||||
|
# Look for embedded Ustream videos
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'Ustream')
|
||||||
|
|
||||||
# Look for embedded arte.tv player
|
# Look for embedded arte.tv player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
|
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
|
||||||
|
@@ -106,7 +106,7 @@ class OneUPIE(IGNIE):
|
|||||||
|
|
||||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://gamevideos.1up.com/video/id/34976',
|
'url': 'http://gamevideos.1up.com/video/id/34976',
|
||||||
'md5': '68a54ce4ebc772e4b71e3123d413163d',
|
'md5': '68a54ce4ebc772e4b71e3123d413163d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -115,10 +115,7 @@ class OneUPIE(IGNIE):
|
|||||||
'title': 'Sniper Elite V2 - Trailer',
|
'title': 'Sniper Elite V2 - Trailer',
|
||||||
'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
|
'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
|
||||||
}
|
}
|
||||||
}
|
}]
|
||||||
|
|
||||||
# Override IGN tests
|
|
||||||
_TESTS = []
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@@ -11,16 +11,15 @@ from ..utils import (
|
|||||||
|
|
||||||
class InfoQIE(InfoExtractor):
|
class InfoQIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
|
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
"name": "InfoQ",
|
'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
|
||||||
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
|
||||||
"file": "12-jan-pythonthings.mp4",
|
'info_dict': {
|
||||||
"info_dict": {
|
'id': '12-jan-pythonthings',
|
||||||
"description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
|
'ext': 'mp4',
|
||||||
"title": "A Few of My Favorite [Python] Things",
|
'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
|
||||||
},
|
'title': 'A Few of My Favorite [Python] Things',
|
||||||
"params": {
|
|
||||||
"skip_download": True,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -30,26 +29,39 @@ class InfoQIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
|
||||||
|
video_description = self._html_search_meta('description', webpage, 'description')
|
||||||
|
|
||||||
|
# The server URL is hardcoded
|
||||||
|
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
||||||
|
|
||||||
# Extract video URL
|
# Extract video URL
|
||||||
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
|
encoded_id = self._search_regex(
|
||||||
|
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
|
||||||
real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||||
video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
|
playpath = 'mp4:' + real_id
|
||||||
|
|
||||||
# Extract title
|
video_filename = playpath.split('/')[-1]
|
||||||
video_title = self._search_regex(r'contentTitle = "(.*?)";',
|
|
||||||
webpage, 'title')
|
|
||||||
|
|
||||||
# Extract description
|
|
||||||
video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
|
|
||||||
webpage, 'description', fatal=False)
|
|
||||||
|
|
||||||
video_filename = video_url.split('/')[-1]
|
|
||||||
video_id, extension = video_filename.split('.')
|
video_id, extension = video_filename.split('.')
|
||||||
|
|
||||||
|
http_base = self._search_regex(
|
||||||
|
r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
|
||||||
|
'HTTP base URL')
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
'url': video_url,
|
||||||
|
'ext': extension,
|
||||||
|
'play_path': playpath,
|
||||||
|
}, {
|
||||||
|
'format_id': 'http',
|
||||||
|
'url': http_base + real_id,
|
||||||
|
}]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': extension, # Extension is always(?) mp4, but seems to be flv
|
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@@ -1,9 +1,12 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
)
|
)
|
||||||
@@ -24,34 +27,31 @@ class JustinTVIE(InfoExtractor):
|
|||||||
/?(?:\#.*)?$
|
/?(?:\#.*)?$
|
||||||
"""
|
"""
|
||||||
_JUSTIN_PAGE_LIMIT = 100
|
_JUSTIN_PAGE_LIMIT = 100
|
||||||
IE_NAME = u'justin.tv'
|
IE_NAME = 'justin.tv'
|
||||||
|
IE_DESC = 'justin.tv and twitch.tv'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
|
'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
|
||||||
u'file': u'296128360.flv',
|
'md5': 'ecaa8a790c22a40770901460af191c9a',
|
||||||
u'md5': u'ecaa8a790c22a40770901460af191c9a',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '296128360',
|
||||||
u"upload_date": u"20110927",
|
'ext': 'flv',
|
||||||
u"uploader_id": 25114803,
|
'upload_date': '20110927',
|
||||||
u"uploader": u"thegamedevhub",
|
'uploader_id': 25114803,
|
||||||
u"title": u"Beginner Series - Scripting With Python Pt.1"
|
'uploader': 'thegamedevhub',
|
||||||
|
'title': 'Beginner Series - Scripting With Python Pt.1'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def report_download_page(self, channel, offset):
|
|
||||||
"""Report attempt to download a single page of videos."""
|
|
||||||
self.to_screen(u'%s: Downloading video information from %d to %d' %
|
|
||||||
(channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
|
|
||||||
|
|
||||||
# Return count of items, list of *valid* items
|
# Return count of items, list of *valid* items
|
||||||
def _parse_page(self, url, video_id):
|
def _parse_page(self, url, video_id):
|
||||||
info_json = self._download_webpage(url, video_id,
|
info_json = self._download_webpage(url, video_id,
|
||||||
u'Downloading video info JSON',
|
'Downloading video info JSON',
|
||||||
u'unable to download video info JSON')
|
'unable to download video info JSON')
|
||||||
|
|
||||||
response = json.loads(info_json)
|
response = json.loads(info_json)
|
||||||
if type(response) != list:
|
if type(response) != list:
|
||||||
error_text = response.get('error', 'unknown error')
|
error_text = response.get('error', 'unknown error')
|
||||||
raise ExtractorError(u'Justin.tv API: %s' % error_text)
|
raise ExtractorError('Justin.tv API: %s' % error_text)
|
||||||
info = []
|
info = []
|
||||||
for clip in response:
|
for clip in response:
|
||||||
video_url = clip['video_file_url']
|
video_url = clip['video_file_url']
|
||||||
@@ -62,7 +62,7 @@ class JustinTVIE(InfoExtractor):
|
|||||||
video_id = clip['id']
|
video_id = clip['id']
|
||||||
video_title = clip.get('title', video_id)
|
video_title = clip.get('title', video_id)
|
||||||
info.append({
|
info.append({
|
||||||
'id': video_id,
|
'id': compat_str(video_id),
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'uploader': clip.get('channel_name', video_uploader_id),
|
'uploader': clip.get('channel_name', video_uploader_id),
|
||||||
@@ -74,8 +74,6 @@ class JustinTVIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'invalid URL: %s' % url)
|
|
||||||
|
|
||||||
api_base = 'http://api.justin.tv'
|
api_base = 'http://api.justin.tv'
|
||||||
paged = False
|
paged = False
|
||||||
@@ -89,40 +87,41 @@ class JustinTVIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, chapter_id)
|
webpage = self._download_webpage(url, chapter_id)
|
||||||
m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
|
m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
|
||||||
if not m:
|
if not m:
|
||||||
raise ExtractorError(u'Cannot find archive of a chapter')
|
raise ExtractorError('Cannot find archive of a chapter')
|
||||||
archive_id = m.group(1)
|
archive_id = m.group(1)
|
||||||
|
|
||||||
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
||||||
doc = self._download_xml(api, chapter_id,
|
doc = self._download_xml(
|
||||||
note=u'Downloading chapter information',
|
api, chapter_id,
|
||||||
errnote=u'Chapter information download failed')
|
note='Downloading chapter information',
|
||||||
|
errnote='Chapter information download failed')
|
||||||
for a in doc.findall('.//archive'):
|
for a in doc.findall('.//archive'):
|
||||||
if archive_id == a.find('./id').text:
|
if archive_id == a.find('./id').text:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(u'Could not find chapter in chapter information')
|
raise ExtractorError('Could not find chapter in chapter information')
|
||||||
|
|
||||||
video_url = a.find('./video_file_url').text
|
video_url = a.find('./video_file_url').text
|
||||||
video_ext = video_url.rpartition('.')[2] or u'flv'
|
video_ext = video_url.rpartition('.')[2] or 'flv'
|
||||||
|
|
||||||
chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
|
chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
|
||||||
chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
|
chapter_info = self._download_json(
|
||||||
note='Downloading chapter metadata',
|
chapter_api_url, 'c' + chapter_id,
|
||||||
errnote='Download of chapter metadata failed')
|
note='Downloading chapter metadata',
|
||||||
chapter_info = json.loads(chapter_info_json)
|
errnote='Download of chapter metadata failed')
|
||||||
|
|
||||||
bracket_start = int(doc.find('.//bracket_start').text)
|
bracket_start = int(doc.find('.//bracket_start').text)
|
||||||
bracket_end = int(doc.find('.//bracket_end').text)
|
bracket_end = int(doc.find('.//bracket_end').text)
|
||||||
|
|
||||||
# TODO determine start (and probably fix up file)
|
# TODO determine start (and probably fix up file)
|
||||||
# youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
|
# youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
|
||||||
#video_url += u'?start=' + TODO:start_timestamp
|
#video_url += '?start=' + TODO:start_timestamp
|
||||||
# bracket_start is 13290, but we want 51670615
|
# bracket_start is 13290, but we want 51670615
|
||||||
self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
|
self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
|
||||||
u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
|
'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': u'c' + chapter_id,
|
'id': 'c' + chapter_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': video_ext,
|
'ext': video_ext,
|
||||||
'title': chapter_info['title'],
|
'title': chapter_info['title'],
|
||||||
@@ -131,14 +130,12 @@ class JustinTVIE(InfoExtractor):
|
|||||||
'uploader': chapter_info['channel']['display_name'],
|
'uploader': chapter_info['channel']['display_name'],
|
||||||
'uploader_id': chapter_info['channel']['name'],
|
'uploader_id': chapter_info['channel']['name'],
|
||||||
}
|
}
|
||||||
return [info]
|
return info
|
||||||
else:
|
else:
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('videoid')
|
||||||
api = api_base + '/broadcast/by_archive/%s.json' % video_id
|
api = api_base + '/broadcast/by_archive/%s.json' % video_id
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
entries = []
|
||||||
|
|
||||||
info = []
|
|
||||||
offset = 0
|
offset = 0
|
||||||
limit = self._JUSTIN_PAGE_LIMIT
|
limit = self._JUSTIN_PAGE_LIMIT
|
||||||
while True:
|
while True:
|
||||||
@@ -146,8 +143,12 @@ class JustinTVIE(InfoExtractor):
|
|||||||
self.report_download_page(video_id, offset)
|
self.report_download_page(video_id, offset)
|
||||||
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
|
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
|
||||||
page_count, page_info = self._parse_page(page_url, video_id)
|
page_count, page_info = self._parse_page(page_url, video_id)
|
||||||
info.extend(page_info)
|
entries.extend(page_info)
|
||||||
if not paged or page_count != limit:
|
if not paged or page_count != limit:
|
||||||
break
|
break
|
||||||
offset += limit
|
offset += limit
|
||||||
return info
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': video_id,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -11,22 +13,22 @@ from ..aes import (
|
|||||||
aes_decrypt_text
|
aes_decrypt_text
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class KeezMoviesIE(InfoExtractor):
|
class KeezMoviesIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
|
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
|
||||||
u'file': u'1214711.mp4',
|
'file': '1214711.mp4',
|
||||||
u'md5': u'6e297b7e789329923fcf83abb67c9289',
|
'md5': '6e297b7e789329923fcf83abb67c9289',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"title": u"Petite Asian Lady Mai Playing In Bathtub",
|
'title': 'Petite Asian Lady Mai Playing In Bathtub',
|
||||||
u"age_limit": 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('videoid')
|
||||||
url = 'http://www.' + mobj.group('url')
|
|
||||||
|
|
||||||
req = compat_urllib_request.Request(url)
|
req = compat_urllib_request.Request(url)
|
||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
@@ -38,10 +40,10 @@ class KeezMoviesIE(InfoExtractor):
|
|||||||
embedded_url = mobj.group(1)
|
embedded_url = mobj.group(1)
|
||||||
return self.url_result(embedded_url)
|
return self.url_result(embedded_url)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title')
|
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
|
||||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url'))
|
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, 'video_url'))
|
||||||
if webpage.find('encrypted=true')!=-1:
|
if 'encrypted=true' in webpage:
|
||||||
password = self._html_search_regex(r'video_title=(.+?)&', webpage, u'password')
|
password = self._html_search_regex(r'video_title=(.+?)&', webpage, 'password')
|
||||||
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
|
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
extension = os.path.splitext(path)[1][1:]
|
extension = os.path.splitext(path)[1][1:]
|
||||||
|
@@ -1,15 +1,18 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MDRIE(InfoExtractor):
|
class MDRIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
|
_VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
|
||||||
|
|
||||||
# No tests, MDR regularily deletes its videos
|
# No tests, MDR regularily deletes its videos
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.mdr.de/fakt/video189002.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
@@ -19,9 +22,9 @@ class MDRIE(InfoExtractor):
|
|||||||
# determine title and media streams from webpage
|
# determine title and media streams from webpage
|
||||||
html = self._download_webpage(url, video_id)
|
html = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
|
title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
|
||||||
xmlurl = self._search_regex(
|
xmlurl = self._search_regex(
|
||||||
r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL')
|
r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')
|
||||||
|
|
||||||
doc = self._download_xml(domain + xmlurl, video_id)
|
doc = self._download_xml(domain + xmlurl, video_id)
|
||||||
formats = []
|
formats = []
|
||||||
@@ -41,7 +44,7 @@ class MDRIE(InfoExtractor):
|
|||||||
if vbr_el is None:
|
if vbr_el is None:
|
||||||
format.update({
|
format.update({
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
'format_id': u'%s-%d' % (media_type, abr),
|
'format_id': '%s-%d' % (media_type, abr),
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
vbr = int(vbr_el.text) // 1000
|
vbr = int(vbr_el.text) // 1000
|
||||||
@@ -49,12 +52,9 @@ class MDRIE(InfoExtractor):
|
|||||||
'vbr': vbr,
|
'vbr': vbr,
|
||||||
'width': int(a.find('frameWidth').text),
|
'width': int(a.find('frameWidth').text),
|
||||||
'height': int(a.find('frameHeight').text),
|
'height': int(a.find('frameHeight').text),
|
||||||
'format_id': u'%s-%d' % (media_type, vbr),
|
'format_id': '%s-%d' % (media_type, vbr),
|
||||||
})
|
})
|
||||||
formats.append(format)
|
formats.append(format)
|
||||||
if not formats:
|
|
||||||
raise ExtractorError(u'Could not find any valid formats')
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -1,22 +1,14 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
compat_parse_qs,
|
|
||||||
compat_str,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MorningstarIE(InfoExtractor):
|
class MorningstarIE(InfoExtractor):
|
||||||
IE_DESC = 'morningstar.com'
|
IE_DESC = 'morningstar.com'
|
||||||
_VALID_URL = r'https?://(?:www\.)?morningstar\.com/cover/videocenter\.aspx\?id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
|
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
|
||||||
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
|
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
|
||||||
|
@@ -44,7 +44,7 @@ class MotorsportIE(InfoExtractor):
|
|||||||
e = compat_str(int(time.time()) + 24 * 60 * 60)
|
e = compat_str(int(time.time()) + 24 * 60 * 60)
|
||||||
base_video_url = params['location'] + '?e=' + e
|
base_video_url = params['location'] + '?e=' + e
|
||||||
s = 'h3hg713fh32'
|
s = 'h3hg713fh32'
|
||||||
h = hashlib.md5(s + base_video_url).hexdigest()
|
h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
|
||||||
video_url = base_video_url + '&h=' + h
|
video_url = base_video_url + '&h=' + h
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
|
27
youtube_dl/extractor/movshare.py
Normal file
27
youtube_dl/extractor/movshare.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .novamov import NovaMovIE
|
||||||
|
|
||||||
|
|
||||||
|
class MovShareIE(NovaMovIE):
|
||||||
|
IE_NAME = 'movshare'
|
||||||
|
IE_DESC = 'MovShare'
|
||||||
|
|
||||||
|
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'}
|
||||||
|
|
||||||
|
_HOST = 'www.movshare.net'
|
||||||
|
|
||||||
|
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||||
|
_TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
|
||||||
|
_DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.movshare.net/video/559e28be54d96',
|
||||||
|
'md5': 'abd31a2132947262c50429e1d16c1bfd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '559e28be54d96',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'dissapeared image',
|
||||||
|
'description': 'optical illusion dissapeared image magic illusion',
|
||||||
|
}
|
||||||
|
}
|
@@ -4,9 +4,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import int_or_none
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MporaIE(InfoExtractor):
|
class MporaIE(InfoExtractor):
|
||||||
@@ -20,7 +18,7 @@ class MporaIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Katy Curd - Winter in the Forest',
|
'title': 'Katy Curd - Winter in the Forest',
|
||||||
'duration': 416,
|
'duration': 416,
|
||||||
'uploader': 'petenewman',
|
'uploader': 'Peter Newman Media',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,15 +1,22 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import str_to_int
|
||||||
|
|
||||||
|
|
||||||
class NineGagIE(InfoExtractor):
|
class NineGagIE(InfoExtractor):
|
||||||
IE_NAME = '9gag'
|
IE_NAME = '9gag'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
|
_VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
|
||||||
|
(?:
|
||||||
|
v/(?P<numid>[0-9]+)|
|
||||||
|
p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
"url": "http://9gag.tv/v/1912",
|
"url": "http://9gag.tv/v/1912",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
"id": "1912",
|
"id": "1912",
|
||||||
@@ -20,34 +27,42 @@ class NineGagIE(InfoExtractor):
|
|||||||
"thumbnail": "re:^https?://",
|
"thumbnail": "re:^https?://",
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube']
|
'add_ie': ['Youtube']
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'KklwM',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': 'alternate-banned-opening-scene-of-gravity',
|
||||||
|
"description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
|
||||||
|
'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('numid') or mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id') or video_id
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
youtube_id = self._html_search_regex(
|
post_view = json.loads(self._html_search_regex(
|
||||||
r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
|
r'var postView = new app\.PostView\({ post: ({.+?}),', webpage, 'post view'))
|
||||||
webpage, 'video ID')
|
|
||||||
description = self._html_search_regex(
|
youtube_id = post_view['videoExternalId']
|
||||||
r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
|
title = post_view['title']
|
||||||
'description', fatal=False)
|
description = post_view['description']
|
||||||
view_count_str = self._html_search_regex(
|
view_count = str_to_int(post_view['externalView'])
|
||||||
r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
|
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
||||||
fatal=False)
|
|
||||||
view_count = (
|
|
||||||
None if view_count_str is None
|
|
||||||
else int(view_count_str.replace(',', '')))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': youtube_id,
|
'url': youtube_id,
|
||||||
'ie_key': 'Youtube',
|
'ie_key': 'Youtube',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._og_search_title(webpage),
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
105
youtube_dl/extractor/noco.py
Normal file
105
youtube_dl/extractor/noco.py
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NocoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
|
||||||
|
'md5': '0a993f0058ddbcd902630b2047ef710e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '11538',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ami Ami Idol - Hello! France',
|
||||||
|
'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
|
||||||
|
'upload_date': '20140412',
|
||||||
|
'uploader': 'Nolife',
|
||||||
|
'uploader_id': 'NOL',
|
||||||
|
'duration': 2851.2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
medias = self._download_json(
|
||||||
|
'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
for fmt in medias['fr']['video_list']['default']['quality_list']:
|
||||||
|
format_id = fmt['quality_key']
|
||||||
|
|
||||||
|
file = self._download_json(
|
||||||
|
'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
|
||||||
|
video_id, 'Downloading %s video JSON' % format_id)
|
||||||
|
|
||||||
|
file_url = file['file']
|
||||||
|
if not file_url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if file_url == 'forbidden':
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s - %s' % (
|
||||||
|
self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']),
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': file_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'width': fmt['res_width'],
|
||||||
|
'height': fmt['res_lines'],
|
||||||
|
'abr': fmt['audiobitrate'],
|
||||||
|
'vbr': fmt['videobitrate'],
|
||||||
|
'filesize': fmt['filesize'],
|
||||||
|
'format_note': fmt['quality_name'],
|
||||||
|
'preference': fmt['priority'],
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
show = self._download_json(
|
||||||
|
'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
|
||||||
|
|
||||||
|
upload_date = unified_strdate(show['indexed'])
|
||||||
|
uploader = show['partner_name']
|
||||||
|
uploader_id = show['partner_key']
|
||||||
|
duration = show['duration_ms'] / 1000.0
|
||||||
|
thumbnail = show['screenshot']
|
||||||
|
|
||||||
|
episode = show.get('show_TT') or show.get('show_OT')
|
||||||
|
family = show.get('family_TT') or show.get('family_OT')
|
||||||
|
episode_number = show.get('episode_number')
|
||||||
|
|
||||||
|
title = ''
|
||||||
|
if family:
|
||||||
|
title += family
|
||||||
|
if episode_number:
|
||||||
|
title += ' #' + compat_str(episode_number)
|
||||||
|
if episode:
|
||||||
|
title += ' - ' + episode
|
||||||
|
|
||||||
|
description = show.get('show_resume') or show.get('family_resume')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor):
|
|||||||
IE_NAME = 'novamov'
|
IE_NAME = 'novamov'
|
||||||
IE_DESC = 'NovaMov'
|
IE_DESC = 'NovaMov'
|
||||||
|
|
||||||
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
|
_VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
|
||||||
|
_VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
|
||||||
|
|
||||||
_HOST = 'www.novamov.com'
|
_HOST = 'www.novamov.com'
|
||||||
|
|
||||||
@@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
page = self._download_webpage(
|
page = self._download_webpage(
|
||||||
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
|
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
|
||||||
|
|
||||||
if re.search(self._FILE_DELETED_REGEX, page) is not None:
|
if re.search(self._FILE_DELETED_REGEX, page) is not None:
|
||||||
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
|
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
|
||||||
|
|
||||||
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
|
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
|
||||||
|
|
||||||
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
|
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
|
||||||
|
|
||||||
api_response = self._download_webpage(
|
api_response = self._download_webpage(
|
||||||
|
@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
|
|||||||
IE_NAME = 'nowvideo'
|
IE_NAME = 'nowvideo'
|
||||||
IE_DESC = 'NowVideo'
|
IE_DESC = 'NowVideo'
|
||||||
|
|
||||||
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
|
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}
|
||||||
|
|
||||||
_HOST = 'www.nowvideo.ch'
|
_HOST = 'www.nowvideo.ch'
|
||||||
|
|
||||||
|
@@ -59,11 +59,11 @@ class NTVIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://www.ntv.ru/kino/Koma_film',
|
'url': 'http://www.ntv.ru/kino/Koma_film',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '750783',
|
'id': '758100',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Остросюжетный фильм «Кома» 4 апреля вечером на НТВ',
|
'title': 'Остросюжетный фильм «Кома»',
|
||||||
'description': 'Остросюжетный фильм «Кома» 4 апреля вечером на НТВ',
|
'description': 'Остросюжетный фильм «Кома»',
|
||||||
'duration': 28,
|
'duration': 5592,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
|
@@ -6,22 +6,36 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class PodomaticIE(InfoExtractor):
|
class PodomaticIE(InfoExtractor):
|
||||||
IE_NAME = 'podomatic'
|
IE_NAME = 'podomatic'
|
||||||
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
|
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
"url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
|
{
|
||||||
"file": "2009-01-02T16_03_35-08_00.mp3",
|
'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
|
||||||
"md5": "84bb855fcf3429e6bf72460e1eed782d",
|
'md5': '84bb855fcf3429e6bf72460e1eed782d',
|
||||||
"info_dict": {
|
'info_dict': {
|
||||||
"uploader": "Science Teaching Tips",
|
'id': '2009-01-02T16_03_35-08_00',
|
||||||
"uploader_id": "scienceteachingtips",
|
'ext': 'mp3',
|
||||||
"title": "64. When the Moon Hits Your Eye",
|
'uploader': 'Science Teaching Tips',
|
||||||
"duration": 446,
|
'uploader_id': 'scienceteachingtips',
|
||||||
}
|
'title': '64. When the Moon Hits Your Eye',
|
||||||
}
|
'duration': 446,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
|
||||||
|
'md5': 'd2cf443931b6148e27638650e2638297',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2013-11-15T16_31_21-08_00',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'uploader': 'Ostbahnhof / Techno Mix',
|
||||||
|
'uploader_id': 'ostbahnhof',
|
||||||
|
'title': 'Einunddreizig',
|
||||||
|
'duration': 3799,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -32,10 +46,12 @@ class PodomaticIE(InfoExtractor):
|
|||||||
'?permalink=true&rtmp=0') %
|
'?permalink=true&rtmp=0') %
|
||||||
(mobj.group('proto'), channel, video_id))
|
(mobj.group('proto'), channel, video_id))
|
||||||
data_json = self._download_webpage(
|
data_json = self._download_webpage(
|
||||||
json_url, video_id, note=u'Downloading video info')
|
json_url, video_id, 'Downloading video info')
|
||||||
data = json.loads(data_json)
|
data = json.loads(data_json)
|
||||||
|
|
||||||
video_url = data['downloadLink']
|
video_url = data['downloadLink']
|
||||||
|
if not video_url:
|
||||||
|
video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation'])
|
||||||
uploader = data['podcast']
|
uploader = data['podcast']
|
||||||
title = data['title']
|
title = data['title']
|
||||||
thumbnail = data['imageLocation']
|
thumbnail = data['imageLocation']
|
||||||
|
@@ -1,44 +1,81 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import compat_urllib_parse
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class PornHdIE(InfoExtractor):
|
class PornHdIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<video_id>[0-9]+)/(?P<video_title>.+)'
|
_VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
||||||
'file': '1962.flv',
|
'md5': '956b8ca569f7f4d8ec563e2c41598441',
|
||||||
'md5': '35272469887dca97abd30abecc6cdf75',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "sierra-day-gets-his-cum-all-over-herself-hd-porn-video",
|
'id': '1962',
|
||||||
"age_limit": 18,
|
'ext': 'mp4',
|
||||||
|
'title': 'Sierra loves doing laundry',
|
||||||
|
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
video_id = mobj.group('video_id')
|
|
||||||
video_title = mobj.group('video_title')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
next_url = self._html_search_regex(
|
title = self._og_search_title(webpage)
|
||||||
r'&hd=(http.+?)&', webpage, 'video URL')
|
TITLE_SUFFIX = ' porn HD Video | PornHD.com '
|
||||||
next_url = compat_urllib_parse.unquote(next_url)
|
if title.endswith(TITLE_SUFFIX):
|
||||||
|
title = title[:-len(TITLE_SUFFIX)]
|
||||||
|
|
||||||
video_url = self._download_webpage(
|
description = self._html_search_regex(
|
||||||
next_url, video_id, note='Retrieving video URL',
|
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
|
||||||
errnote='Could not retrieve video URL')
|
view_count = int_or_none(self._html_search_regex(
|
||||||
age_limit = 18
|
r'(\d+) views </span>', webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
|
formats = [
|
||||||
|
{
|
||||||
|
'url': format_url,
|
||||||
|
'ext': format.lower(),
|
||||||
|
'format_id': '%s-%s' % (format.lower(), quality.lower()),
|
||||||
|
'quality': 1 if quality.lower() == 'high' else 0,
|
||||||
|
} for format, quality, format_url in re.findall(
|
||||||
|
r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
|
||||||
|
]
|
||||||
|
|
||||||
|
mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage)
|
||||||
|
if mobj:
|
||||||
|
flashvars = json.loads(mobj.group('flashvars'))
|
||||||
|
formats.extend([
|
||||||
|
{
|
||||||
|
'url': flashvars['hashlink'].replace('?noProxy=1', ''),
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': 'flv-low',
|
||||||
|
'quality': 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': flashvars['hd'].replace('?noProxy=1', ''),
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': 'flv-high',
|
||||||
|
'quality': 1,
|
||||||
|
}
|
||||||
|
])
|
||||||
|
thumbnail = flashvars['urlWallpaper']
|
||||||
|
else:
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'title': title,
|
||||||
'ext': 'flv',
|
'description': description,
|
||||||
'title': video_title,
|
'thumbnail': thumbnail,
|
||||||
'age_limit': age_limit,
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
@@ -160,6 +160,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
|||||||
_CLIPID_REGEXES = [
|
_CLIPID_REGEXES = [
|
||||||
r'"clip_id"\s*:\s+"(\d+)"',
|
r'"clip_id"\s*:\s+"(\d+)"',
|
||||||
r'clipid: "(\d+)"',
|
r'clipid: "(\d+)"',
|
||||||
|
r'clipId=(\d+)',
|
||||||
]
|
]
|
||||||
_TITLE_REGEXES = [
|
_TITLE_REGEXES = [
|
||||||
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
|
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
|
||||||
|
@@ -46,7 +46,8 @@ class PyvideoIE(InfoExtractor):
|
|||||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
|
r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
|
||||||
|
webpage, 'title', flags=re.DOTALL)
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
|
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
|
||||||
webpage, 'video url', flags=re.DOTALL)
|
webpage, 'video url', flags=re.DOTALL)
|
||||||
|
@@ -18,7 +18,7 @@ class Ro220IE(InfoExtractor):
|
|||||||
'md5': '03af18b73a07b4088753930db7a34add',
|
'md5': '03af18b73a07b4088753930db7a34add',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "Luati-le Banii sez 4 ep 1",
|
"title": "Luati-le Banii sez 4 ep 1",
|
||||||
"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
|
"description": "re:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -9,46 +9,136 @@ from ..utils import (
|
|||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
compat_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RTSIE(InfoExtractor):
|
class RTSIE(InfoExtractor):
|
||||||
IE_DESC = 'RTS.ch'
|
IE_DESC = 'RTS.ch'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'
|
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
|
{
|
||||||
'md5': '753b877968ad8afaeddccc374d4256a5',
|
'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
|
||||||
'info_dict': {
|
'md5': '753b877968ad8afaeddccc374d4256a5',
|
||||||
'id': '3449373',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': '3449373',
|
||||||
'duration': 1488,
|
'ext': 'mp4',
|
||||||
'title': 'Les Enfants Terribles',
|
'duration': 1488,
|
||||||
'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
|
'title': 'Les Enfants Terribles',
|
||||||
'uploader': 'Divers',
|
'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
|
||||||
'upload_date': '19680921',
|
'uploader': 'Divers',
|
||||||
'timestamp': -40280400,
|
'upload_date': '19680921',
|
||||||
'thumbnail': 're:^https?://.*\.image'
|
'timestamp': -40280400,
|
||||||
|
'thumbnail': 're:^https?://.*\.image'
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
{
|
||||||
|
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
|
||||||
|
'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5624067',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 3720,
|
||||||
|
'title': 'Les yeux dans les cieux - Mon homard au Canada',
|
||||||
|
'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
|
||||||
|
'uploader': 'Passe-moi les jumelles',
|
||||||
|
'upload_date': '20140404',
|
||||||
|
'timestamp': 1396635300,
|
||||||
|
'thumbnail': 're:^https?://.*\.image'
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
|
||||||
|
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5745975',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 48,
|
||||||
|
'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
|
||||||
|
'description': 'Hockey - Playoff',
|
||||||
|
'uploader': 'Hockey',
|
||||||
|
'upload_date': '20140403',
|
||||||
|
'timestamp': 1396556882,
|
||||||
|
'thumbnail': 're:^https?://.*\.image'
|
||||||
|
},
|
||||||
|
'skip': 'Blocked outside Switzerland',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html',
|
||||||
|
'md5': '9bb06503773c07ce83d3cbd793cebb91',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5745356',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 33,
|
||||||
|
'title': 'Londres cachée par un épais smog',
|
||||||
|
'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.',
|
||||||
|
'uploader': 'Le Journal en continu',
|
||||||
|
'upload_date': '20140403',
|
||||||
|
'timestamp': 1396537322,
|
||||||
|
'thumbnail': 're:^https?://.*\.image'
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html',
|
||||||
|
'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5706148',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 123,
|
||||||
|
'title': '"Urban Hippie", de Damien Krisl',
|
||||||
|
'description': 'Des Hippies super glam.',
|
||||||
|
'upload_date': '20140403',
|
||||||
|
'timestamp': 1396551600,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
video_id = m.group('id')
|
video_id = m.group('id')
|
||||||
|
|
||||||
all_info = self._download_json(
|
def download_json(internal_id):
|
||||||
'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
|
return self._download_json(
|
||||||
info = all_info['video']['JSONinfo']
|
'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
all_info = download_json(video_id)
|
||||||
|
|
||||||
|
# video_id extracted out of URL is not always a real id
|
||||||
|
if 'video' not in all_info and 'audio' not in all_info:
|
||||||
|
page = self._download_webpage(url, video_id)
|
||||||
|
internal_id = self._html_search_regex(
|
||||||
|
r'<(?:video|audio) data-id="([0-9]+)"', page,
|
||||||
|
'internal video id')
|
||||||
|
all_info = download_json(internal_id)
|
||||||
|
|
||||||
|
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']
|
||||||
|
|
||||||
upload_timestamp = parse_iso8601(info.get('broadcast_date'))
|
upload_timestamp = parse_iso8601(info.get('broadcast_date'))
|
||||||
duration = parse_duration(info.get('duration'))
|
duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
|
||||||
|
if isinstance(duration, compat_str):
|
||||||
|
duration = parse_duration(duration)
|
||||||
|
view_count = info.get('plays')
|
||||||
thumbnail = unescapeHTML(info.get('preview_image_url'))
|
thumbnail = unescapeHTML(info.get('preview_image_url'))
|
||||||
|
|
||||||
|
def extract_bitrate(url):
|
||||||
|
return int_or_none(self._search_regex(
|
||||||
|
r'-([0-9]+)k\.', url, 'bitrate', default=None))
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': fid,
|
'format_id': fid,
|
||||||
'url': furl,
|
'url': furl,
|
||||||
'tbr': int_or_none(self._search_regex(
|
'tbr': extract_bitrate(furl),
|
||||||
r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
|
|
||||||
} for fid, furl in info['streams'].items()]
|
} for fid, furl in info['streams'].items()]
|
||||||
|
|
||||||
|
if 'media' in info:
|
||||||
|
formats.extend([{
|
||||||
|
'format_id': '%s-%sk' % (media['ext'], media['rate']),
|
||||||
|
'url': 'http://download-video.rts.ch/%s' % media['url'],
|
||||||
|
'tbr': media['rate'] or extract_bitrate(media['url']),
|
||||||
|
} for media in info['media'] if media.get('rate')])
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -57,6 +147,7 @@ class RTSIE(InfoExtractor):
|
|||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'description': info.get('intro'),
|
'description': info.get('intro'),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
'uploader': info.get('programName'),
|
'uploader': info.get('programName'),
|
||||||
'timestamp': upload_timestamp,
|
'timestamp': upload_timestamp,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
84
youtube_dl/extractor/rtve.py
Normal file
84
youtube_dl/extractor/rtve.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import base64
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
struct_unpack,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RTVEALaCartaIE(InfoExtractor):
|
||||||
|
IE_NAME = 'rtve.es:alacarta'
|
||||||
|
IE_DESC = 'RTVE a la carta'
|
||||||
|
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||||
|
'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2491869',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _decrypt_url(self, png):
|
||||||
|
encrypted_data = base64.b64decode(png)
|
||||||
|
text_index = encrypted_data.find(b'tEXt')
|
||||||
|
text_chunk = encrypted_data[text_index-4:]
|
||||||
|
length = struct_unpack('!I', text_chunk[:4])[0]
|
||||||
|
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
||||||
|
data = bytearray(text_chunk[8:8+length])
|
||||||
|
data = [chr(b) for b in data if b != 0]
|
||||||
|
hash_index = data.index('#')
|
||||||
|
alphabet_data = data[:hash_index]
|
||||||
|
url_data = data[hash_index+1:]
|
||||||
|
|
||||||
|
alphabet = []
|
||||||
|
e = 0
|
||||||
|
d = 0
|
||||||
|
for l in alphabet_data:
|
||||||
|
if d == 0:
|
||||||
|
alphabet.append(l)
|
||||||
|
d = e = (e + 1) % 4
|
||||||
|
else:
|
||||||
|
d -= 1
|
||||||
|
url = ''
|
||||||
|
f = 0
|
||||||
|
e = 3
|
||||||
|
b = 1
|
||||||
|
for letter in url_data:
|
||||||
|
if f == 0:
|
||||||
|
l = int(letter)*10
|
||||||
|
f = 1
|
||||||
|
else:
|
||||||
|
if e == 0:
|
||||||
|
l += int(letter)
|
||||||
|
url += alphabet[l]
|
||||||
|
e = (b + 3) % 4
|
||||||
|
f = 0
|
||||||
|
b += 1
|
||||||
|
else:
|
||||||
|
e -= 1
|
||||||
|
|
||||||
|
return url
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
info = self._download_json(
|
||||||
|
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||||
|
video_id)['page']['items'][0]
|
||||||
|
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
||||||
|
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||||
|
video_url = self._decrypt_url(png)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': info['title'],
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': info['image'],
|
||||||
|
}
|
@@ -43,13 +43,14 @@ class RutubeIE(InfoExtractor):
|
|||||||
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
||||||
video_id, 'Downloading video JSON')
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
trackinfo = self._download_json(
|
|
||||||
'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
|
|
||||||
video_id, 'Downloading trackinfo JSON')
|
|
||||||
|
|
||||||
# Some videos don't have the author field
|
# Some videos don't have the author field
|
||||||
author = trackinfo.get('author') or {}
|
author = video.get('author') or {}
|
||||||
m3u8_url = trackinfo['video_balancer'].get('m3u8')
|
|
||||||
|
options = self._download_json(
|
||||||
|
'http://rutube.ru/api/play/options/%s/?format=json' %video_id,
|
||||||
|
video_id, 'Downloading options JSON')
|
||||||
|
|
||||||
|
m3u8_url = options['video_balancer'].get('m3u8')
|
||||||
if m3u8_url is None:
|
if m3u8_url is None:
|
||||||
raise ExtractorError('Couldn\'t find m3u8 manifest url')
|
raise ExtractorError('Couldn\'t find m3u8 manifest url')
|
||||||
|
|
||||||
|
@@ -39,7 +39,8 @@ class SlideshareIE(InfoExtractor):
|
|||||||
ext = info['jsplayer']['video_extension']
|
ext = info['jsplayer']['video_extension']
|
||||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
|
r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
|
||||||
|
'description', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -8,78 +10,114 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class SteamIE(InfoExtractor):
|
class SteamIE(InfoExtractor):
|
||||||
_VALID_URL = r"""http://store\.steampowered\.com/
|
_VALID_URL = r"""(?x)
|
||||||
(agecheck/)?
|
https?://store\.steampowered\.com/
|
||||||
(?P<urltype>video|app)/ #If the page is only for videos or for a game
|
(agecheck/)?
|
||||||
(?P<gameID>\d+)/?
|
(?P<urltype>video|app)/ #If the page is only for videos or for a game
|
||||||
(?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
|
(?P<gameID>\d+)/?
|
||||||
"""
|
(?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
|
||||||
|
|
|
||||||
|
https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
|
||||||
|
"""
|
||||||
_VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
|
_VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
|
||||||
_AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
|
_AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
u"url": u"http://store.steampowered.com/video/105600/",
|
"url": "http://store.steampowered.com/video/105600/",
|
||||||
u"playlist": [
|
"playlist": [
|
||||||
{
|
{
|
||||||
u"file": u"81300.flv",
|
"md5": "f870007cee7065d7c76b88f0a45ecc07",
|
||||||
u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
|
"info_dict": {
|
||||||
u"info_dict": {
|
'id': '81300',
|
||||||
u"title": u"Terraria 1.1 Trailer",
|
'ext': 'flv',
|
||||||
u'playlist_index': 1,
|
"title": "Terraria 1.1 Trailer",
|
||||||
|
'playlist_index': 1,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"file": u"80859.flv",
|
"md5": "61aaf31a5c5c3041afb58fb83cbb5751",
|
||||||
u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
|
"info_dict": {
|
||||||
u"info_dict": {
|
'id': '80859',
|
||||||
u"title": u"Terraria Trailer",
|
'ext': 'flv',
|
||||||
u'playlist_index': 2,
|
"title": "Terraria Trailer",
|
||||||
|
'playlist_index': 2,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
}
|
'params': {
|
||||||
|
'playlistend': 2,
|
||||||
|
}
|
||||||
@classmethod
|
}, {
|
||||||
def suitable(cls, url):
|
'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
'info_dict': {
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
'id': 'WB5DvDOOvAY',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20140329',
|
||||||
|
'title': 'FRONTIERS - Final Greenlight Trailer',
|
||||||
|
'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205",
|
||||||
|
'uploader': 'AAD Productions',
|
||||||
|
'uploader_id': 'AtomicAgeDogGames',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url, re.VERBOSE)
|
m = re.match(self._VALID_URL, url)
|
||||||
gameID = m.group('gameID')
|
fileID = m.group('fileID')
|
||||||
|
if fileID:
|
||||||
videourl = self._VIDEO_PAGE_TEMPLATE % gameID
|
videourl = url
|
||||||
webpage = self._download_webpage(videourl, gameID)
|
playlist_id = fileID
|
||||||
|
else:
|
||||||
|
gameID = m.group('gameID')
|
||||||
|
playlist_id = gameID
|
||||||
|
videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
|
||||||
|
webpage = self._download_webpage(videourl, playlist_id)
|
||||||
|
|
||||||
if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
|
if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
|
||||||
videourl = self._AGECHECK_TEMPLATE % gameID
|
videourl = self._AGECHECK_TEMPLATE % playlist_id
|
||||||
self.report_age_confirmation()
|
self.report_age_confirmation()
|
||||||
webpage = self._download_webpage(videourl, gameID)
|
webpage = self._download_webpage(videourl, playlist_id)
|
||||||
|
|
||||||
self.report_extraction(gameID)
|
if fileID:
|
||||||
game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
|
playlist_title = self._html_search_regex(
|
||||||
webpage, 'game title')
|
r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
|
||||||
|
mweb = re.finditer(r'''(?x)
|
||||||
|
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
|
||||||
|
YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
|
||||||
|
''', webpage)
|
||||||
|
videos = [{
|
||||||
|
'_type': 'url',
|
||||||
|
'url': vid.group('youtube_id'),
|
||||||
|
'ie_key': 'Youtube',
|
||||||
|
} for vid in mweb]
|
||||||
|
else:
|
||||||
|
playlist_title = self._html_search_regex(
|
||||||
|
r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
|
||||||
|
|
||||||
urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
|
mweb = re.finditer(r'''(?x)
|
||||||
mweb = re.finditer(urlRE, webpage)
|
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
|
||||||
namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
|
FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
|
||||||
titles = re.finditer(namesRE, webpage)
|
(,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
|
||||||
thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
|
''', webpage)
|
||||||
thumbs = re.finditer(thumbsRE, webpage)
|
titles = re.finditer(
|
||||||
videos = []
|
r'<span class="title">(?P<videoName>.+?)</span>', webpage)
|
||||||
for vid,vtitle,thumb in zip(mweb,titles,thumbs):
|
thumbs = re.finditer(
|
||||||
video_id = vid.group('videoID')
|
r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
|
||||||
title = vtitle.group('videoName')
|
videos = []
|
||||||
video_url = vid.group('videoURL')
|
|
||||||
video_thumb = thumb.group('thumbnail')
|
for vid, vtitle, thumb in zip(mweb, titles, thumbs):
|
||||||
if not video_url:
|
video_id = vid.group('videoID')
|
||||||
raise ExtractorError(u'Cannot find video url for %s' % video_id)
|
title = vtitle.group('videoName')
|
||||||
info = {
|
video_url = vid.group('videoURL')
|
||||||
'id':video_id,
|
video_thumb = thumb.group('thumbnail')
|
||||||
'url':video_url,
|
if not video_url:
|
||||||
'ext': 'flv',
|
raise ExtractorError('Cannot find video url for %s' % video_id)
|
||||||
'title': unescapeHTML(title),
|
videos.append({
|
||||||
'thumbnail': video_thumb
|
'id': video_id,
|
||||||
}
|
'url': video_url,
|
||||||
videos.append(info)
|
'ext': 'flv',
|
||||||
return [self.playlist_result(videos, gameID, game_title)]
|
'title': unescapeHTML(title),
|
||||||
|
'thumbnail': video_thumb
|
||||||
|
})
|
||||||
|
if not videos:
|
||||||
|
raise ExtractorError('Could not find any videos')
|
||||||
|
|
||||||
|
return self.playlist_result(videos, playlist_id, playlist_title)
|
||||||
|
@@ -3,14 +3,21 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TeamcocoIE(InfoExtractor):
|
class TeamcocoIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
|
_VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
|
||||||
|
'file': '80187.mp4',
|
||||||
|
'md5': '3f7746aa0dc86de18df7539903d399ea',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
||||||
|
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
||||||
'file': '19705.mp4',
|
'file': '19705.mp4',
|
||||||
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
|
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
|
||||||
@@ -19,22 +26,23 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError('Invalid URL: %s' % url)
|
|
||||||
url_title = mobj.group('url_title')
|
|
||||||
webpage = self._download_webpage(url, url_title)
|
|
||||||
|
|
||||||
video_id = self._html_search_regex(
|
display_id = mobj.group('display_id')
|
||||||
r'<article class="video" data-id="(\d+?)"',
|
webpage = self._download_webpage(url, display_id)
|
||||||
webpage, 'video id')
|
|
||||||
|
video_id = mobj.group("video_id")
|
||||||
self.report_extraction(video_id)
|
if not video_id:
|
||||||
|
video_id = self._html_search_regex(
|
||||||
|
r'<article class="video" data-id="(\d+?)"',
|
||||||
|
webpage, 'video id')
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||||
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
|
data = self._download_xml(
|
||||||
|
data_url, display_id, 'Downloading data webpage')
|
||||||
|
|
||||||
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
||||||
formats = []
|
formats = []
|
||||||
@@ -69,6 +77,7 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
@@ -37,6 +37,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
'consciousness, but that half the time our brains are '
|
'consciousness, but that half the time our brains are '
|
||||||
'actively fooling us.'),
|
'actively fooling us.'),
|
||||||
'uploader': 'Dan Dennett',
|
'uploader': 'Dan Dennett',
|
||||||
|
'width': 854,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||||
@@ -48,12 +49,25 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.+\.jpg',
|
'thumbnail': 're:^https?://.+\.jpg',
|
||||||
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
|
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1972',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Be passionate. Be courageous. Be your best.',
|
||||||
|
'uploader': 'Gabby Giffords and Mark Kelly',
|
||||||
|
'description': 'md5:d89e1d8ebafdac8e55df4c219ecdbfe9',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_FORMATS_PREFERENCE = {
|
_NATIVE_FORMATS = {
|
||||||
'low': 1,
|
'low': {'preference': 1, 'width': 320, 'height': 180},
|
||||||
'medium': 2,
|
'medium': {'preference': 2, 'width': 512, 'height': 288},
|
||||||
'high': 3,
|
'high': {'preference': 3, 'width': 854, 'height': 480},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_info(self, webpage):
|
def _extract_info(self, webpage):
|
||||||
@@ -98,12 +112,26 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
talk_info = self._extract_info(webpage)['talks'][0]
|
talk_info = self._extract_info(webpage)['talks'][0]
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'ext': 'mp4',
|
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'format': format_id,
|
'format': format_id,
|
||||||
'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
|
} for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
|
||||||
} for (format_id, format_url) in talk_info['nativeDownloads'].items()]
|
if formats:
|
||||||
|
for f in formats:
|
||||||
|
finfo = self._NATIVE_FORMATS.get(f['format_id'])
|
||||||
|
if finfo:
|
||||||
|
f.update(finfo)
|
||||||
|
else:
|
||||||
|
# Use rtmp downloads
|
||||||
|
formats = [{
|
||||||
|
'format_id': f['name'],
|
||||||
|
'url': talk_info['streamer'],
|
||||||
|
'play_path': f['file'],
|
||||||
|
'ext': 'flv',
|
||||||
|
'width': f['width'],
|
||||||
|
'height': f['height'],
|
||||||
|
'tbr': f['bitrate'],
|
||||||
|
} for f in talk_info['resources']['rtmp']]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
video_id = compat_str(talk_info['id'])
|
video_id = compat_str(talk_info['id'])
|
||||||
|
60
youtube_dl/extractor/tlc.py
Normal file
60
youtube_dl/extractor/tlc.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .brightcove import BrightcoveIE
|
||||||
|
from .discovery import DiscoveryIE
|
||||||
|
|
||||||
|
|
||||||
|
class TlcIE(DiscoveryIE):
|
||||||
|
IE_NAME = 'tlc.com'
|
||||||
|
_VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
|
||||||
|
'md5': 'c4038f4a9b44d0b5d74caaa64ed2a01a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '853232',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cake Boss: Too Big to Fly',
|
||||||
|
'description': 'Buddy has taken on a high flying task.',
|
||||||
|
'duration': 119,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TlcDeIE(InfoExtractor):
|
||||||
|
IE_NAME = 'tlc.de'
|
||||||
|
_VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3235167922001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Breaking Amish: Die Welt da draußen',
|
||||||
|
'uploader': 'Discovery Networks - Germany',
|
||||||
|
'description': 'Vier Amische und eine Mennonitin wagen in New York'
|
||||||
|
' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
|
||||||
|
' ihrem spannenden Weg.',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
title = mobj.group('title')
|
||||||
|
webpage = self._download_webpage(url, title)
|
||||||
|
iframe_url = self._search_regex(
|
||||||
|
'<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
|
||||||
|
'iframe url')
|
||||||
|
# Otherwise we don't get the correct 'BrightcoveExperience' element,
|
||||||
|
# example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
|
||||||
|
iframe_url = iframe_url.replace('.htm?', '.php?')
|
||||||
|
iframe = self._download_webpage(iframe_url, title)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': BrightcoveIE._extract_brightcove_url(iframe),
|
||||||
|
'ie': BrightcoveIE.ie_key(),
|
||||||
|
}
|
@@ -1,63 +1,83 @@
|
|||||||
import os
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
int_or_none,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import aes_decrypt_text
|
||||||
aes_decrypt_text
|
|
||||||
)
|
|
||||||
|
|
||||||
class Tube8IE(InfoExtractor):
|
class Tube8IE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
|
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||||
u'file': u'229795.mp4',
|
'file': '229795.mp4',
|
||||||
u'md5': u'e9e0b0c86734e5e3766e653509475db0',
|
'md5': 'e9e0b0c86734e5e3766e653509475db0',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"description": u"hot teen Kasia grinding",
|
'description': 'hot teen Kasia grinding',
|
||||||
u"uploader": u"unknown",
|
'uploader': 'unknown',
|
||||||
u"title": u"Kasia music video",
|
'title': 'Kasia music video',
|
||||||
u"age_limit": 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('id')
|
||||||
url = 'http://www.' + mobj.group('url')
|
|
||||||
|
|
||||||
req = compat_urllib_request.Request(url)
|
req = compat_urllib_request.Request(url)
|
||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'videotitle ="([^"]+)', webpage, u'title')
|
flashvars = json.loads(self._html_search_regex(
|
||||||
video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
|
r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
|
||||||
video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
|
|
||||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
|
|
||||||
if thumbnail:
|
|
||||||
thumbnail = thumbnail.replace('\\/', '/')
|
|
||||||
|
|
||||||
video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
|
video_url = flashvars['video_url']
|
||||||
if webpage.find('"encrypted":true')!=-1:
|
if flashvars.get('encrypted') is True:
|
||||||
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
|
video_url = aes_decrypt_text(video_url, flashvars['video_title'], 32).decode('utf-8')
|
||||||
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
|
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
extension = os.path.splitext(path)[1][1:]
|
format_id = '-'.join(path.split('/')[4].split('_')[:2])
|
||||||
format = path.split('/')[4].split('_')[:2]
|
|
||||||
format = "-".join(format)
|
thumbnail = flashvars.get('image_url')
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'videotitle\s*=\s*"([^"]+)', webpage, 'title')
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False)
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>',
|
||||||
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
|
like_count = int_or_none(self._html_search_regex(
|
||||||
|
r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False))
|
||||||
|
dislike_count = int_or_none(self._html_search_regex(
|
||||||
|
r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False))
|
||||||
|
view_count = self._html_search_regex(
|
||||||
|
r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False)
|
||||||
|
if view_count:
|
||||||
|
view_count = str_to_int(view_count)
|
||||||
|
comment_count = self._html_search_regex(
|
||||||
|
r'<span id="allCommentsCount">(\d+)</span>', webpage, 'comment count', fatal=False)
|
||||||
|
if comment_count:
|
||||||
|
comment_count = str_to_int(comment_count)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'uploader': video_uploader,
|
|
||||||
'title': video_title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': video_description,
|
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': extension,
|
'title': title,
|
||||||
'format': format,
|
'description': description,
|
||||||
'format_id': format,
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'format_id': format_id,
|
||||||
|
'view_count': view_count,
|
||||||
|
'like_count': like_count,
|
||||||
|
'dislike_count': dislike_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class UstreamIE(InfoExtractor):
|
class UstreamIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
|
_VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
|
||||||
IE_NAME = 'ustream'
|
IE_NAME = 'ustream'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.ustream.tv/recorded/20274954',
|
'url': 'http://www.ustream.tv/recorded/20274954',
|
||||||
@@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
|
if m.group('type') == 'embed':
|
||||||
|
video_id = m.group('videoID')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
|
||||||
|
desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
|
||||||
|
return self.url_result(desktop_url, 'Ustream')
|
||||||
|
|
||||||
video_id = m.group('videoID')
|
video_id = m.group('videoID')
|
||||||
|
|
||||||
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
|
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
|
||||||
|
26
youtube_dl/extractor/videoweed.py
Normal file
26
youtube_dl/extractor/videoweed.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .novamov import NovaMovIE
|
||||||
|
|
||||||
|
|
||||||
|
class VideoWeedIE(NovaMovIE):
|
||||||
|
IE_NAME = 'videoweed'
|
||||||
|
IE_DESC = 'VideoWeed'
|
||||||
|
|
||||||
|
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
|
||||||
|
|
||||||
|
_HOST = 'www.videoweed.es'
|
||||||
|
|
||||||
|
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
|
||||||
|
_TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.videoweed.es/file/b42178afbea14',
|
||||||
|
'md5': 'abd31a2132947262c50429e1d16c1bfd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b42178afbea14',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'optical illusion dissapeared image magic illusion',
|
||||||
|
'description': ''
|
||||||
|
},
|
||||||
|
}
|
@@ -1,10 +1,11 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class WeiboIE(InfoExtractor):
|
class WeiboIE(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
The videos in Weibo come from different sites, this IE just finds the link
|
The videos in Weibo come from different sites, this IE just finds the link
|
||||||
@@ -13,16 +14,16 @@ class WeiboIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
|
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'add_ie': ['Sina'],
|
'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
|
||||||
u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
|
'info_dict': {
|
||||||
u'file': u'98322879.flv',
|
'id': '98322879',
|
||||||
u'info_dict': {
|
'ext': 'flv',
|
||||||
u'title': u'魔声耳机最新广告“All Eyes On Us”',
|
'title': '魔声耳机最新广告“All Eyes On Us”',
|
||||||
},
|
},
|
||||||
u'note': u'Sina video',
|
'params': {
|
||||||
u'params': {
|
'skip_download': True,
|
||||||
u'skip_download': True,
|
|
||||||
},
|
},
|
||||||
|
'add_ie': ['Sina'],
|
||||||
}
|
}
|
||||||
|
|
||||||
# Additional example videos from different sites
|
# Additional example videos from different sites
|
||||||
@@ -33,17 +34,16 @@ class WeiboIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
|
info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
|
||||||
info_page = self._download_webpage(info_url, video_id)
|
info = self._download_json(info_url, video_id)
|
||||||
info = json.loads(info_page)
|
|
||||||
|
|
||||||
videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
|
videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
|
||||||
#Prefer sina video since they have thumbnails
|
# Prefer sina video since they have thumbnails
|
||||||
videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
|
videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
|
||||||
player_url = videos_urls[-1]
|
player_url = videos_urls[-1]
|
||||||
m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
|
m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
|
||||||
|
player_url)
|
||||||
if m_sina is not None:
|
if m_sina is not None:
|
||||||
self.to_screen('Sina video detected')
|
self.to_screen('Sina video detected')
|
||||||
sina_id = m_sina.group(1)
|
sina_id = m_sina.group(1)
|
||||||
player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
|
player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
|
||||||
return self.url_result(player_url)
|
return self.url_result(player_url)
|
||||||
|
|
||||||
|
@@ -14,27 +14,39 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class YahooIE(InfoExtractor):
|
class YahooIE(InfoExtractor):
|
||||||
IE_DESC = 'Yahoo screen'
|
IE_DESC = 'Yahoo screen and movies'
|
||||||
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
|
_VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||||
'file': '214727115.mp4',
|
|
||||||
'md5': '4962b075c08be8690a922ee026d05e69',
|
'md5': '4962b075c08be8690a922ee026d05e69',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '214727115',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Julian Smith & Travis Legg Watch Julian Smith',
|
'title': 'Julian Smith & Travis Legg Watch Julian Smith',
|
||||||
'description': 'Julian and Travis watch Julian Smith',
|
'description': 'Julian and Travis watch Julian Smith',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
|
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
|
||||||
'file': '103000935.mp4',
|
|
||||||
'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
|
'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '103000935',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Codefellas - The Cougar Lies with Spanish Moss',
|
'title': 'Codefellas - The Cougar Lies with Spanish Moss',
|
||||||
'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
|
'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://movies.yahoo.com/video/world-loves-spider-man-190819223.html',
|
||||||
|
'md5': '410b7104aa9893b765bc22787a22f3d9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '516ed8e2-2c4f-339f-a211-7a8b49d30845',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The World Loves Spider-Man',
|
||||||
|
'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -42,13 +54,20 @@ class YahooIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
items_json = self._search_regex(r'mediaItems: ({.*?})$',
|
items_json = self._search_regex(
|
||||||
webpage, 'items', flags=re.MULTILINE)
|
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
|
||||||
items = json.loads(items_json)
|
default=None)
|
||||||
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
if items_json is None:
|
||||||
# The 'meta' field is not always in the video webpage, we request it
|
long_id = self._search_regex(
|
||||||
# from another page
|
r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
|
||||||
long_id = info['id']
|
webpage, 'content ID')
|
||||||
|
video_id = long_id
|
||||||
|
else:
|
||||||
|
items = json.loads(items_json)
|
||||||
|
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
||||||
|
# The 'meta' field is not always in the video webpage, we request it
|
||||||
|
# from another page
|
||||||
|
long_id = info['id']
|
||||||
return self._get_info(long_id, video_id)
|
return self._get_info(long_id, video_id)
|
||||||
|
|
||||||
def _get_info(self, long_id, video_id):
|
def _get_info(self, long_id, video_id):
|
||||||
@@ -60,10 +79,9 @@ class YahooIE(InfoExtractor):
|
|||||||
'env': 'prod',
|
'env': 'prod',
|
||||||
'format': 'json',
|
'format': 'json',
|
||||||
})
|
})
|
||||||
query_result_json = self._download_webpage(
|
query_result = self._download_json(
|
||||||
'http://video.query.yahoo.com/v1/public/yql?' + data,
|
'http://video.query.yahoo.com/v1/public/yql?' + data,
|
||||||
video_id, 'Downloading video info')
|
video_id, 'Downloading video info')
|
||||||
query_result = json.loads(query_result_json)
|
|
||||||
info = query_result['query']['results']['mediaObj'][0]
|
info = query_result['query']['results']['mediaObj'][0]
|
||||||
meta = info['meta']
|
meta = info['meta']
|
||||||
|
|
||||||
@@ -86,7 +104,6 @@ class YahooIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
format_url = compat_urlparse.urljoin(host, path)
|
format_url = compat_urlparse.urljoin(host, path)
|
||||||
format_info['url'] = format_url
|
format_info['url'] = format_url
|
||||||
|
|
||||||
formats.append(format_info)
|
formats.append(format_info)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@@ -104,7 +121,7 @@ class YahooNewsIE(YahooIE):
|
|||||||
IE_NAME = 'yahoo:news'
|
IE_NAME = 'yahoo:news'
|
||||||
_VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
|
_VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
||||||
'md5': '67010fdf3a08d290e060a4dd96baa07b',
|
'md5': '67010fdf3a08d290e060a4dd96baa07b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -113,10 +130,7 @@ class YahooNewsIE(YahooIE):
|
|||||||
'title': 'China Moses Is Crazy About the Blues',
|
'title': 'China Moses Is Crazy About the Blues',
|
||||||
'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
|
'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
# Overwrite YahooIE properties we don't want
|
|
||||||
_TESTS = []
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -134,27 +148,25 @@ class YahooSearchIE(SearchInfoExtractor):
|
|||||||
|
|
||||||
def _get_n_results(self, query, n):
|
def _get_n_results(self, query, n):
|
||||||
"""Get a specified number of results for a query"""
|
"""Get a specified number of results for a query"""
|
||||||
|
entries = []
|
||||||
res = {
|
for pagenum in itertools.count(0):
|
||||||
'_type': 'playlist',
|
|
||||||
'id': query,
|
|
||||||
'entries': []
|
|
||||||
}
|
|
||||||
for pagenum in itertools.count(0):
|
|
||||||
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
|
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
|
||||||
webpage = self._download_webpage(result_url, query,
|
info = self._download_json(result_url, query,
|
||||||
note='Downloading results page '+str(pagenum+1))
|
note='Downloading results page '+str(pagenum+1))
|
||||||
info = json.loads(webpage)
|
|
||||||
m = info['m']
|
m = info['m']
|
||||||
results = info['results']
|
results = info['results']
|
||||||
|
|
||||||
for (i, r) in enumerate(results):
|
for (i, r) in enumerate(results):
|
||||||
if (pagenum * 30) +i >= n:
|
if (pagenum * 30) + i >= n:
|
||||||
break
|
break
|
||||||
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
|
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
|
||||||
e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
|
e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
|
||||||
res['entries'].append(e)
|
entries.append(e)
|
||||||
if (pagenum * 30 +i >= n) or (m['last'] >= (m['total'] -1)):
|
if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
|
||||||
break
|
break
|
||||||
|
|
||||||
return res
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': query,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
@@ -151,6 +151,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
)
|
)
|
||||||
))
|
))
|
||||||
|youtu\.be/ # just youtu.be/xxxx
|
|youtu\.be/ # just youtu.be/xxxx
|
||||||
|
|https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
||||||
)
|
)
|
||||||
)? # all until now is optional -> you can pass the naked ID
|
)? # all until now is optional -> you can pass the naked ID
|
||||||
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
|
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
|
||||||
@@ -1418,7 +1419,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
||||||
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
else:
|
else:
|
||||||
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||||
|
|
||||||
if playlist_id.startswith('RD'):
|
if playlist_id.startswith('RD'):
|
||||||
# Mixes require a custom extraction process
|
# Mixes require a custom extraction process
|
||||||
|
@@ -1,5 +1,7 @@
|
|||||||
|
|
||||||
|
from .atomicparsley import AtomicParsleyPP
|
||||||
from .ffmpeg import (
|
from .ffmpeg import (
|
||||||
|
FFmpegAudioFixPP,
|
||||||
FFmpegMergerPP,
|
FFmpegMergerPP,
|
||||||
FFmpegMetadataPP,
|
FFmpegMetadataPP,
|
||||||
FFmpegVideoConvertor,
|
FFmpegVideoConvertor,
|
||||||
@@ -9,6 +11,8 @@ from .ffmpeg import (
|
|||||||
from .xattrpp import XAttrMetadataPP
|
from .xattrpp import XAttrMetadataPP
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
'AtomicParsleyPP',
|
||||||
|
'FFmpegAudioFixPP',
|
||||||
'FFmpegMergerPP',
|
'FFmpegMergerPP',
|
||||||
'FFmpegMetadataPP',
|
'FFmpegMetadataPP',
|
||||||
'FFmpegVideoConvertor',
|
'FFmpegVideoConvertor',
|
||||||
|
56
youtube_dl/postprocessor/atomicparsley.py
Normal file
56
youtube_dl/postprocessor/atomicparsley.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from .common import PostProcessor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
check_executable,
|
||||||
|
compat_urlretrieve,
|
||||||
|
encodeFilename,
|
||||||
|
PostProcessingError,
|
||||||
|
prepend_extension,
|
||||||
|
shell_quote
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AtomicParsleyPPError(PostProcessingError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class AtomicParsleyPP(PostProcessor):
|
||||||
|
def run(self, info):
|
||||||
|
if not check_executable('AtomicParsley', ['-v']):
|
||||||
|
raise AtomicParsleyPPError('AtomicParsley was not found. Please install.')
|
||||||
|
|
||||||
|
filename = info['filepath']
|
||||||
|
temp_filename = prepend_extension(filename, 'temp')
|
||||||
|
temp_thumbnail = prepend_extension(filename, 'thumb')
|
||||||
|
|
||||||
|
if not info.get('thumbnail'):
|
||||||
|
raise AtomicParsleyPPError('Thumbnail was not found. Nothing to do.')
|
||||||
|
|
||||||
|
compat_urlretrieve(info['thumbnail'], temp_thumbnail)
|
||||||
|
|
||||||
|
cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]
|
||||||
|
|
||||||
|
self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
|
||||||
|
|
||||||
|
if self._downloader.params.get('verbose', False):
|
||||||
|
self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
|
||||||
|
|
||||||
|
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
stdout, stderr = p.communicate()
|
||||||
|
|
||||||
|
if p.returncode != 0:
|
||||||
|
msg = stderr.decode('utf-8', 'replace').strip()
|
||||||
|
raise AtomicParsleyPPError(msg)
|
||||||
|
|
||||||
|
os.remove(encodeFilename(filename))
|
||||||
|
os.remove(encodeFilename(temp_thumbnail))
|
||||||
|
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||||
|
|
||||||
|
return True, info
|
@@ -53,8 +53,7 @@ class FFmpegPostProcessor(PostProcessor):
|
|||||||
|
|
||||||
if self._downloader.params.get('verbose', False):
|
if self._downloader.params.get('verbose', False):
|
||||||
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
||||||
bcmd = [self._downloader.encode(c) for c in cmd]
|
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
||||||
stdout, stderr = p.communicate()
|
stdout, stderr = p.communicate()
|
||||||
if p.returncode != 0:
|
if p.returncode != 0:
|
||||||
stderr = stderr.decode('utf-8', 'replace')
|
stderr = stderr.decode('utf-8', 'replace')
|
||||||
@@ -465,7 +464,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
|||||||
filename = info['filepath']
|
filename = info['filepath']
|
||||||
temp_filename = prepend_extension(filename, 'temp')
|
temp_filename = prepend_extension(filename, 'temp')
|
||||||
|
|
||||||
options = ['-c', 'copy']
|
if info['ext'] == u'm4a':
|
||||||
|
options = ['-vn', '-acodec', 'copy']
|
||||||
|
else:
|
||||||
|
options = ['-c', 'copy']
|
||||||
|
|
||||||
for (name, value) in metadata.items():
|
for (name, value) in metadata.items():
|
||||||
options.extend(['-metadata', '%s=%s' % (name, value)])
|
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||||
|
|
||||||
@@ -484,3 +487,17 @@ class FFmpegMergerPP(FFmpegPostProcessor):
|
|||||||
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
|
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
|
||||||
return True, info
|
return True, info
|
||||||
|
|
||||||
|
|
||||||
|
class FFmpegAudioFixPP(FFmpegPostProcessor):
|
||||||
|
def run(self, info):
|
||||||
|
filename = info['filepath']
|
||||||
|
temp_filename = prepend_extension(filename, 'temp')
|
||||||
|
|
||||||
|
options = ['-vn', '-acodec', 'copy']
|
||||||
|
self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % filename)
|
||||||
|
self.run_ffmpeg(filename, temp_filename, options)
|
||||||
|
|
||||||
|
os.remove(encodeFilename(filename))
|
||||||
|
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||||
|
|
||||||
|
return True, info
|
||||||
|
@@ -2,6 +2,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import calendar
|
import calendar
|
||||||
|
import codecs
|
||||||
import contextlib
|
import contextlib
|
||||||
import ctypes
|
import ctypes
|
||||||
import datetime
|
import datetime
|
||||||
@@ -909,25 +910,93 @@ def platform_name():
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
def write_string(s, out=None):
|
def _windows_write_string(s, out):
|
||||||
|
""" Returns True if the string was written using special methods,
|
||||||
|
False if it has yet to be written out."""
|
||||||
|
# Adapted from http://stackoverflow.com/a/3259271/35070
|
||||||
|
|
||||||
|
import ctypes
|
||||||
|
import ctypes.wintypes
|
||||||
|
|
||||||
|
WIN_OUTPUT_IDS = {
|
||||||
|
1: -11,
|
||||||
|
2: -12,
|
||||||
|
}
|
||||||
|
|
||||||
|
fileno = out.fileno()
|
||||||
|
if fileno not in WIN_OUTPUT_IDS:
|
||||||
|
return False
|
||||||
|
|
||||||
|
GetStdHandle = ctypes.WINFUNCTYPE(
|
||||||
|
ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
|
||||||
|
("GetStdHandle", ctypes.windll.kernel32))
|
||||||
|
h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
|
||||||
|
|
||||||
|
WriteConsoleW = ctypes.WINFUNCTYPE(
|
||||||
|
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
|
||||||
|
ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
|
||||||
|
ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
|
||||||
|
written = ctypes.wintypes.DWORD(0)
|
||||||
|
|
||||||
|
GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
|
||||||
|
FILE_TYPE_CHAR = 0x0002
|
||||||
|
FILE_TYPE_REMOTE = 0x8000
|
||||||
|
GetConsoleMode = ctypes.WINFUNCTYPE(
|
||||||
|
ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
|
||||||
|
ctypes.POINTER(ctypes.wintypes.DWORD))(
|
||||||
|
("GetConsoleMode", ctypes.windll.kernel32))
|
||||||
|
INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
|
||||||
|
|
||||||
|
def not_a_console(handle):
|
||||||
|
if handle == INVALID_HANDLE_VALUE or handle is None:
|
||||||
|
return True
|
||||||
|
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
|
||||||
|
or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
|
||||||
|
|
||||||
|
if not_a_console(h):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def next_nonbmp_pos(s):
|
||||||
|
try:
|
||||||
|
return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
|
||||||
|
except StopIteration:
|
||||||
|
return len(s)
|
||||||
|
|
||||||
|
while s:
|
||||||
|
count = min(next_nonbmp_pos(s), 1024)
|
||||||
|
|
||||||
|
ret = WriteConsoleW(
|
||||||
|
h, s, count if count else 2, ctypes.byref(written), None)
|
||||||
|
if ret == 0:
|
||||||
|
raise OSError('Failed to write string')
|
||||||
|
if not count: # We just wrote a non-BMP character
|
||||||
|
assert written.value == 2
|
||||||
|
s = s[1:]
|
||||||
|
else:
|
||||||
|
assert written.value > 0
|
||||||
|
s = s[written.value:]
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def write_string(s, out=None, encoding=None):
|
||||||
if out is None:
|
if out is None:
|
||||||
out = sys.stderr
|
out = sys.stderr
|
||||||
assert type(s) == compat_str
|
assert type(s) == compat_str
|
||||||
|
|
||||||
|
if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
|
||||||
|
if _windows_write_string(s, out):
|
||||||
|
return
|
||||||
|
|
||||||
if ('b' in getattr(out, 'mode', '') or
|
if ('b' in getattr(out, 'mode', '') or
|
||||||
sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
|
sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
|
||||||
s = s.encode(preferredencoding(), 'ignore')
|
byt = s.encode(encoding or preferredencoding(), 'ignore')
|
||||||
try:
|
out.write(byt)
|
||||||
|
elif hasattr(out, 'buffer'):
|
||||||
|
enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
|
||||||
|
byt = s.encode(enc, 'ignore')
|
||||||
|
out.buffer.write(byt)
|
||||||
|
else:
|
||||||
out.write(s)
|
out.write(s)
|
||||||
except UnicodeEncodeError:
|
|
||||||
# In Windows shells, this can fail even when the codec is just charmap!?
|
|
||||||
# See https://wiki.python.org/moin/PrintFails#Issue
|
|
||||||
if sys.platform == 'win32' and hasattr(out, 'encoding'):
|
|
||||||
s = s.encode(out.encoding, 'ignore').decode(out.encoding)
|
|
||||||
out.write(s)
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
|
|
||||||
out.flush()
|
out.flush()
|
||||||
|
|
||||||
|
|
||||||
@@ -1263,9 +1332,11 @@ class PagedList(object):
|
|||||||
|
|
||||||
|
|
||||||
def uppercase_escape(s):
|
def uppercase_escape(s):
|
||||||
|
unicode_escape = codecs.getdecoder('unicode_escape')
|
||||||
return re.sub(
|
return re.sub(
|
||||||
r'\\U[0-9a-fA-F]{8}',
|
r'\\U[0-9a-fA-F]{8}',
|
||||||
lambda m: m.group(0).decode('unicode-escape'), s)
|
lambda m: unicode_escape(m.group(0))[0],
|
||||||
|
s)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
struct.pack(u'!I', 0)
|
struct.pack(u'!I', 0)
|
||||||
@@ -1335,3 +1406,14 @@ US_RATINGS = {
|
|||||||
|
|
||||||
def strip_jsonp(code):
|
def strip_jsonp(code):
|
||||||
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
|
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
|
||||||
|
|
||||||
|
|
||||||
|
def qualities(quality_ids):
|
||||||
|
""" Get a numeric quality value out of a list of possible values """
|
||||||
|
def q(qid):
|
||||||
|
try:
|
||||||
|
return quality_ids.index(qid)
|
||||||
|
except ValueError:
|
||||||
|
return -1
|
||||||
|
return q
|
||||||
|
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.04.04.2'
|
__version__ = '2014.04.21.2'
|
||||||
|
Reference in New Issue
Block a user