Compare commits
85 Commits
2014.02.03
...
2014.02.10
Author | SHA1 | Date | |
---|---|---|---|
2e20bba708 | |||
e70dc1d14b | |||
026fcc0495 | |||
81c2f20b53 | |||
1afe753462 | |||
524c2c716a | |||
b542d4bbd7 | |||
17968e444c | |||
2e3fd9ec2f | |||
d6a283b025 | |||
9766538124 | |||
98dbee8681 | |||
e421491b3b | |||
6828d37c41 | |||
bf5f610099 | |||
8b7f73404a | |||
85cacb2f51 | |||
b3fa3917e2 | |||
082c6c867a | |||
03fcf1ab57 | |||
3b00dea5eb | |||
8bc6c8e3c0 | |||
79bc27b53a | |||
84dd703199 | |||
c6fdba23a6 | |||
b19fe521a9 | |||
c1e672d121 | |||
f4371f4784 | |||
d914d9d187 | |||
845d14d377 | |||
4a9540b6d2 | |||
9f31be7000 | |||
41fa1b627d | |||
c0c4e66b29 | |||
cd8662de22 | |||
3587159614 | |||
d67cc9fa7c | |||
bf3a2fe923 | |||
e9ea0bf123 | |||
63424b6233 | |||
0bf35c5cf5 | |||
95c29381eb | |||
94c4abce7f | |||
f2dffe55f8 | |||
46a073bfac | |||
df872ec4e7 | |||
5de90176d9 | |||
dcf3eec47a | |||
e9e4f30d26 | |||
83cebd73d4 | |||
1df4229bd7 | |||
3c995527e9 | |||
7c62b568a2 | |||
ccf9114e84 | |||
d8061908bb | |||
211e17dd43 | |||
6cb38a9994 | |||
fa7df757a7 | |||
8c82077619 | |||
e5d1f9e50a | |||
7ee50ae7b5 | |||
de563c9da0 | |||
50451f2a18 | |||
9bc70948e1 | |||
5dc733f071 | |||
bc4850908c | |||
20650c8654 | |||
56dced2670 | |||
eef726c04b | |||
acf1555d76 | |||
22e7f1a6ec | |||
3c49325658 | |||
bb1cd2bea1 | |||
fdf1f8d4ce | |||
117c8c6b97 | |||
5cef4ff09b | |||
91264ce572 | |||
c79ef8e1ae | |||
58d915df51 | |||
7881a64499 | |||
90159f5561 | |||
99877772d0 | |||
b0268cb6ce | |||
4edff4cfa8 | |||
1eac553e7e |
@ -1,5 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
@ -13,6 +15,7 @@ from youtube_dl.extractor import (
|
|||||||
FacebookIE,
|
FacebookIE,
|
||||||
gen_extractors,
|
gen_extractors,
|
||||||
JustinTVIE,
|
JustinTVIE,
|
||||||
|
PBSIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -29,18 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
|
|
||||||
def test_youtube_playlist_matching(self):
|
def test_youtube_playlist_matching(self):
|
||||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||||
assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||||
assertPlaylist(u'PL63F0C78739B09958')
|
assertPlaylist('PL63F0C78739B09958')
|
||||||
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||||
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
|
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||||
|
# Top tracks
|
||||||
|
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||||
|
|
||||||
def test_youtube_matching(self):
|
def test_youtube_matching(self):
|
||||||
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||||
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||||
@ -80,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
def test_youtube_extract(self):
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
@ -89,7 +94,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
|
||||||
def test_facebook_matching(self):
|
def test_facebook_matching(self):
|
||||||
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
@ -124,5 +129,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
||||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
||||||
|
|
||||||
|
def test_pbs(self):
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/2350
|
||||||
|
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -22,6 +22,7 @@ import socket
|
|||||||
|
|
||||||
import youtube_dl.YoutubeDL
|
import youtube_dl.YoutubeDL
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
|
compat_http_client,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
@ -110,7 +111,7 @@ def generator(test_case):
|
|||||||
ydl.download([test_case['url']])
|
ydl.download([test_case['url']])
|
||||||
except (DownloadError, ExtractorError) as err:
|
except (DownloadError, ExtractorError) as err:
|
||||||
# Check if the exception is not a network related one
|
# Check if the exception is not a network related one
|
||||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if try_num == RETRIES:
|
if try_num == RETRIES:
|
||||||
|
@ -34,6 +34,8 @@ from youtube_dl.extractor import (
|
|||||||
KhanAcademyIE,
|
KhanAcademyIE,
|
||||||
EveryonesMixtapeIE,
|
EveryonesMixtapeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
|
GoogleSearchIE,
|
||||||
|
GenericIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -229,6 +231,24 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['id'], '1409')
|
self.assertEqual(result['id'], '1409')
|
||||||
self.assertTrue(len(result['entries']) >= 34)
|
self.assertTrue(len(result['entries']) >= 34)
|
||||||
|
|
||||||
|
def test_multiple_brightcove_videos(self):
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/2283
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = GenericIE(dl)
|
||||||
|
result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
|
||||||
|
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
|
||||||
|
self.assertEqual(len(result['entries']), 3)
|
||||||
|
|
||||||
|
def test_GoogleSearch(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = GoogleSearchIE(dl)
|
||||||
|
result = ie.extract('gvsearch15:python language')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], 'python language')
|
||||||
|
self.assertEqual(result['title'], 'python language')
|
||||||
|
self.assertTrue(len(result['entries']) == 15)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -14,6 +14,7 @@ from youtube_dl.extractor import (
|
|||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
TEDIE,
|
TEDIE,
|
||||||
|
VimeoIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -223,5 +224,60 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
|
|||||||
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
|
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
|
||||||
|
|
||||||
|
|
||||||
|
class TestVimeoSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://vimeo.com/76979871'
|
||||||
|
IE = VimeoIE
|
||||||
|
|
||||||
|
def test_no_writesubtitles(self):
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(subtitles, None)
|
||||||
|
|
||||||
|
def test_subtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||||
|
|
||||||
|
def test_subtitles_lang(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['fr']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_automatic_captions(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writeautomaticsub'] = True
|
||||||
|
self.DL.params['subtitleslang'] = ['en']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertTrue(len(subtitles.keys()) == 0)
|
||||||
|
|
||||||
|
def test_nosubtitles(self):
|
||||||
|
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
||||||
|
self.url = 'http://vimeo.com/56015672'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles), 0)
|
||||||
|
|
||||||
|
def test_multiple_langs(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
langs = ['es', 'fr', 'de']
|
||||||
|
self.DL.params['subtitleslangs'] = langs
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
for lang in langs:
|
||||||
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -127,6 +127,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
||||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||||
|
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||||
|
|
||||||
def test_find_xpath_attr(self):
|
def test_find_xpath_attr(self):
|
||||||
testxml = u'''<root>
|
testxml = u'''<root>
|
||||||
|
@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['title'], 'ytdl test PL')
|
self.assertEqual(result['title'], 'ytdl test PL')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
||||||
|
|
||||||
def test_youtube_playlist_noplaylist(self):
|
def test_youtube_playlist_noplaylist(self):
|
||||||
@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertEqual(result['_type'], 'url')
|
self.assertEqual(result['_type'], 'url')
|
||||||
self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
|
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||||
|
|
||||||
def test_issue_673(self):
|
def test_issue_673(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||||
|
|
||||||
@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
# TODO find a > 100 (paginating?) videos course
|
# TODO find a > 100 (paginating?) videos course
|
||||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||||
|
|
||||||
def test_youtube_channel(self):
|
def test_youtube_channel(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
original_video = entries[0]
|
original_video = entries[0]
|
||||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||||
|
|
||||||
|
def test_youtube_toptracks(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = YoutubePlaylistIE(dl)
|
||||||
|
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
||||||
|
entries = result['entries']
|
||||||
|
self.assertEqual(len(entries), 100)
|
||||||
|
|
||||||
def test_youtube_toplist(self):
|
def test_youtube_toplist(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeTopListIE(dl)
|
ie = YoutubeTopListIE(dl)
|
||||||
|
@ -27,6 +27,12 @@ _TESTS = [
|
|||||||
85,
|
85,
|
||||||
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||||
|
u'js',
|
||||||
|
90,
|
||||||
|
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -41,6 +41,7 @@ __authors__ = (
|
|||||||
'Chris Gahan',
|
'Chris Gahan',
|
||||||
'Saimadhav Heblikar',
|
'Saimadhav Heblikar',
|
||||||
'Mike Col',
|
'Mike Col',
|
||||||
|
'Andreas Schmitz',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
|
|||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
player_url = info_dict.get('player_url', None)
|
player_url = info_dict.get('player_url', None)
|
||||||
page_url = info_dict.get('page_url', None)
|
page_url = info_dict.get('page_url', None)
|
||||||
|
app = info_dict.get('app', None)
|
||||||
play_path = info_dict.get('play_path', None)
|
play_path = info_dict.get('play_path', None)
|
||||||
tc_url = info_dict.get('tc_url', None)
|
tc_url = info_dict.get('tc_url', None)
|
||||||
|
flash_version = info_dict.get('flash_version', None)
|
||||||
live = info_dict.get('rtmp_live', False)
|
live = info_dict.get('rtmp_live', False)
|
||||||
conn = info_dict.get('rtmp_conn', None)
|
conn = info_dict.get('rtmp_conn', None)
|
||||||
|
|
||||||
@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
|
|||||||
basic_args += ['--swfVfy', player_url]
|
basic_args += ['--swfVfy', player_url]
|
||||||
if page_url is not None:
|
if page_url is not None:
|
||||||
basic_args += ['--pageUrl', page_url]
|
basic_args += ['--pageUrl', page_url]
|
||||||
|
if app is not None:
|
||||||
|
basic_args += ['--app', app]
|
||||||
if play_path is not None:
|
if play_path is not None:
|
||||||
basic_args += ['--playpath', play_path]
|
basic_args += ['--playpath', play_path]
|
||||||
if tc_url is not None:
|
if tc_url is not None:
|
||||||
basic_args += ['--tcUrl', url]
|
basic_args += ['--tcUrl', url]
|
||||||
if test:
|
if test:
|
||||||
basic_args += ['--stop', '1']
|
basic_args += ['--stop', '1']
|
||||||
|
if flash_version is not None:
|
||||||
|
basic_args += ['--flashVer', flash_version]
|
||||||
if live:
|
if live:
|
||||||
basic_args += ['--live']
|
basic_args += ['--live']
|
||||||
if conn:
|
if conn:
|
||||||
|
@ -15,6 +15,7 @@ from .arte import (
|
|||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
|
from .bbccouk import BBCCoUkIE
|
||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
@ -25,6 +26,7 @@ from .canalplus import CanalplusIE
|
|||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
|
from .chilloutzone import ChilloutzoneIE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
@ -54,12 +56,14 @@ from .ebaumsworld import EbaumsWorldIE
|
|||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .eitb import EitbIE
|
from .eitb import EitbIE
|
||||||
|
from .elpais import ElPaisIE
|
||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
from .everyonesmixtape import EveryonesMixtapeIE
|
from .everyonesmixtape import EveryonesMixtapeIE
|
||||||
from .exfm import ExfmIE
|
from .exfm import ExfmIE
|
||||||
from .extremetube import ExtremeTubeIE
|
from .extremetube import ExtremeTubeIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
from .faz import FazIE
|
from .faz import FazIE
|
||||||
|
from .firstpost import FirstpostIE
|
||||||
from .fktv import (
|
from .fktv import (
|
||||||
FKTVIE,
|
FKTVIE,
|
||||||
FKTVPosteckeIE,
|
FKTVPosteckeIE,
|
||||||
@ -96,10 +100,12 @@ from .ina import InaIE
|
|||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
from .internetvideoarchive import InternetVideoArchiveIE
|
from .internetvideoarchive import InternetVideoArchiveIE
|
||||||
|
from .iprima import IPrimaIE
|
||||||
from .ivi import (
|
from .ivi import (
|
||||||
IviIE,
|
IviIE,
|
||||||
IviCompilationIE
|
IviCompilationIE
|
||||||
)
|
)
|
||||||
|
from .jadorecettepub import JadoreCettePubIE
|
||||||
from .jeuxvideo import JeuxVideoIE
|
from .jeuxvideo import JeuxVideoIE
|
||||||
from .jukebox import JukeboxIE
|
from .jukebox import JukeboxIE
|
||||||
from .justintv import JustinTVIE
|
from .justintv import JustinTVIE
|
||||||
@ -109,6 +115,7 @@ from .keezmovies import KeezMoviesIE
|
|||||||
from .khanacademy import KhanAcademyIE
|
from .khanacademy import KhanAcademyIE
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
|
from .kontrtube import KontrTubeIE
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
from .lifenews import LifeNewsIE
|
from .lifenews import LifeNewsIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
@ -117,6 +124,7 @@ from .lynda import (
|
|||||||
LyndaIE,
|
LyndaIE,
|
||||||
LyndaCourseIE
|
LyndaCourseIE
|
||||||
)
|
)
|
||||||
|
from .m6 import M6IE
|
||||||
from .macgamestore import MacGameStoreIE
|
from .macgamestore import MacGameStoreIE
|
||||||
from .malemotion import MalemotionIE
|
from .malemotion import MalemotionIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
@ -126,6 +134,7 @@ from .mit import TechTVMITIE, MITIE
|
|||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
from .mpora import MporaIE
|
from .mpora import MporaIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import MofosexIE
|
||||||
|
from .mooshare import MooshareIE
|
||||||
from .mtv import (
|
from .mtv import (
|
||||||
MTVIE,
|
MTVIE,
|
||||||
MTVIggyIE,
|
MTVIggyIE,
|
||||||
@ -137,8 +146,10 @@ from .myvideo import MyVideoIE
|
|||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import NBCNewsIE
|
from .nbc import NBCNewsIE
|
||||||
|
from .ndr import NDRIE
|
||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
|
from .nfb import NFBIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
|
217
youtube_dl/extractor/bbccouk.py
Normal file
217
youtube_dl/extractor/bbccouk.py
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||||
|
IE_NAME = 'bbc.co.uk'
|
||||||
|
IE_DESC = 'BBC iPlayer'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p01q7wz4',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
|
||||||
|
'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
|
||||||
|
'duration': 1936,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b00yng1d',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'The Man in Black: Series 3: The Printed Name',
|
||||||
|
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
|
||||||
|
'duration': 1800,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b00yng1d',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
||||||
|
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
||||||
|
'duration': 5100,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _extract_asx_playlist(self, connection, programme_id):
|
||||||
|
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||||
|
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||||
|
|
||||||
|
def _extract_connection(self, connection, programme_id):
|
||||||
|
formats = []
|
||||||
|
protocol = connection.get('protocol')
|
||||||
|
supplier = connection.get('supplier')
|
||||||
|
if protocol == 'http':
|
||||||
|
href = connection.get('href')
|
||||||
|
# ASX playlist
|
||||||
|
if supplier == 'asx':
|
||||||
|
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||||
|
formats.append({
|
||||||
|
'url': ref,
|
||||||
|
'format_id': 'ref%s_%s' % (i, supplier),
|
||||||
|
})
|
||||||
|
# Direct link
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': href,
|
||||||
|
'format_id': supplier,
|
||||||
|
})
|
||||||
|
elif protocol == 'rtmp':
|
||||||
|
application = connection.get('application', 'ondemand')
|
||||||
|
auth_string = connection.get('authString')
|
||||||
|
identifier = connection.get('identifier')
|
||||||
|
server = connection.get('server')
|
||||||
|
formats.append({
|
||||||
|
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||||
|
'play_path': identifier,
|
||||||
|
'app': '%s?%s' % (application, auth_string),
|
||||||
|
'page_url': 'http://www.bbc.co.uk',
|
||||||
|
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||||
|
'rtmp_live': False,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': supplier,
|
||||||
|
})
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_items(self, playlist):
|
||||||
|
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||||
|
|
||||||
|
def _extract_medias(self, media_selection):
|
||||||
|
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||||
|
|
||||||
|
def _extract_connections(self, media):
|
||||||
|
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
||||||
|
|
||||||
|
def _extract_video(self, media, programme_id):
|
||||||
|
formats = []
|
||||||
|
vbr = int(media.get('bitrate'))
|
||||||
|
vcodec = media.get('encoding')
|
||||||
|
service = media.get('service')
|
||||||
|
width = int(media.get('width'))
|
||||||
|
height = int(media.get('height'))
|
||||||
|
file_size = int(media.get('media_file_size'))
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
conn_formats = self._extract_connection(connection, programme_id)
|
||||||
|
for format in conn_formats:
|
||||||
|
format.update({
|
||||||
|
'format_id': '%s_%s' % (service, format['format_id']),
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'vbr': vbr,
|
||||||
|
'vcodec': vcodec,
|
||||||
|
'filesize': file_size,
|
||||||
|
})
|
||||||
|
formats.extend(conn_formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_audio(self, media, programme_id):
|
||||||
|
formats = []
|
||||||
|
abr = int(media.get('bitrate'))
|
||||||
|
acodec = media.get('encoding')
|
||||||
|
service = media.get('service')
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
conn_formats = self._extract_connection(connection, programme_id)
|
||||||
|
for format in conn_formats:
|
||||||
|
format.update({
|
||||||
|
'format_id': '%s_%s' % (service, format['format_id']),
|
||||||
|
'abr': abr,
|
||||||
|
'acodec': acodec,
|
||||||
|
})
|
||||||
|
formats.extend(conn_formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_captions(self, media, programme_id):
|
||||||
|
subtitles = {}
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||||
|
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||||
|
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
||||||
|
srt = ''
|
||||||
|
for pos, p in enumerate(ps):
|
||||||
|
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
|
||||||
|
p.text.strip() if p.text is not None else '')
|
||||||
|
subtitles[lang] = srt
|
||||||
|
return subtitles
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract formats, subtitles and metadata for one iPlayer episode URL.

    Downloads the playlist XML for the id matched by ``_VALID_URL``, then,
    for every playlist item of kind ``programme`` / ``radioProgramme``,
    downloads its media selection XML and collects audio and video formats
    plus captions.

    Raises ExtractorError (expected) when the playlist carries a
    ``noItems`` element, and ExtractorError when the playlist contains no
    programme item at all (previously an UnboundLocalError at the return).

    Returns the info dict, or ``None`` after listing subtitles when the
    ``listsubtitles`` option is set.
    """
    mobj = re.match(self._VALID_URL, url)
    group_id = mobj.group('id')

    playlist = self._download_xml(
        'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
        'Downloading playlist XML')

    no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % group_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % group_id
        else:
            msg = 'Episode %s is not available: %s' % (group_id, reason)
        raise ExtractorError(msg, expected=True)

    formats = []
    subtitles = None
    programme_id = None
    duration = None

    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind != 'programme' and kind != 'radioProgramme':
            continue

        programme_id = item.get('identifier')
        raw_duration = item.get('duration')
        # int(None) raises TypeError; keep the duration optional.
        duration = None if raw_duration is None else int(raw_duration)

        media_selection = self._download_xml(
            'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
            programme_id, 'Downloading media selection XML')

        for media in self._extract_medias(media_selection):
            kind = media.get('kind')
            if kind == 'audio':
                formats.extend(self._extract_audio(media, programme_id))
            elif kind == 'video':
                formats.extend(self._extract_video(media, programme_id))
            elif kind == 'captions':
                subtitles = self._extract_captions(media, programme_id)

    if programme_id is None:
        # No programme item was seen, so there is nothing to return.
        raise ExtractorError('No programme found in playlist %s' % group_id)

    if self._downloader.params.get('listsubtitles', False):
        self._list_available_subtitles(programme_id, subtitles)
        return

    self._sort_formats(formats)

    # Title and summary are read from the playlist, not from the item, so
    # they are looked up once here rather than on every loop iteration.
    title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
    description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text

    return {
        'id': programme_id,
        'title': title,
        'description': description,
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
    }
|
@ -1,19 +1,14 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
|
||||||
ExtractorError,
|
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -24,5 +24,7 @@ class BloombergIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
ooyala_url = self._twitter_search_player(webpage)
|
embed_code = self._search_regex(
|
||||||
return self.url_result(ooyala_url, OoyalaIE.ie_key())
|
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
|
||||||
|
'embed code')
|
||||||
|
return OoyalaIE._build_url_result(embed_code)
|
||||||
|
@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_brightcove_url(cls, webpage):
|
def _extract_brightcove_url(cls, webpage):
|
||||||
"""Try to extract the brightcove url from the wepbage, returns None
|
"""Try to extract the brightcove url from the webpage, returns None
|
||||||
if it can't be found
|
if it can't be found
|
||||||
"""
|
"""
|
||||||
|
urls = cls._extract_brightcove_urls(webpage)
|
||||||
|
return urls[0] if urls else None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_brightcove_urls(cls, webpage):
|
||||||
|
"""Return a list of all Brightcove URLs from the webpage """
|
||||||
|
|
||||||
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
||||||
if url_m:
|
if url_m:
|
||||||
return url_m.group(1)
|
return [url_m.group(1)]
|
||||||
|
|
||||||
m_brightcove = re.search(
|
matches = re.findall(
|
||||||
r'''(?sx)<object
|
r'''(?sx)<object
|
||||||
(?:
|
(?:
|
||||||
[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
|
[^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
|
||||||
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
||||||
).+?</object>''',
|
).+?</object>''',
|
||||||
webpage)
|
webpage)
|
||||||
if m_brightcove is not None:
|
return [cls._build_brighcove_url(m) for m in matches]
|
||||||
return cls._build_brighcove_url(m_brightcove.group())
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
|
|||||||
'''
|
'''
|
||||||
IE_DESC = 'Channel 9'
|
IE_DESC = 'Channel 9'
|
||||||
IE_NAME = 'channel9'
|
IE_NAME = 'channel9'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||||
'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
|
|
||||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||||
'duration': 4576,
|
'duration': 4576,
|
||||||
@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
|
|
||||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||||
'duration': 1540,
|
'duration': 1540,
|
||||||
|
97
youtube_dl/extractor/chilloutzone.py
Normal file
97
youtube_dl/extractor/chilloutzone.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
ExtractorError
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ChilloutzoneIE(InfoExtractor):
    """Extractor for chilloutzone.net video pages.

    The page embeds a base64-encoded JSON blob (``cozVidData``) that names
    the site's own media URL and, optionally, a native platform video
    (YouTube / Vimeo) to which extraction is delegated.
    """

    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
    _TESTS = [{
        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
        'md5': 'a76f3457e813ea0037e5244f509e66d1',
        'info_dict': {
            'id': 'enemene-meck-alle-katzen-weg',
            'ext': 'mp4',
            'title': 'Enemene Meck - Alle Katzen weg',
            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
        },
    }, {
        'note': 'Video hosted at YouTube',
        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
        'info_dict': {
            'id': '1YVQaAgHyRU',
            'ext': 'mp4',
            'title': '16 Photos Taken 1 Second Before Disaster',
            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
            'uploader': 'BuzzFeedVideo',
            'uploader_id': 'BuzzFeedVideo',
            'upload_date': '20131105',
        },
    }, {
        'note': 'Video hosted at Vimeo',
        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
        'info_dict': {
            'id': '85523671',
            'ext': 'mp4',
            'title': 'The Sunday Times - Icons',
            'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
            'uploader': 'Us',
            'uploader_id': 'usfilms',
            'upload_date': '20140131'
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for *url*, or a url_result for a native video.

        Raises ExtractorError when neither a native video nor a media URL
        can be determined.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        base64_video_info = self._html_search_regex(
            r'var cozVidData = "(.+?)";', webpage, 'video data')
        # Hand b64decode bytes explicitly rather than relying on it
        # accepting text input.
        decoded_video_info = base64.b64decode(
            base64_video_info.encode('utf-8')).decode('utf-8')
        video_info_dict = json.loads(decoded_video_info)

        # Only the title is mandatory; everything else is read with .get()
        # so a missing key reaches the explicit checks below instead of
        # surfacing as a bare KeyError.
        title = video_info_dict['title']
        video_url = video_info_dict.get('mediaUrl')
        raw_description = video_info_dict.get('description')
        # NOTE(review): guarded because clean_html may not accept None -- confirm.
        description = (
            None if raw_description is None else clean_html(raw_description))
        native_platform = video_info_dict.get('nativePlatform')
        native_video_id = video_info_dict.get('nativeVideoId')
        source_priority = video_info_dict.get('sourcePriority')

        # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
        if native_platform is None:
            youtube_url = self._html_search_regex(
                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
                webpage, 'fallback video URL', default=None)
            if youtube_url is not None:
                return self.url_result(youtube_url, ie='Youtube')

        # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
        # the own CDN.  Without a native id there is nothing to delegate to,
        # so fall through to the own CDN.
        if source_priority == 'native' and native_video_id:
            if native_platform == 'youtube':
                return self.url_result(native_video_id, ie='Youtube')
            if native_platform == 'vimeo':
                return self.url_result(
                    'http://vimeo.com/' + native_video_id, ie='Vimeo')

        if not video_url:
            raise ExtractorError('No video found')

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }
|
@ -4,6 +4,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class CollegeHumorIE(InfoExtractor):
|
class CollegeHumorIE(InfoExtractor):
|
||||||
@ -11,22 +12,25 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||||
'file': '6902724.mp4',
|
|
||||||
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '6902724',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Comic-Con Cosplay Catastrophe',
|
'title': 'Comic-Con Cosplay Catastrophe',
|
||||||
'description': 'Fans get creative this year at San Diego. Too',
|
'description': 'Fans get creative this year',
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
||||||
'file': '3505939.mp4',
|
|
||||||
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '3505939',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Font Conference',
|
'title': 'Font Conference',
|
||||||
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
|
'description': 'This video wasn\'t long enough,',
|
||||||
'age_limit': 10,
|
'age_limit': 10,
|
||||||
|
'duration': 179,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# embedded youtube video
|
# embedded youtube video
|
||||||
@ -38,7 +42,7 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||||
'uploader': 'Funnyplox TV',
|
'uploader': 'Funnyplox TV',
|
||||||
'uploader_id': 'funnyploxtv',
|
'uploader_id': 'funnyploxtv',
|
||||||
'description': 'md5:506f69f7a297ed698ced3375f2363b0e',
|
'description': 'md5:11812366244110c3523968aa74f02521',
|
||||||
'upload_date': '20140128',
|
'upload_date': '20140128',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -82,6 +86,8 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
duration = int_or_none(vdata.get('duration'), 1000)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': vdata['title'],
|
'title': vdata['title'],
|
||||||
@ -89,4 +95,5 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'thumbnail': vdata.get('thumbnail'),
|
'thumbnail': vdata.get('thumbnail'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
'duration': duration,
|
||||||
}
|
}
|
||||||
|
@ -271,8 +271,11 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _download_json(self, url_or_request, video_id,
|
def _download_json(self, url_or_request, video_id,
|
||||||
note=u'Downloading JSON metadata',
|
note=u'Downloading JSON metadata',
|
||||||
errnote=u'Unable to download JSON metadata'):
|
errnote=u'Unable to download JSON metadata',
|
||||||
|
transform_source=None):
|
||||||
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
if transform_source:
|
||||||
|
json_string = transform_source(json_string)
|
||||||
try:
|
try:
|
||||||
return json.loads(json_string)
|
return json.loads(json_string)
|
||||||
except ValueError as ve:
|
except ValueError as ve:
|
||||||
|
58
youtube_dl/extractor/elpais.py
Normal file
58
youtube_dl/extractor/elpais.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
|
class ElPaisIE(InfoExtractor):
    """Extractor for videos embedded in elpais.com ``.html`` pages."""

    _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
    IE_DESC = 'El País'

    _TEST = {
        'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
        'md5': '98406f301f19562170ec071b83433d55',
        'info_dict': {
            'id': 'tiempo-nuevo-recetas-viejas',
            'ext': 'mp4',
            'title': 'Tiempo nuevo, recetas viejas',
            'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
            'upload_date': '20140206',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video on the page at *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # The page's script builds media URLs as ``url_cache + '<path>'``.
        cache_base = self._html_search_regex(
            r'var url_cache = "([^"]+)";', page, 'URL prefix')
        media_path = self._search_regex(
            r"URLMediaFile = url_cache \+ '([^']+)'", page, 'video URL')
        video_url = cache_base + media_path

        # The still image is optional (non-fatal lookup).
        still_path = self._search_regex(
            r"URLMediaStill = url_cache \+ '([^']+)'", page, 'thumbnail URL',
            fatal=False)
        thumbnail = None
        if still_path is not None:
            thumbnail = cache_base + still_path

        title = self._html_search_regex(
            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
            page, 'title')

        # The upload date is optional as well.
        raw_date = self._search_regex(
            r'<p class="date-header date-int updated"\s+title="([^"]+)">',
            page, 'upload date', fatal=False)
        upload_date = None
        if raw_date is not None:
            upload_date = unified_strdate(raw_date)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': self._og_search_description(page),
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
|
38
youtube_dl/extractor/firstpost.py
Normal file
38
youtube_dl/extractor/firstpost.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class FirstpostIE(InfoExtractor):
    """Extractor for firstpost.com article pages that embed a video."""

    IE_NAME = 'Firstpost.com'
    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'

    _TEST = {
        'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
        'md5': 'ee9114957692f01fb1263ed87039112a',
        'info_dict': {
            'id': '1025403',
            'ext': 'mp4',
            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
            'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video on the article page at *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # The media URL is the ``flashvars`` value of the "div_video" element.
        info = {'id': video_id}
        info['url'] = self._html_search_regex(
            r'<div.*?name="div_video".*?flashvars="([^"]+)">',
            page, 'video URL')
        # Remaining metadata comes from the page's Open Graph tags.
        info['title'] = self._og_search_title(page)
        info['description'] = self._og_search_description(page)
        info['thumbnail'] = self._og_search_thumbnail(page)
        return info
|
@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
|
|||||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||||
|
|
||||||
# Look for BrightCove:
|
# Look for BrightCove:
|
||||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
|
||||||
if bc_url is not None:
|
if bc_urls:
|
||||||
self.to_screen('Brightcove video detected.')
|
self.to_screen('Brightcove video detected.')
|
||||||
surl = smuggle_url(bc_url, {'Referer': url})
|
entries = [{
|
||||||
return self.url_result(surl, 'Brightcove')
|
'_type': 'url',
|
||||||
|
'url': smuggle_url(bc_url, {'Referer': url}),
|
||||||
|
'ie_key': 'Brightcove'
|
||||||
|
} for bc_url in bc_urls]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'title': video_title,
|
||||||
|
'id': video_id,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
|
||||||
# Look for embedded (iframe) Vimeo player
|
# Look for embedded (iframe) Vimeo player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -8,32 +10,42 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class GoogleSearchIE(SearchInfoExtractor):
|
class GoogleSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = u'Google Video search'
|
IE_DESC = 'Google Video search'
|
||||||
_MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
|
|
||||||
_MAX_RESULTS = 1000
|
_MAX_RESULTS = 1000
|
||||||
IE_NAME = u'video.google:search'
|
IE_NAME = 'video.google:search'
|
||||||
_SEARCH_KEY = 'gvsearch'
|
_SEARCH_KEY = 'gvsearch'
|
||||||
|
|
||||||
def _get_n_results(self, query, n):
|
def _get_n_results(self, query, n):
|
||||||
"""Get a specified number of results for a query"""
|
"""Get a specified number of results for a query"""
|
||||||
|
|
||||||
|
entries = []
|
||||||
res = {
|
res = {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'id': query,
|
'id': query,
|
||||||
'entries': []
|
'title': query,
|
||||||
}
|
}
|
||||||
|
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count():
|
||||||
result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
|
result_url = (
|
||||||
webpage = self._download_webpage(result_url, u'gvsearch:' + query,
|
'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
|
||||||
note='Downloading result page ' + str(pagenum))
|
% (compat_urllib_parse.quote_plus(query), pagenum * 10))
|
||||||
|
|
||||||
for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
|
webpage = self._download_webpage(
|
||||||
e = {
|
result_url, 'gvsearch:' + query,
|
||||||
|
note='Downloading result page ' + str(pagenum + 1))
|
||||||
|
|
||||||
|
for hit_idx, mobj in enumerate(re.finditer(
|
||||||
|
r'<h3 class="r"><a href="([^"]+)"', webpage)):
|
||||||
|
|
||||||
|
# Skip playlists
|
||||||
|
if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
|
||||||
|
continue
|
||||||
|
|
||||||
|
entries.append({
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': mobj.group(1)
|
'url': mobj.group(1)
|
||||||
}
|
})
|
||||||
res['entries'].append(e)
|
|
||||||
|
|
||||||
if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
|
if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage):
|
||||||
|
res['entries'] = entries[:n]
|
||||||
return res
|
return res
|
||||||
|
@ -1,39 +1,36 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class InaIE(InfoExtractor):
|
class InaIE(InfoExtractor):
|
||||||
"""Information Extractor for Ina.fr"""
|
_VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||||
u'file': u'I12055569.mp4',
|
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||||
u'md5': u'a667021bf2b41f8dc6049479d9bb38a3',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'I12055569',
|
||||||
u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\""
|
'ext': 'mp4',
|
||||||
|
'title': 'François Hollande "Je crois que c\'est clair"',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self,url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id
|
mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
|
||||||
video_extension = 'mp4'
|
info_doc = self._download_xml(mrss_url, video_id)
|
||||||
webpage = self._download_webpage(mrss_url, video_id)
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
|
video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url']
|
||||||
webpage, u'video URL')
|
|
||||||
|
|
||||||
video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
|
return {
|
||||||
webpage, u'title')
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
return [{
|
'title': info_doc.find('.//title').text,
|
||||||
'id': video_id,
|
}
|
||||||
'url': video_url,
|
|
||||||
'ext': video_extension,
|
|
||||||
'title': video_title,
|
|
||||||
}]
|
|
||||||
|
85
youtube_dl/extractor/iprima.py
Normal file
85
youtube_dl/extractor/iprima.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
from random import random
|
||||||
|
from math import floor
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import compat_urllib_request
|
||||||
|
|
||||||
|
|
||||||
|
class IPrimaIE(InfoExtractor):
    """Extractor for play.iprima.cz videos (RTMP streams)."""

    _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'

    _TESTS = [{
        'url': 'http://play.iprima.cz/particka/particka-92',
        'info_dict': {
            'id': '39152',
            'ext': 'flv',
            'title': 'Partička (92)',
            'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
            'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
        },
        'params': {
            'skip_download': True,
        },
    },
    ]

    def _real_extract(self, url):
        """Return the info dict (with one RTMP format per quality) for *url*.

        The stream base URL is taken from the player script, which is
        requested with the page URL as Referer; per-quality file names and
        the geo zone come from the video page itself.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        # math.floor() returns a float on Python 2, which '%s' would render
        # as e.g. "12345.0"; int() keeps both tokens plain integers.
        player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
            int(floor(random() * 1073741824)),
            int(floor(random() * 1073741824)))

        req = compat_urllib_request.Request(player_url)
        req.add_header('Referer', url)
        playerpage = self._download_webpage(req, video_id)

        # The second match in the player script is the one used; its three
        # captured groups are concatenated into the base URL.
        base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])

        zone_geo = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
        if zone_geo != '0':
            base_url = base_url.replace('token', 'token_' + zone_geo)

        formats = []
        # Fall back to the URL slug so that a page on which every quality
        # is "null" does not end in an UnboundLocalError at the return.
        real_id = video_id
        # List position doubles as the quality rank: lq=0, hq=1, hd=2.
        for quality, format_id in enumerate(['lq', 'hq', 'hd']):
            filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')

            if filename == 'null':
                continue

            real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')

            if format_id == 'hd':
                filename = 'hq/' + filename

            formats.append({
                'format_id': format_id,
                'url': base_url,
                'quality': quality,
                # Drop the surrounding quotes and the last four characters.
                'play_path': 'mp4:' + filename.replace('"', '')[:-4],
                'rtmp_live': True,
                'ext': 'flv',
            })

        self._sort_formats(formats)

        return {
            'id': real_id,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
        }
|
@ -14,15 +14,16 @@ from ..utils import (
|
|||||||
class IviIE(InfoExtractor):
|
class IviIE(InfoExtractor):
|
||||||
IE_DESC = 'ivi.ru'
|
IE_DESC = 'ivi.ru'
|
||||||
IE_NAME = 'ivi'
|
IE_NAME = 'ivi'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Single movie
|
# Single movie
|
||||||
{
|
{
|
||||||
'url': 'http://www.ivi.ru/watch/53141',
|
'url': 'http://www.ivi.ru/watch/53141',
|
||||||
'file': '53141.mp4',
|
|
||||||
'md5': '6ff5be2254e796ed346251d117196cf4',
|
'md5': '6ff5be2254e796ed346251d117196cf4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '53141',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Иван Васильевич меняет профессию',
|
'title': 'Иван Васильевич меняет профессию',
|
||||||
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
||||||
'duration': 5498,
|
'duration': 5498,
|
||||||
@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
|
|||||||
# Serial's serie
|
# Serial's serie
|
||||||
{
|
{
|
||||||
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||||
'file': '74791.mp4',
|
|
||||||
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '74791',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Дежурный ангел - 1 серия',
|
'title': 'Дежурный ангел - 1 серия',
|
||||||
'duration': 2490,
|
'duration': 2490,
|
||||||
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||||
@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
|
|||||||
class IviCompilationIE(InfoExtractor):
|
class IviCompilationIE(InfoExtractor):
|
||||||
IE_DESC = 'ivi.ru compilations'
|
IE_DESC = 'ivi.ru compilations'
|
||||||
IE_NAME = 'ivi:compilation'
|
IE_NAME = 'ivi:compilation'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||||
|
|
||||||
def _extract_entries(self, html, compilation_id):
|
def _extract_entries(self, html, compilation_id):
|
||||||
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
||||||
|
49
youtube_dl/extractor/jadorecettepub.py
Normal file
49
youtube_dl/extractor/jadorecettepub.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
|
class JadoreCettePubIE(InfoExtractor):
    """Extractor for jadorecettepub.com posts.

    The post page supplies the title and description; the video itself is
    handed off to another extractor through a ``url_transparent`` result.
    """

    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'

    _TEST = {
        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
        'md5': '401286a06067c70b44076044b66515de',
        'info_dict': {
            'id': 'jLMja3tr7a4',
            'ext': 'mp4',
            'title': 'La pire utilisation de Star Wars',
            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
        },
    }

    def _real_extract(self, url):
        """Scrape the post page and delegate the embedded video URL.

        Returns a ``url_transparent`` info dict whose ``url`` is the
        embedded video link and whose ``id`` is derived from that link by
        ``YoutubeIE.extract_id``.
        """
        # The URL slug is only used as a display id while downloading.
        slug = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, slug)

        result = {'_type': 'url_transparent'}
        result['title'] = self._html_search_regex(
            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
            page, 'title')
        # The description is optional: a miss is not fatal.
        result['description'] = self._html_search_regex(
            r'(?s)<div id="fb-root">(.*?)<script>', page, 'description',
            fatal=False)

        embedded_url = self._search_regex(
            r'\[/postlink\](.*)endofvid', page, 'video URL')
        result['url'] = embedded_url
        result['id'] = YoutubeIE.extract_id(embedded_url)
        return result
|
||||||
|
|
@ -1,5 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -10,12 +12,13 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||||
u'file': u'5182.mp4',
|
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||||
u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '5182',
|
||||||
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
'ext': 'mp4',
|
||||||
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||||
|
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -25,14 +28,14 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
xml_link = self._html_search_regex(
|
xml_link = self._html_search_regex(
|
||||||
r'<param name="flashvars" value="config=(.*?)" />',
|
r'<param name="flashvars" value="config=(.*?)" />',
|
||||||
webpage, u'config URL')
|
webpage, 'config URL')
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||||
xml_link, u'video ID')
|
xml_link, 'video ID')
|
||||||
|
|
||||||
config = self._download_xml(
|
config = self._download_xml(
|
||||||
xml_link, title, u'Downloading XML config')
|
xml_link, title, 'Downloading XML config')
|
||||||
info_json = config.find('format.json').text
|
info_json = config.find('format.json').text
|
||||||
info = json.loads(info_json)['versions'][0]
|
info = json.loads(info_json)['versions'][0]
|
||||||
|
|
||||||
|
66
youtube_dl/extractor/kontrtube.py
Normal file
66
youtube_dl/extractor/kontrtube.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class KontrTubeIE(InfoExtractor):
    """Extractor for videos hosted on kontrtube.ru."""

    IE_NAME = 'kontrtube'
    IE_DESC = 'KontrTube.ru - Труба зовёт'
    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'

    _TEST = {
        'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
        'md5': '975a991a4926c9a85f383a736a2e6b80',
        'info_dict': {
            'id': '2678',
            'ext': 'mp4',
            'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
            'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
            'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
            'duration': 270,
        }
    }

    def _real_extract(self, url):
        """Scrape the video page and return the info dict.

        The video URL and title are mandatory. Thumbnail, duration, view
        count and comment count are optional and are reported as ``None``
        when they cannot be found on the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        video_url = self._html_search_regex(
            r"video_url: '(.+?)/?',", webpage, 'video URL')
        thumbnail = self._html_search_regex(
            r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
        title = self._html_search_regex(
            r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
            'video title')
        description = self._html_search_meta('description', webpage, 'video description')

        # Duration is rendered as "<minutes>м:<seconds>с".
        mobj = re.search(
            r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
            webpage)
        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None

        view_count = self._html_search_regex(
            r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
            'view count', fatal=False)
        view_count = int(view_count) if view_count is not None else None

        comment_count = None
        comment_str = self._html_search_regex(
            r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
            fatal=False)
        # The lookup is non-fatal, so it may yield None; calling
        # .startswith() on None would abort the whole extraction over an
        # optional field.
        if comment_str is not None:
            if comment_str.startswith('комментариев нет'):
                comment_count = 0
            else:
                mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
                if mobj:
                    comment_count = int(mobj.group('total'))

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
        }
|
@ -31,7 +31,7 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
|
webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')
|
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
||||||
|
56
youtube_dl/extractor/m6.py
Normal file
56
youtube_dl/extractor/m6.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class M6IE(InfoExtractor):
    """Extractor for m6.fr videos, driven by the site's per-video RSS feed."""

    IE_NAME = 'm6'
    _VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'

    _TEST = {
        'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
        'md5': '242994a87de2c316891428e0176bcb77',
        'info_dict': {
            'id': '11323908',
            'ext': 'mp4',
            'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »',
            'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
            'duration': 100,
        }
    }

    def _real_extract(self, url):
        """Download the video's RSS document and build the info dict from it.

        Metadata is read from ``./channel/item``; one format is emitted
        for each ``url_video_<quality>`` element present in the feed.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        feed = self._download_xml(
            'http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
            'Downloading video RSS')

        info = {'id': video_id}
        info['title'] = feed.find('./channel/item/title').text
        info['description'] = feed.find('./channel/item/description').text
        info['thumbnail'] = feed.find('./channel/item/visuel_clip_big').text
        info['duration'] = int(feed.find('./channel/item/duration').text)
        info['view_count'] = int(feed.find('./channel/item/nombre_vues').text)

        # Not every quality is published for every video; skip the missing ones.
        available = []
        for quality in ['lq', 'sd', 'hq', 'hd']:
            node = feed.find('./channel/item/url_video_%s' % quality)
            if node is not None:
                available.append({
                    'url': node.text,
                    'format_id': quality,
                })
        info['formats'] = available

        return info
|
114
youtube_dl/extractor/mooshare.py
Normal file
114
youtube_dl/extractor/mooshare.py
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MooshareIE(InfoExtractor):
    """Extractor for videos hosted on mooshare.biz."""

    IE_NAME = 'mooshare'
    IE_DESC = 'Mooshare.biz'
    _VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'

    _TESTS = [
        {
            'url': 'http://mooshare.biz/8dqtk4bjbp8g',
            'md5': '4e14f9562928aecd2e42c6f341c8feba',
            'info_dict': {
                'id': '8dqtk4bjbp8g',
                'ext': 'mp4',
                'title': 'Comedy Football 2011 - (part 1-2)',
                'duration': 893,
            },
        },
        {
            'url': 'http://mooshare.biz/aipjtoc4g95j',
            'info_dict': {
                'id': 'aipjtoc4g95j',
                'ext': 'mp4',
                'title': 'Orange Caramel Dashing Through the Snow',
                'duration': 212,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }
    ]

    def _real_extract(self, url):
        """Submit the site's download form and collect the offered formats.

        The landing page carries a hidden ``hash`` field that must be
        posted back, after a short wait, to obtain the player page. That
        page may expose an SD file, an HD file and/or an RTMP stream.

        Raises ExtractorError (expected) when the page reports that the
        video was not found or was deleted.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        if re.search(r'>Video Not Found or Deleted<', page) is not None:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        hash_key = self._html_search_regex(
            r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
        title = self._html_search_regex(
            r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')

        download_form = {
            'op': 'download1',
            'id': video_id,
            'hash': hash_key,
        }

        # The POST body must be bytes on Python 3; urlencode() returns
        # text there, so encode it (its output is pure ASCII).
        post_data = compat_urllib_parse.urlencode(download_form).encode('ascii')
        request = compat_urllib_request.Request(
            'http://mooshare.biz/%s' % video_id, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        # Pause before submitting the form.
        # NOTE(review): presumably the site enforces this delay - confirm.
        self.to_screen('%s: Waiting for timeout' % video_id)
        time.sleep(5)

        video_page = self._download_webpage(request, video_id, 'Downloading video page')

        thumbnail = self._html_search_regex(
            r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
        duration_str = self._html_search_regex(
            r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
        duration = int(duration_str) if duration_str is not None else None

        formats = []

        # SD video
        mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'sd',
                'format': 'SD',
            })

        # HD video
        mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'hd',
                'format': 'HD',
            })

        # rtmp video
        mobj = re.search(
            r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",',
            video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('rtmpurl'),
                'play_path': mobj.group('playpath'),
                'rtmp_live': False,
                'ext': 'mp4',
                'format_id': 'rtmp',
                'format': 'HD',
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
@ -82,10 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
title_el = find_xpath_attr(
|
title_el = find_xpath_attr(
|
||||||
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||||
'scheme', 'urn:mtvn:video_title')
|
'scheme', 'urn:mtvn:video_title')
|
||||||
if title_el is None:
|
|
||||||
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
|
||||||
if title_el is None:
|
if title_el is None:
|
||||||
title_el = itemdoc.find('.//title')
|
title_el = itemdoc.find('.//title')
|
||||||
|
if title_el.text is None:
|
||||||
|
title_el = None
|
||||||
|
if title_el is None:
|
||||||
|
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
||||||
|
|
||||||
title = title_el.text
|
title = title_el.text
|
||||||
if title is None:
|
if title is None:
|
||||||
raise ExtractorError('Could not find video title')
|
raise ExtractorError('Could not find video title')
|
||||||
|
89
youtube_dl/extractor/ndr.py
Normal file
89
youtube_dl/extractor/ndr.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class NDRIE(InfoExtractor):
    """Extractor for audio and video pages of the NDR.de Mediathek."""

    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Mediathek'
    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'

    _TESTS = [
        # video
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
            'md5': '20eba151ff165f386643dad9c1da08f7',
            'info_dict': {
                'id': '19925',
                'ext': 'mp4',
                'title': 'Hallo Niedersachsen ',
                'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
                'duration': 1722,
            },
        },
        # audio
        {
            'url': 'http://www.ndr.de/903/audio191719.html',
            'md5': '41ed601768534dd18a9ae34d84798129',
            'info_dict': {
                'id': '191719',
                'ext': 'mp3',
                'title': '"Es war schockierend"',
                'description': 'md5:ed7ff8364793545021a6355b97e95f10',
                'duration': 112,
            }
        }
    ]

    def _real_extract(self, url):
        """Scrape the media page for an mp3 link and/or mp4 variants.

        Raises ExtractorError when the page exposes neither an audio nor
        a video source.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        title = self._og_search_title(page)
        description = self._og_search_description(page)

        # Duration is shown as separate minute and second spans.
        duration = None
        duration_match = re.search(
            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
            page)
        if duration_match:
            duration = (int(duration_match.group('minutes')) * 60
                        + int(duration_match.group('seconds')))

        formats = []

        audio_match = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
        if audio_match:
            formats.append({
                'url': audio_match.group('audio'),
                'format_id': 'mp3',
            })

        thumbnail = None

        video_match = re.search(
            r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
        if video_match:
            thumbnail = self._html_search_regex(
                r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
                page, 'thumbnail', fatal=False)
            if thumbnail:
                # The poster path is site-relative.
                thumbnail = 'http://www.ndr.de' + thumbnail
            # Only the ".hi" URL is matched; the other quality variants
            # are derived from its stem.
            stem = video_match.group('video')
            formats.extend(
                {'url': '%s.%s.mp4' % (stem, quality), 'format_id': quality}
                for quality in ['lo', 'hi', 'hq'])

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
93
youtube_dl/extractor/nfb.py
Normal file
93
youtube_dl/extractor/nfb.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NFBIE(InfoExtractor):
    """Extractor for films on nfb.ca / onf.ca."""

    IE_NAME = 'nfb'
    IE_DESC = 'National Film Board of Canada'
    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'

    _TEST = {
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
        'info_dict': {
            'id': 'qallunaat_why_white_people_are_funny',
            'ext': 'mp4',
            'title': 'Qallunaat! Why White People Are Funny ',
            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
            'duration': 3128,
            'uploader': 'Mark Sandiford',
            'uploader_id': 'mark-sandiford',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Fetch the film page and the player config and build the info dict.

        Director details come from the film page; title, description,
        duration, thumbnail and the RTMP formats come from the player
        configuration XML, which is requested with a POST.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        film_page = self._download_webpage(
            'https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')

        info = {
            'id': video_id,
            'title': None,
            'description': None,
            'thumbnail': None,
            'duration': None,
            'formats': [],
        }
        info['uploader_id'] = self._html_search_regex(
            r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
            film_page, 'director id', fatal=False)
        info['uploader'] = self._html_search_regex(
            r'<em class="director-name" itemprop="name">([^<]+)</em>',
            film_page, 'director name', fatal=False)

        post_body = compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')
        config_request = compat_urllib_request.Request(
            'https://www.nfb.ca/film/%s/player_config' % video_id, post_body)
        config_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        config_request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

        config = self._download_xml(config_request, video_id, 'Downloading player config XML')

        def extract_thumbnail(media):
            # Prefer the 'high' quality poster; otherwise take the first
            # one listed, or None when there are no assets at all.
            by_quality = dict(
                (asset.get('quality'), asset.find('default/url').text)
                for asset in media.findall('assets/asset'))
            if 'high' in by_quality:
                return by_quality['high']
            if by_quality:
                return list(by_quality.values())[0]
            return None

        for media in config.findall('./player/stream/media'):
            media_type = media.get('type')
            if media_type == 'posterImage':
                info['thumbnail'] = extract_thumbnail(media)
            elif media_type == 'video':
                info['duration'] = int(media.get('duration'))
                info['title'] = media.find('title').text
                info['description'] = media.find('description').text
                # It seems assets always go from lower to better quality, so no need to sort
                info['formats'] = [{
                    'url': asset.find('default/streamerURI').text + '/',
                    'play_path': asset.find('default/url').text,
                    'rtmp_live': False,
                    'ext': 'mp4',
                    'format_id': asset.get('quality'),
                } for asset in media.findall('assets/asset')]

        return info
|
@ -1,34 +1,68 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class PBSIE(InfoExtractor):
|
class PBSIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
|
_VALID_URL = r'''(?x)https?://
|
||||||
|
(?:
|
||||||
|
# Direct video URL
|
||||||
|
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
||||||
|
# Article with embedded player
|
||||||
|
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||||
|
# Player
|
||||||
|
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||||
u'file': u'2365006249.mp4',
|
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||||
u'md5': 'ce1888486f0908d555a8093cac9a7362',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '2365006249',
|
||||||
u'title': u'A More Perfect Union',
|
'ext': 'mp4',
|
||||||
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
|
'title': 'A More Perfect Union',
|
||||||
u'duration': 3190,
|
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||||
|
'duration': 3190,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
presumptive_id = mobj.group('presumptive_id')
|
||||||
|
display_id = presumptive_id
|
||||||
|
if presumptive_id:
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
url = self._search_regex(
|
||||||
|
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||||
|
webpage, 'player URL')
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
|
player_id = mobj.group('player_id')
|
||||||
|
if not display_id:
|
||||||
|
display_id = player_id
|
||||||
|
if player_id:
|
||||||
|
player_page = self._download_webpage(
|
||||||
|
url, display_id, note='Downloading player page',
|
||||||
|
errnote='Could not download player page')
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
|
||||||
|
else:
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
display_id = video_id
|
||||||
|
|
||||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||||
info_page = self._download_webpage(info_url, video_id)
|
info = self._download_json(info_url, display_id)
|
||||||
info =json.loads(info_page)
|
|
||||||
return {'id': video_id,
|
return {
|
||||||
'title': info['title'],
|
'id': video_id,
|
||||||
'url': info['alternate_encoding']['url'],
|
'title': info['title'],
|
||||||
'ext': 'mp4',
|
'url': info['alternate_encoding']['url'],
|
||||||
'description': info['program'].get('description'),
|
'ext': 'mp4',
|
||||||
'thumbnail': info.get('image_url'),
|
'description': info['program'].get('description'),
|
||||||
'duration': info.get('duration'),
|
'thumbnail': info.get('image_url'),
|
||||||
}
|
'duration': info.get('duration'),
|
||||||
|
}
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@ -12,11 +14,12 @@ class SlideshareIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
|
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
||||||
u'file': u'25665706.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '25665706',
|
||||||
u'title': u'Managing Scale and Complexity',
|
'ext': 'mp4',
|
||||||
u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
|
'title': 'Managing Scale and Complexity',
|
||||||
|
'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -26,15 +29,17 @@ class SlideshareIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, page_title)
|
webpage = self._download_webpage(url, page_title)
|
||||||
slideshare_obj = self._search_regex(
|
slideshare_obj = self._search_regex(
|
||||||
r'var slideshare_object = ({.*?}); var user_info =',
|
r'var slideshare_object = ({.*?}); var user_info =',
|
||||||
webpage, u'slideshare object')
|
webpage, 'slideshare object')
|
||||||
info = json.loads(slideshare_obj)
|
info = json.loads(slideshare_obj)
|
||||||
if info['slideshow']['type'] != u'video':
|
if info['slideshow']['type'] != 'video':
|
||||||
raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
||||||
|
|
||||||
doc = info['doc']
|
doc = info['doc']
|
||||||
bucket = info['jsplayer']['video_bucket']
|
bucket = info['jsplayer']['video_bucket']
|
||||||
ext = info['jsplayer']['video_extension']
|
ext = info['jsplayer']['video_extension']
|
||||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
@ -43,5 +48,5 @@ class SlideshareIE(InfoExtractor):
|
|||||||
'ext': ext,
|
'ext': ext,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'thumbnail': info['slideshow']['pin_image_url'],
|
'thumbnail': info['slideshow']['pin_image_url'],
|
||||||
'description': self._og_search_description(webpage),
|
'description': description,
|
||||||
}
|
}
|
||||||
|
@ -1,36 +1,38 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class StatigramIE(InfoExtractor):
|
class StatigramIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
|
_VALID_URL = r'https?://(www\.)?statigr\.am/p/(?P<id>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://statigr.am/p/522207370455279102_24101272',
|
'url': 'http://statigr.am/p/522207370455279102_24101272',
|
||||||
u'file': u'522207370455279102_24101272.mp4',
|
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
|
||||||
u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '522207370455279102_24101272',
|
||||||
u'uploader_id': u'aguynamedpatrick',
|
'ext': 'mp4',
|
||||||
u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
'uploader_id': 'aguynamedpatrick',
|
||||||
|
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
html_title = self._html_search_regex(
|
html_title = self._html_search_regex(
|
||||||
r'<title>(.+?)</title>',
|
r'<title>(.+?)</title>',
|
||||||
webpage, u'title')
|
webpage, 'title')
|
||||||
title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title)
|
title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title)
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r'@([^ ]+)', title, u'uploader name', fatal=False)
|
r'@([^ ]+)', title, 'uploader name', fatal=False)
|
||||||
ext = 'mp4'
|
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': self._og_search_video_url(webpage),
|
'url': self._og_search_video_url(webpage),
|
||||||
'ext': ext,
|
'title': title,
|
||||||
'title': title,
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id' : uploader_id
|
'uploader_id': uploader_id
|
||||||
}]
|
}
|
||||||
|
@ -68,13 +68,14 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||||||
def _request_subtitle_url(self, sub_lang, url):
|
def _request_subtitle_url(self, sub_lang, url):
|
||||||
""" makes the http request for the subtitle """
|
""" makes the http request for the subtitle """
|
||||||
try:
|
try:
|
||||||
return self._download_subtitle_url(sub_lang, url)
|
sub = self._download_subtitle_url(sub_lang, url)
|
||||||
except ExtractorError as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||||
return
|
return
|
||||||
if not sub:
|
if not sub:
|
||||||
self._downloader.report_warning(u'Did not fetch video subtitles')
|
self._downloader.report_warning(u'Did not fetch video subtitles')
|
||||||
return
|
return
|
||||||
|
return sub
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
"""
|
"""
|
||||||
|
@ -1,22 +1,23 @@
|
|||||||
#coding: utf-8
|
#coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import determine_ext
|
||||||
determine_ext,
|
|
||||||
)
|
|
||||||
|
|
||||||
class ThisAVIE(InfoExtractor):
|
class ThisAVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
|
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
|
'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
|
||||||
u"file": u"47734.flv",
|
'md5': '0480f1ef3932d901f0e0e719f188f19b',
|
||||||
u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': '47734',
|
||||||
u"title": u"高樹マリア - Just fit",
|
'ext': 'flv',
|
||||||
u"uploader": u"dj7970",
|
'title': '高樹マリア - Just fit',
|
||||||
u"uploader_id": u"dj7970"
|
'uploader': 'dj7970',
|
||||||
|
'uploader_id': 'dj7970'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -25,19 +26,18 @@ class ThisAVIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
|
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r"addVariable\('file','([^']+)'\);", webpage, u'video url')
|
r"addVariable\('file','([^']+)'\);", webpage, 'video url')
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
||||||
webpage, u'uploader name', fatal=False)
|
webpage, 'uploader name', fatal=False)
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
||||||
webpage, u'uploader id', fatal=False)
|
webpage, 'uploader id', fatal=False)
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -9,25 +11,25 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class TouTvIE(InfoExtractor):
|
class TouTvIE(InfoExtractor):
|
||||||
IE_NAME = u'tou.tv'
|
IE_NAME = 'tou.tv'
|
||||||
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
|
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.tou.tv/30-vies/S04E41',
|
'url': 'http://www.tou.tv/30-vies/S04E41',
|
||||||
u'file': u'30-vies_S04E41.mp4',
|
'file': '30-vies_S04E41.mp4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'30 vies Saison 4 / Épisode 41',
|
'title': '30 vies Saison 4 / Épisode 41',
|
||||||
u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
|
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||||
u'age_limit': 8,
|
'age_limit': 8,
|
||||||
u'uploader': u'Groupe des Nouveaux Médias',
|
'uploader': 'Groupe des Nouveaux Médias',
|
||||||
u'duration': 1296,
|
'duration': 1296,
|
||||||
u'upload_date': u'20131118',
|
'upload_date': '20131118',
|
||||||
u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
u'skip_download': True, # Requires rtmpdump
|
'skip_download': True, # Requires rtmpdump
|
||||||
},
|
},
|
||||||
u'skip': 'Only available in Canada'
|
'skip': 'Only available in Canada'
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -36,25 +38,25 @@ class TouTvIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
mediaId = self._search_regex(
|
mediaId = self._search_regex(
|
||||||
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
r'"idMedia":\s*"([^"]+)"', webpage, 'media ID')
|
||||||
|
|
||||||
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||||
streams_doc = self._download_xml(
|
streams_doc = self._download_xml(
|
||||||
streams_url, video_id, note=u'Downloading stream list')
|
streams_url, video_id, note='Downloading stream list')
|
||||||
|
|
||||||
video_url = next(n.text
|
video_url = next(n.text
|
||||||
for n in streams_doc.findall('.//choice/url')
|
for n in streams_doc.findall('.//choice/url')
|
||||||
if u'//ad.doubleclick' not in n.text)
|
if '//ad.doubleclick' not in n.text)
|
||||||
if video_url.endswith('/Unavailable.flv'):
|
if video_url.endswith('/Unavailable.flv'):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
u'Access to this video is blocked from outside of Canada',
|
'Access to this video is blocked from outside of Canada',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
duration_str = self._html_search_meta(
|
duration_str = self._html_search_meta(
|
||||||
'video:duration', webpage, u'duration')
|
'video:duration', webpage, 'duration')
|
||||||
duration = int(duration_str) if duration_str else None
|
duration = int(duration_str) if duration_str else None
|
||||||
upload_date_str = self._html_search_meta(
|
upload_date_str = self._html_search_meta(
|
||||||
'video:release_date', webpage, u'upload date')
|
'video:release_date', webpage, 'upload date')
|
||||||
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
|
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class TrailerAddictIE(InfoExtractor):
|
class TrailerAddictIE(InfoExtractor):
|
||||||
|
_WORKING = False
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
||||||
|
@ -11,7 +11,7 @@ from ..aes import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class Tube8IE(InfoExtractor):
|
class Tube8IE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||||
u'file': u'229795.mp4',
|
u'file': u'229795.mp4',
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -10,45 +13,44 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class Vbox7IE(InfoExtractor):
|
class Vbox7IE(InfoExtractor):
|
||||||
"""Information Extractor for Vbox7"""
|
_VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)'
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://vbox7.com/play:249bb972c2',
|
'url': 'http://vbox7.com/play:249bb972c2',
|
||||||
u'file': u'249bb972c2.flv',
|
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
||||||
u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '249bb972c2',
|
||||||
u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
|
'ext': 'flv',
|
||||||
}
|
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self,url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
video_id = mobj.group('id')
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
redirect_page, urlh = self._download_webpage_handle(url, video_id)
|
redirect_page, urlh = self._download_webpage_handle(url, video_id)
|
||||||
new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
|
new_location = self._search_regex(r'window\.location = \'(.*)\';',
|
||||||
|
redirect_page, 'redirect location')
|
||||||
redirect_url = urlh.geturl() + new_location
|
redirect_url = urlh.geturl() + new_location
|
||||||
webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
|
webpage = self._download_webpage(redirect_url, video_id,
|
||||||
|
'Downloading redirect page')
|
||||||
|
|
||||||
title = self._html_search_regex(r'<title>(.*)</title>',
|
title = self._html_search_regex(r'<title>(.*)</title>',
|
||||||
webpage, u'title').split('/')[0].strip()
|
webpage, 'title').split('/')[0].strip()
|
||||||
|
|
||||||
ext = "flv"
|
|
||||||
info_url = "http://vbox7.com/play/magare.do"
|
info_url = "http://vbox7.com/play/magare.do"
|
||||||
data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
|
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
|
||||||
info_request = compat_urllib_request.Request(info_url, data)
|
info_request = compat_urllib_request.Request(info_url, data)
|
||||||
info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
|
info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage')
|
||||||
if info_response is None:
|
if info_response is None:
|
||||||
raise ExtractorError(u'Unable to extract the media url')
|
raise ExtractorError('Unable to extract the media url')
|
||||||
(final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
|
(final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': final_url,
|
'url': final_url,
|
||||||
'ext': ext,
|
'ext': 'flv',
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail_url,
|
'thumbnail': thumbnail_url,
|
||||||
}]
|
}
|
||||||
|
@ -6,10 +6,10 @@ import re
|
|||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
|
||||||
clean_html,
|
clean_html,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -19,7 +19,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class VimeoIE(InfoExtractor):
|
class VimeoIE(SubtitlesInfoExtractor):
|
||||||
"""Information extractor for vimeo.com."""
|
"""Information extractor for vimeo.com."""
|
||||||
|
|
||||||
# _VALID_URL matches Vimeo URLs
|
# _VALID_URL matches Vimeo URLs
|
||||||
@ -84,6 +84,20 @@ class VimeoIE(InfoExtractor):
|
|||||||
'videopassword': 'youtube-dl',
|
'videopassword': 'youtube-dl',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://vimeo.com/76979871',
|
||||||
|
'md5': '3363dd6ffebe3784d56f4132317fd446',
|
||||||
|
'note': 'Video with subtitles',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '76979871',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The New Vimeo Player (You Know, For Videos)',
|
||||||
|
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
|
||||||
|
'upload_date': '20131015',
|
||||||
|
'uploader_id': 'staff',
|
||||||
|
'uploader': 'Vimeo Staff',
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@ -273,19 +287,31 @@ class VimeoIE(InfoExtractor):
|
|||||||
if len(formats) == 0:
|
if len(formats) == 0:
|
||||||
raise ExtractorError('No known codec found')
|
raise ExtractorError('No known codec found')
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
text_tracks = config['request'].get('text_tracks')
|
||||||
|
if text_tracks:
|
||||||
|
for tt in text_tracks:
|
||||||
|
subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
|
||||||
|
|
||||||
|
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'webpage_url': url,
|
'webpage_url': url,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
|
'subtitles': video_subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,18 +1,21 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class VineIE(InfoExtractor):
|
class VineIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)'
|
_VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'https://vine.co/v/b9KOOWX7HUx',
|
'url': 'https://vine.co/v/b9KOOWX7HUx',
|
||||||
u'file': u'b9KOOWX7HUx.mp4',
|
'md5': '2f36fed6235b16da96ce9b4dc890940d',
|
||||||
u'md5': u'2f36fed6235b16da96ce9b4dc890940d',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'b9KOOWX7HUx',
|
||||||
u"uploader": u"Jack Dorsey",
|
'ext': 'mp4',
|
||||||
u"title": u"Chicken."
|
'uploader': 'Jack Dorsey',
|
||||||
}
|
'title': 'Chicken.',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -24,17 +27,17 @@ class VineIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"',
|
video_url = self._html_search_meta('twitter:player:stream', webpage,
|
||||||
webpage, u'video URL')
|
'video URL')
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
|
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
|
||||||
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
}]
|
}
|
||||||
|
@ -34,6 +34,7 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
|
uppercase_escape,
|
||||||
)
|
)
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
@ -136,7 +137,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
||||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
||||||
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
||||||
(?:www\.)?pwnyoutube\.com|
|
(?:www\.)?pwnyoutube\.com/|
|
||||||
tube\.majestyc\.net/|
|
tube\.majestyc\.net/|
|
||||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
@ -502,7 +503,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
return a % b
|
return a % b
|
||||||
|
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||||
if m:
|
if m:
|
||||||
fname = m.group('func')
|
fname = m.group('func')
|
||||||
if fname not in functions:
|
if fname not in functions:
|
||||||
@ -1085,8 +1086,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
self._downloader.report_warning(err_msg)
|
self._downloader.report_warning(err_msg)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _extract_id(self, url):
|
@classmethod
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
def extract_id(cls, url):
|
||||||
|
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
video_id = mobj.group(2)
|
video_id = mobj.group(2)
|
||||||
@ -1115,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
mobj = re.search(self._NEXT_URL_RE, url)
|
mobj = re.search(self._NEXT_URL_RE, url)
|
||||||
if mobj:
|
if mobj:
|
||||||
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
||||||
video_id = self._extract_id(url)
|
video_id = self.extract_id(url)
|
||||||
|
|
||||||
# Get video webpage
|
# Get video webpage
|
||||||
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||||
@ -1422,7 +1424,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com playlists'
|
IE_DESC = u'YouTube.com playlists'
|
||||||
_VALID_URL = r"""(?:
|
_VALID_URL = r"""(?x)(?:
|
||||||
(?:https?://)?
|
(?:https?://)?
|
||||||
(?:\w+\.)?
|
(?:\w+\.)?
|
||||||
youtube\.com/
|
youtube\.com/
|
||||||
@ -1431,7 +1433,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
\? (?:.*?&)*? (?:p|a|list)=
|
\? (?:.*?&)*? (?:p|a|list)=
|
||||||
| p/
|
| p/
|
||||||
)
|
)
|
||||||
((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
|
(
|
||||||
|
(?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
|
||||||
|
# Top tracks, they can also include dots
|
||||||
|
|(?:MC)[\w\.]*
|
||||||
|
)
|
||||||
.*
|
.*
|
||||||
|
|
|
|
||||||
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
||||||
@ -1441,11 +1447,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
@ -1469,7 +1470,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract playlist id
|
# Extract playlist id
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
playlist_id = mobj.group(1) or mobj.group(2)
|
playlist_id = mobj.group(1) or mobj.group(2)
|
||||||
@ -1590,10 +1591,9 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
# Download all channel pages using the json-based channel_ajax query
|
# Download all channel pages using the json-based channel_ajax query
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||||
page = self._download_webpage(url, channel_id,
|
page = self._download_json(
|
||||||
u'Downloading page #%s' % pagenum)
|
url, channel_id, note=u'Downloading page #%s' % pagenum,
|
||||||
|
transform_source=uppercase_escape)
|
||||||
page = json.loads(page)
|
|
||||||
|
|
||||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||||
video_ids.extend(ids_in_page)
|
video_ids.extend(ids_in_page)
|
||||||
|
@ -751,13 +751,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
https_request = http_request
|
https_request = http_request
|
||||||
https_response = http_response
|
https_response = http_response
|
||||||
|
|
||||||
|
|
||||||
def unified_strdate(date_str):
|
def unified_strdate(date_str):
|
||||||
"""Return a string with the date in the format YYYYMMDD"""
|
"""Return a string with the date in the format YYYYMMDD"""
|
||||||
upload_date = None
|
upload_date = None
|
||||||
#Replace commas
|
#Replace commas
|
||||||
date_str = date_str.replace(',',' ')
|
date_str = date_str.replace(',', ' ')
|
||||||
# %z (UTC offset) is only supported in python>=3.2
|
# %z (UTC offset) is only supported in python>=3.2
|
||||||
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
|
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
|
||||||
format_expressions = [
|
format_expressions = [
|
||||||
'%d %B %Y',
|
'%d %B %Y',
|
||||||
'%B %d %Y',
|
'%B %d %Y',
|
||||||
@ -771,11 +772,12 @@ def unified_strdate(date_str):
|
|||||||
'%Y-%m-%dT%H:%M:%S.%fZ',
|
'%Y-%m-%dT%H:%M:%S.%fZ',
|
||||||
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
||||||
'%Y-%m-%dT%H:%M:%S',
|
'%Y-%m-%dT%H:%M:%S',
|
||||||
|
'%Y-%m-%dT%H:%M',
|
||||||
]
|
]
|
||||||
for expression in format_expressions:
|
for expression in format_expressions:
|
||||||
try:
|
try:
|
||||||
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
||||||
except:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
if upload_date is None:
|
if upload_date is None:
|
||||||
timetuple = email.utils.parsedate_tz(date_str)
|
timetuple = email.utils.parsedate_tz(date_str)
|
||||||
@ -1212,3 +1214,9 @@ class PagedList(object):
|
|||||||
if end == nextfirstid:
|
if end == nextfirstid:
|
||||||
break
|
break
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def uppercase_escape(s):
|
||||||
|
return re.sub(
|
||||||
|
r'\\U([0-9a-fA-F]{8})',
|
||||||
|
lambda m: compat_chr(int(m.group(1), base=16)), s)
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.02.03'
|
__version__ = '2014.02.10'
|
||||||
|
Reference in New Issue
Block a user