Compare commits
90 Commits
2014.02.04
...
2014.02.13
Author | SHA1 | Date | |
---|---|---|---|
76e461f395 | |||
1074982e6e | |||
29b2aaf035 | |||
6f90d098c5 | |||
0715161450 | |||
896583517f | |||
713d31fac8 | |||
96cb10a5f5 | |||
c207c1044e | |||
79629ec717 | |||
008fda0f08 | |||
0ae6b01937 | |||
def630e523 | |||
c5ba203e23 | |||
2317e6b2b3 | |||
cb38928974 | |||
fa78f13302 | |||
18395217c4 | |||
34bd987811 | |||
af6ba6a1c4 | |||
85409a0c69 | |||
ebfe352b62 | |||
fde56d2f17 | |||
3501423dfe | |||
0de668af51 | |||
2a584ea90a | |||
0f6ed94a15 | |||
bcb891e82b | |||
ac6e4ca1ed | |||
2e20bba708 | |||
e70dc1d14b | |||
0793a7b3c7 | |||
026fcc0495 | |||
81c2f20b53 | |||
1afe753462 | |||
524c2c716a | |||
b542d4bbd7 | |||
17968e444c | |||
2e3fd9ec2f | |||
d6a283b025 | |||
9766538124 | |||
98dbee8681 | |||
e421491b3b | |||
6828d37c41 | |||
bf5f610099 | |||
8b7f73404a | |||
85cacb2f51 | |||
b3fa3917e2 | |||
082c6c867a | |||
03fcf1ab57 | |||
3b00dea5eb | |||
8bc6c8e3c0 | |||
79bc27b53a | |||
84dd703199 | |||
c6fdba23a6 | |||
b19fe521a9 | |||
c1e672d121 | |||
f4371f4784 | |||
d914d9d187 | |||
845d14d377 | |||
4a9540b6d2 | |||
9f31be7000 | |||
41fa1b627d | |||
c0c4e66b29 | |||
cd8662de22 | |||
3587159614 | |||
d67cc9fa7c | |||
bf3a2fe923 | |||
e9ea0bf123 | |||
63424b6233 | |||
0bf35c5cf5 | |||
95c29381eb | |||
94c4abce7f | |||
f2dffe55f8 | |||
46a073bfac | |||
df872ec4e7 | |||
5de90176d9 | |||
dcf3eec47a | |||
e9e4f30d26 | |||
83cebd73d4 | |||
1df4229bd7 | |||
3c995527e9 | |||
7c62b568a2 | |||
ccf9114e84 | |||
d8061908bb | |||
211e17dd43 | |||
6cb38a9994 | |||
fa7df757a7 | |||
8c82077619 | |||
e5d1f9e50a |
@ -1,5 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
@ -13,6 +15,7 @@ from youtube_dl.extractor import (
|
|||||||
FacebookIE,
|
FacebookIE,
|
||||||
gen_extractors,
|
gen_extractors,
|
||||||
JustinTVIE,
|
JustinTVIE,
|
||||||
|
PBSIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -29,18 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
|
|
||||||
def test_youtube_playlist_matching(self):
|
def test_youtube_playlist_matching(self):
|
||||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||||
assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||||
assertPlaylist(u'PL63F0C78739B09958')
|
assertPlaylist('PL63F0C78739B09958')
|
||||||
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||||
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
|
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||||
|
# Top tracks
|
||||||
|
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||||
|
|
||||||
def test_youtube_matching(self):
|
def test_youtube_matching(self):
|
||||||
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||||
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||||
@ -80,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
def test_youtube_extract(self):
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
@ -89,7 +94,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
|
||||||
def test_facebook_matching(self):
|
def test_facebook_matching(self):
|
||||||
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
@ -124,5 +129,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
||||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
||||||
|
|
||||||
|
def test_pbs(self):
    """Run assertMatch on a video.pbs.org viralplayer URL, expecting ['PBS']."""
    # See https://github.com/rg3/youtube-dl/issues/2350
    viralplayer_url = 'http://video.pbs.org/viralplayer/2365173446/'
    expected_ies = ['PBS']
    self.assertMatch(viralplayer_url, expected_ies)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -22,6 +22,7 @@ import socket
|
|||||||
|
|
||||||
import youtube_dl.YoutubeDL
|
import youtube_dl.YoutubeDL
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
|
compat_http_client,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
@ -110,7 +111,7 @@ def generator(test_case):
|
|||||||
ydl.download([test_case['url']])
|
ydl.download([test_case['url']])
|
||||||
except (DownloadError, ExtractorError) as err:
|
except (DownloadError, ExtractorError) as err:
|
||||||
# Check if the exception is not a network related one
|
# Check if the exception is not a network related one
|
||||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if try_num == RETRIES:
|
if try_num == RETRIES:
|
||||||
|
@ -34,6 +34,7 @@ from youtube_dl.extractor import (
|
|||||||
KhanAcademyIE,
|
KhanAcademyIE,
|
||||||
EveryonesMixtapeIE,
|
EveryonesMixtapeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
|
GoogleSearchIE,
|
||||||
GenericIE,
|
GenericIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -240,6 +241,14 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
|
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
|
||||||
self.assertEqual(len(result['entries']), 3)
|
self.assertEqual(len(result['entries']), 3)
|
||||||
|
|
||||||
|
def test_GoogleSearch(self):
    """Extract a 'gvsearch15:' query and verify the resulting playlist.

    The result must be a playlist whose id and title equal the query text
    and which holds exactly the 15 entries requested by the prefix.
    """
    dl = FakeYDL()
    ie = GoogleSearchIE(dl)
    result = ie.extract('gvsearch15:python language')
    self.assertIsPlaylist(result)
    self.assertEqual(result['id'], 'python language')
    self.assertEqual(result['title'], 'python language')
    # assertEqual (rather than assertTrue on a comparison) reports the
    # actual entry count when the check fails.
    self.assertEqual(len(result['entries']), 15)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -127,6 +127,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
||||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||||
|
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||||
|
|
||||||
def test_find_xpath_attr(self):
|
def test_find_xpath_attr(self):
|
||||||
testxml = u'''<root>
|
testxml = u'''<root>
|
||||||
|
@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['title'], 'ytdl test PL')
|
self.assertEqual(result['title'], 'ytdl test PL')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
||||||
|
|
||||||
def test_youtube_playlist_noplaylist(self):
|
def test_youtube_playlist_noplaylist(self):
|
||||||
@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertEqual(result['_type'], 'url')
|
self.assertEqual(result['_type'], 'url')
|
||||||
self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
|
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||||
|
|
||||||
def test_issue_673(self):
|
def test_issue_673(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||||
|
|
||||||
@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
# TODO find a > 100 (paginating?) videos course
|
# TODO find a > 100 (paginating?) videos course
|
||||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||||
|
|
||||||
def test_youtube_channel(self):
|
def test_youtube_channel(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
original_video = entries[0]
|
original_video = entries[0]
|
||||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||||
|
|
||||||
|
def test_youtube_toptracks(self):
    """Extract the 'MCUS' playlist URL and expect exactly 100 entries."""
    playlist_ie = YoutubePlaylistIE(FakeYDL())
    extracted = playlist_ie.extract('https://www.youtube.com/playlist?list=MCUS')
    self.assertEqual(len(extracted['entries']), 100)
|
||||||
|
|
||||||
def test_youtube_toplist(self):
|
def test_youtube_toplist(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeTopListIE(dl)
|
ie = YoutubeTopListIE(dl)
|
||||||
|
@ -41,6 +41,7 @@ __authors__ = (
|
|||||||
'Chris Gahan',
|
'Chris Gahan',
|
||||||
'Saimadhav Heblikar',
|
'Saimadhav Heblikar',
|
||||||
'Mike Col',
|
'Mike Col',
|
||||||
|
'Andreas Schmitz',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
|
|||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
player_url = info_dict.get('player_url', None)
|
player_url = info_dict.get('player_url', None)
|
||||||
page_url = info_dict.get('page_url', None)
|
page_url = info_dict.get('page_url', None)
|
||||||
|
app = info_dict.get('app', None)
|
||||||
play_path = info_dict.get('play_path', None)
|
play_path = info_dict.get('play_path', None)
|
||||||
tc_url = info_dict.get('tc_url', None)
|
tc_url = info_dict.get('tc_url', None)
|
||||||
|
flash_version = info_dict.get('flash_version', None)
|
||||||
live = info_dict.get('rtmp_live', False)
|
live = info_dict.get('rtmp_live', False)
|
||||||
conn = info_dict.get('rtmp_conn', None)
|
conn = info_dict.get('rtmp_conn', None)
|
||||||
|
|
||||||
@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
|
|||||||
basic_args += ['--swfVfy', player_url]
|
basic_args += ['--swfVfy', player_url]
|
||||||
if page_url is not None:
|
if page_url is not None:
|
||||||
basic_args += ['--pageUrl', page_url]
|
basic_args += ['--pageUrl', page_url]
|
||||||
|
if app is not None:
|
||||||
|
basic_args += ['--app', app]
|
||||||
if play_path is not None:
|
if play_path is not None:
|
||||||
basic_args += ['--playpath', play_path]
|
basic_args += ['--playpath', play_path]
|
||||||
if tc_url is not None:
|
if tc_url is not None:
|
||||||
basic_args += ['--tcUrl', url]
|
basic_args += ['--tcUrl', url]
|
||||||
if test:
|
if test:
|
||||||
basic_args += ['--stop', '1']
|
basic_args += ['--stop', '1']
|
||||||
|
if flash_version is not None:
|
||||||
|
basic_args += ['--flashVer', flash_version]
|
||||||
if live:
|
if live:
|
||||||
basic_args += ['--live']
|
basic_args += ['--live']
|
||||||
if conn:
|
if conn:
|
||||||
|
@ -15,6 +15,7 @@ from .arte import (
|
|||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
|
from .bbccouk import BBCCoUkIE
|
||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
@ -25,12 +26,16 @@ from .canalplus import CanalplusIE
|
|||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
|
from .chilloutzone import ChilloutzoneIE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
from .clipsyndicate import ClipsyndicateIE
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
from .cmt import CMTIE
|
from .cmt import CMTIE
|
||||||
from .cnn import CNNIE
|
from .cnn import (
|
||||||
|
CNNIE,
|
||||||
|
CNNBlogsIE,
|
||||||
|
)
|
||||||
from .collegehumor import CollegeHumorIE
|
from .collegehumor import CollegeHumorIE
|
||||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
@ -54,12 +59,15 @@ from .ebaumsworld import EbaumsWorldIE
|
|||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .eitb import EitbIE
|
from .eitb import EitbIE
|
||||||
|
from .elpais import ElPaisIE
|
||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
from .everyonesmixtape import EveryonesMixtapeIE
|
from .everyonesmixtape import EveryonesMixtapeIE
|
||||||
from .exfm import ExfmIE
|
from .exfm import ExfmIE
|
||||||
from .extremetube import ExtremeTubeIE
|
from .extremetube import ExtremeTubeIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
from .faz import FazIE
|
from .faz import FazIE
|
||||||
|
from .firstpost import FirstpostIE
|
||||||
|
from .firsttv import FirstTVIE
|
||||||
from .fktv import (
|
from .fktv import (
|
||||||
FKTVIE,
|
FKTVIE,
|
||||||
FKTVPosteckeIE,
|
FKTVPosteckeIE,
|
||||||
@ -101,6 +109,7 @@ from .ivi import (
|
|||||||
IviIE,
|
IviIE,
|
||||||
IviCompilationIE
|
IviCompilationIE
|
||||||
)
|
)
|
||||||
|
from .jadorecettepub import JadoreCettePubIE
|
||||||
from .jeuxvideo import JeuxVideoIE
|
from .jeuxvideo import JeuxVideoIE
|
||||||
from .jukebox import JukeboxIE
|
from .jukebox import JukeboxIE
|
||||||
from .justintv import JustinTVIE
|
from .justintv import JustinTVIE
|
||||||
@ -110,6 +119,7 @@ from .keezmovies import KeezMoviesIE
|
|||||||
from .khanacademy import KhanAcademyIE
|
from .khanacademy import KhanAcademyIE
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
|
from .kontrtube import KontrTubeIE
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
from .lifenews import LifeNewsIE
|
from .lifenews import LifeNewsIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
@ -118,6 +128,7 @@ from .lynda import (
|
|||||||
LyndaIE,
|
LyndaIE,
|
||||||
LyndaCourseIE
|
LyndaCourseIE
|
||||||
)
|
)
|
||||||
|
from .m6 import M6IE
|
||||||
from .macgamestore import MacGameStoreIE
|
from .macgamestore import MacGameStoreIE
|
||||||
from .malemotion import MalemotionIE
|
from .malemotion import MalemotionIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
@ -139,8 +150,10 @@ from .myvideo import MyVideoIE
|
|||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import NBCNewsIE
|
from .nbc import NBCNewsIE
|
||||||
|
from .ndr import NDRIE
|
||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
|
from .nfb import NFBIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
@ -195,6 +208,7 @@ from .stanfordoc import StanfordOpenClassroomIE
|
|||||||
from .statigram import StatigramIE
|
from .statigram import StatigramIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
from .streamcloud import StreamcloudIE
|
from .streamcloud import StreamcloudIE
|
||||||
|
from .streamcz import StreamCZIE
|
||||||
from .sztvhu import SztvHuIE
|
from .sztvhu import SztvHuIE
|
||||||
from .teamcoco import TeamcocoIE
|
from .teamcoco import TeamcocoIE
|
||||||
from .techtalks import TechTalksIE
|
from .techtalks import TechTalksIE
|
||||||
@ -216,6 +230,7 @@ from .ustream import UstreamIE, UstreamChannelIE
|
|||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
from .veehd import VeeHDIE
|
from .veehd import VeeHDIE
|
||||||
from .veoh import VeohIE
|
from .veoh import VeohIE
|
||||||
|
from .vesti import VestiIE
|
||||||
from .vevo import VevoIE
|
from .vevo import VevoIE
|
||||||
from .vice import ViceIE
|
from .vice import ViceIE
|
||||||
from .viddler import ViddlerIE
|
from .viddler import ViddlerIE
|
||||||
|
217
youtube_dl/extractor/bbccouk.py
Normal file
217
youtube_dl/extractor/bbccouk.py
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class BBCCoUkIE(SubtitlesInfoExtractor):
    """Information extractor for bbc.co.uk programme and iPlayer episode URLs.

    Extraction is done in two steps: the playlist XML for the id found in the
    URL is downloaded first; then, for every programme item of that playlist,
    the media selection XML is downloaded and converted into format
    dictionaries (and, for caption media, into subtitles).
    """
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'

    # XML namespaces (in ElementTree "{uri}" notation) of the two documents
    # that are parsed below.
    _EMP_PLAYLIST_NS = '{http://bbc.co.uk/2008/emp/playlist}'
    _MEDIASELECTION_NS = '{http://bbc.co.uk/2008/mp/mediaselection}'

    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
            'info_dict': {
                'id': 'p01q7wz4',
                'ext': 'flv',
                'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
                'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
                'duration': 1936,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Man in Black: Series 3: The Printed Name',
                'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
                'duration': 1800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
            'info_dict': {
                # NOTE(review): same id as the previous test case -- looks
                # like a copy/paste leftover; confirm the real programme id.
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Voice UK: Series 3: Blind Auditions 5',
                'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
                'duration': 5100,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        }
    ]

    @staticmethod
    def _int_or_none(value):
        """Return int(value), or None when value is None (attribute absent)."""
        return None if value is None else int(value)

    def _extract_asx_playlist(self, connection, programme_id):
        """Download the ASX playlist of *connection* and return its ref hrefs."""
        asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
        return [ref.get('href') for ref in asx.findall('./Entry/ref')]

    def _extract_connection(self, connection, programme_id):
        """Build the list of format dicts described by one connection element.

        'http' connections yield one format per ASX playlist reference when
        the supplier is 'asx', otherwise a single direct link.  'rtmp'
        connections yield a single format carrying the RTMP parameters.
        Any other protocol yields an empty list.
        """
        protocol = connection.get('protocol')
        supplier = connection.get('supplier')

        if protocol == 'http':
            if supplier == 'asx':
                # ASX playlist: every reference becomes its own format
                return [{
                    'url': ref,
                    'format_id': 'ref%s_%s' % (i, supplier),
                } for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id))]
            # Direct link
            return [{
                'url': connection.get('href'),
                'format_id': supplier,
            }]

        if protocol == 'rtmp':
            application = connection.get('application', 'ondemand')
            auth_string = connection.get('authString')
            return [{
                'url': '%s://%s/%s?%s' % (protocol, connection.get('server'), application, auth_string),
                'play_path': connection.get('identifier'),
                'app': '%s?%s' % (application, auth_string),
                'page_url': 'http://www.bbc.co.uk',
                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                'rtmp_live': False,
                'ext': 'flv',
                'format_id': supplier,
            }]

        return []

    def _extract_items(self, playlist):
        """Return the item elements that are direct children of *playlist*."""
        return playlist.findall('./%sitem' % self._EMP_PLAYLIST_NS)

    def _extract_medias(self, media_selection):
        """Return the media elements that are direct children of *media_selection*."""
        return media_selection.findall('./%smedia' % self._MEDIASELECTION_NS)

    def _extract_connections(self, media):
        """Return the connection elements that are direct children of *media*."""
        return media.findall('./%sconnection' % self._MEDIASELECTION_NS)

    def _extract_media_formats(self, media, programme_id, common_fields):
        """Collect the formats of every connection of *media*.

        Each format id is prefixed with the media's 'service' attribute and
        the entries of *common_fields* are merged into every format.
        """
        service = media.get('service')
        formats = []
        for connection in self._extract_connections(media):
            for fmt in self._extract_connection(connection, programme_id):
                fmt['format_id'] = '%s_%s' % (service, fmt['format_id'])
                fmt.update(common_fields)
                formats.append(fmt)
        return formats

    def _extract_video(self, media, programme_id):
        """Return the formats of a video media element.

        Numeric attributes that are absent from the element are reported as
        None instead of making the extraction fail.
        """
        return self._extract_media_formats(media, programme_id, {
            'width': self._int_or_none(media.get('width')),
            'height': self._int_or_none(media.get('height')),
            'vbr': self._int_or_none(media.get('bitrate')),
            'vcodec': media.get('encoding'),
            'filesize': self._int_or_none(media.get('media_file_size')),
        })

    def _extract_audio(self, media, programme_id):
        """Return the formats of an audio media element."""
        return self._extract_media_formats(media, programme_id, {
            'abr': self._int_or_none(media.get('bitrate')),
            'acodec': media.get('encoding'),
        })

    def _extract_captions(self, media, programme_id):
        """Download the captions of *media* and return them keyed by language.

        Every connection's document is downloaded and its body/div/p elements
        are rendered as numbered cues separated by CRLF.  The language is
        read from the document's xml:lang attribute and defaults to 'en';
        a later document with the same language replaces an earlier one.
        """
        subtitles = {}
        for connection in self._extract_connections(media):
            captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
            ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
            # Cue numbers start at 0 and the begin/end attributes are copied
            # verbatim, exactly as before.
            subtitles[lang] = ''.join(
                '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (
                    pos, p.get('begin'), p.get('end'),
                    p.text.strip() if p.text is not None else '')
                for pos, p in enumerate(ps))
        return subtitles

    def _real_extract(self, url):
        """Extract the info dict for a bbc.co.uk programme or episode URL.

        Returns a dict with id, title, description, duration, formats and
        subtitles, or None after listing the subtitles when the
        'listsubtitles' parameter is set.

        Raises ExtractorError when the playlist reports that it has no items
        or when it contains no programme / radioProgramme item.
        """
        mobj = re.match(self._VALID_URL, url)
        group_id = mobj.group('id')

        playlist = self._download_xml(
            'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
            'Downloading playlist XML')

        no_items = playlist.find('./%snoItems' % self._EMP_PLAYLIST_NS)
        if no_items is not None:
            reason = no_items.get('reason')
            if reason == 'preAvailability':
                msg = 'Episode %s is not yet available' % group_id
            elif reason == 'postAvailability':
                msg = 'Episode %s is no longer available' % group_id
            else:
                msg = 'Episode %s is not available: %s' % (group_id, reason)
            raise ExtractorError(msg, expected=True)

        formats = []
        subtitles = None
        programme_found = False

        for item in self._extract_items(playlist):
            if item.get('kind') not in ('programme', 'radioProgramme'):
                continue
            programme_found = True

            # Title and summary are read from the playlist itself, not from
            # the individual item.
            title = playlist.find('./%stitle' % self._EMP_PLAYLIST_NS).text
            description = playlist.find('./%ssummary' % self._EMP_PLAYLIST_NS).text

            programme_id = item.get('identifier')
            duration = self._int_or_none(item.get('duration'))

            media_selection = self._download_xml(
                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
                programme_id, 'Downloading media selection XML')

            for media in self._extract_medias(media_selection):
                media_kind = media.get('kind')
                if media_kind == 'audio':
                    formats.extend(self._extract_audio(media, programme_id))
                elif media_kind == 'video':
                    formats.extend(self._extract_video(media, programme_id))
                elif media_kind == 'captions':
                    subtitles = self._extract_captions(media, programme_id)

        if not programme_found:
            # Without this check the code below would fail on the unbound
            # programme_id / title / duration names.
            raise ExtractorError('No programme found in playlist for %s' % group_id)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(programme_id, subtitles)
            return

        self._sort_formats(formats)

        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@ -24,5 +24,7 @@ class BloombergIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
ooyala_url = self._twitter_search_player(webpage)
|
embed_code = self._search_regex(
|
||||||
return self.url_result(ooyala_url, OoyalaIE.ie_key())
|
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
|
||||||
|
'embed code')
|
||||||
|
return OoyalaIE._build_url_result(embed_code)
|
||||||
|
@ -1,18 +1,20 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
|
||||||
|
|
||||||
|
|
||||||
class BreakIE(InfoExtractor):
|
class BreakIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?break\.com/video/([^/]+)'
|
_VALID_URL = r'http://(?:www\.)?break\.com/video/([^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
||||||
u'file': u'2468056.mp4',
|
'md5': 'a3513fb1547fba4fb6cfac1bffc6c46b',
|
||||||
u'md5': u'a3513fb1547fba4fb6cfac1bffc6c46b',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '2468056',
|
||||||
u"title": u"When Girls Act Like D-Bags"
|
'ext': 'mp4',
|
||||||
|
'title': 'When Girls Act Like D-Bags',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -22,17 +24,16 @@ class BreakIE(InfoExtractor):
|
|||||||
embed_url = 'http://www.break.com/embed/%s' % video_id
|
embed_url = 'http://www.break.com/embed/%s' % video_id
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
|
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
|
||||||
u'info json', flags=re.DOTALL)
|
'info json', flags=re.DOTALL)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
video_url = info['videoUri']
|
video_url = info['videoUri']
|
||||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
||||||
if m_youtube is not None:
|
if m_youtube is not None:
|
||||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||||
final_url = video_url + '?' + info['AuthToken']
|
final_url = video_url + '?' + info['AuthToken']
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': final_url,
|
'url': final_url,
|
||||||
'ext': determine_ext(final_url),
|
'title': info['contentName'],
|
||||||
'title': info['contentName'],
|
|
||||||
'thumbnail': info['thumbUri'],
|
'thumbnail': info['thumbUri'],
|
||||||
}]
|
}
|
||||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
|||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -139,7 +140,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
|
|
||||||
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
||||||
if url_m:
|
if url_m:
|
||||||
return [url_m.group(1)]
|
return [unescapeHTML(url_m.group(1))]
|
||||||
|
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'''(?sx)<object
|
r'''(?sx)<object
|
||||||
|
@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
|
|||||||
'''
|
'''
|
||||||
IE_DESC = 'Channel 9'
|
IE_DESC = 'Channel 9'
|
||||||
IE_NAME = 'channel9'
|
IE_NAME = 'channel9'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||||
'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
|
|
||||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||||
'duration': 4576,
|
'duration': 4576,
|
||||||
@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
|
|
||||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||||
'duration': 1540,
|
'duration': 1540,
|
||||||
|
97
youtube_dl/extractor/chilloutzone.py
Normal file
97
youtube_dl/extractor/chilloutzone.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
ExtractorError
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ChilloutzoneIE(InfoExtractor):
    """Information extractor for video pages on chilloutzone.net.

    The page embeds a base64-encoded JSON document (``cozVidData``) that
    describes the video.  Depending on that document the video is either
    delegated to the Youtube/Vimeo extractors or served from the URL given
    in ``mediaUrl``.
    """

    # The slug consists of word characters and hyphens.  Inside a character
    # class ``|`` is a literal pipe (not alternation), so it is not listed.
    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w-]+)\.html'
    _TESTS = [{
        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
        'md5': 'a76f3457e813ea0037e5244f509e66d1',
        'info_dict': {
            'id': 'enemene-meck-alle-katzen-weg',
            'ext': 'mp4',
            'title': 'Enemene Meck - Alle Katzen weg',
            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
        },
    }, {
        'note': 'Video hosted at YouTube',
        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
        'info_dict': {
            'id': '1YVQaAgHyRU',
            'ext': 'mp4',
            'title': '16 Photos Taken 1 Second Before Disaster',
            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
            'uploader': 'BuzzFeedVideo',
            'uploader_id': 'BuzzFeedVideo',
            'upload_date': '20131105',
        },
    }, {
        'note': 'Video hosted at Vimeo',
        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
        'info_dict': {
            'id': '85523671',
            'ext': 'mp4',
            'title': 'The Sunday Times - Icons',
            'description': 'md5:3e1c0dc6047498d6728dcdaad0891762',
            'uploader': 'Us',
            'uploader_id': 'usfilms',
            'upload_date': '20140131'
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (or a url_result redirect) for *url*.

        Raises ExtractorError when the page metadata names neither a
        usable native source nor a media URL.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The metadata is a base64-encoded JSON string assigned to a
        # JavaScript variable in the page.
        base64_video_info = self._html_search_regex(
            r'var cozVidData = "(.+?)";', webpage, 'video data')
        decoded_video_info = base64.b64decode(base64_video_info).decode('utf-8')
        video_info_dict = json.loads(decoded_video_info)

        # Only the title is mandatory; every other field is treated as
        # optional below, so a missing key must not abort with a KeyError.
        video_url = video_info_dict.get('mediaUrl')
        description = clean_html(video_info_dict.get('description'))
        title = video_info_dict['title']
        native_platform = video_info_dict.get('nativePlatform')
        native_video_id = video_info_dict.get('nativeVideoId')
        source_priority = video_info_dict.get('sourcePriority')

        # If nativePlatform is None a fallback mechanism is used
        # (i.e. a youtube embed found in the page markup).
        if native_platform is None:
            youtube_url = self._html_search_regex(
                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
                webpage, 'fallback video URL', default=None)
            if youtube_url is not None:
                return self.url_result(youtube_url, ie='Youtube')

        # Non fallback: decide whether to use the native source (e.g.
        # youtube or vimeo) or the site's own CDN.  Without a native id
        # there is nothing to delegate to, so fall through to the CDN URL.
        if source_priority == 'native' and native_video_id:
            if native_platform == 'youtube':
                return self.url_result(native_video_id, ie='Youtube')
            if native_platform == 'vimeo':
                return self.url_result(
                    'http://vimeo.com/' + native_video_id, ie='Vimeo')

        if not video_url:
            raise ExtractorError('No video found')

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
url_basename,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -98,3 +99,28 @@ class CNNIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CNNBlogsIE(InfoExtractor):
    """Extractor for *.blogs.cnn.com posts that embed a CNN video.

    The post itself carries no media; the embedded video's URL is read
    from the page and handed over to CNNIE.
    """

    _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
    _TEST = {
        'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
        'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
        'info_dict': {
            'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
            'ext': 'mp4',
            'title': 'Criminalizing journalism?',
            'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
            'upload_date': '20140209',
        },
        'add_ie': ['CNN'],
    }

    def _real_extract(self, url):
        """Return a 'url' result pointing at the embedded CNN video."""
        # The value of url_basename(url) is used as the display id for the
        # page download.
        page_label = url_basename(url)
        blog_page = self._download_webpage(url, page_label)

        # The embedded video is advertised through a data-url attribute.
        target_url = self._html_search_regex(
            r'data-url="(.+?)"', blog_page, 'cnn url')

        result = {'_type': 'url'}
        result['url'] = target_url
        result['ie_key'] = CNNIE.ie_key()
        return result
|
||||||
|
@ -4,6 +4,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class CollegeHumorIE(InfoExtractor):
|
class CollegeHumorIE(InfoExtractor):
|
||||||
@ -11,22 +12,25 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||||
'file': '6902724.mp4',
|
|
||||||
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '6902724',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Comic-Con Cosplay Catastrophe',
|
'title': 'Comic-Con Cosplay Catastrophe',
|
||||||
'description': 'Fans get creative this year at San Diego. Too',
|
'description': 'Fans get creative this year',
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
||||||
'file': '3505939.mp4',
|
|
||||||
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '3505939',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Font Conference',
|
'title': 'Font Conference',
|
||||||
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
|
'description': 'This video wasn\'t long enough,',
|
||||||
'age_limit': 10,
|
'age_limit': 10,
|
||||||
|
'duration': 179,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# embedded youtube video
|
# embedded youtube video
|
||||||
@ -38,7 +42,7 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||||
'uploader': 'Funnyplox TV',
|
'uploader': 'Funnyplox TV',
|
||||||
'uploader_id': 'funnyploxtv',
|
'uploader_id': 'funnyploxtv',
|
||||||
'description': 'md5:11812366244110c3523968aa74f02521',
|
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
|
||||||
'upload_date': '20140128',
|
'upload_date': '20140128',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -82,6 +86,8 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
duration = int_or_none(vdata.get('duration'), 1000)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': vdata['title'],
|
'title': vdata['title'],
|
||||||
@ -89,4 +95,5 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'thumbnail': vdata.get('thumbnail'),
|
'thumbnail': vdata.get('thumbnail'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
'duration': duration,
|
||||||
}
|
}
|
||||||
|
@ -271,8 +271,11 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _download_json(self, url_or_request, video_id,
|
def _download_json(self, url_or_request, video_id,
|
||||||
note=u'Downloading JSON metadata',
|
note=u'Downloading JSON metadata',
|
||||||
errnote=u'Unable to download JSON metadata'):
|
errnote=u'Unable to download JSON metadata',
|
||||||
|
transform_source=None):
|
||||||
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
if transform_source:
|
||||||
|
json_string = transform_source(json_string)
|
||||||
try:
|
try:
|
||||||
return json.loads(json_string)
|
return json.loads(json_string)
|
||||||
except ValueError as ve:
|
except ValueError as ve:
|
||||||
|
@ -1,41 +1,42 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class DotsubIE(InfoExtractor):
|
class DotsubIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?dotsub\.com/view/([^/]+)'
|
_VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||||
u'file': u'aed3b8b2-1889-4df5-ae63-ad85f5572f27.flv',
|
'md5': '0914d4d69605090f623b7ac329fea66e',
|
||||||
u'md5': u'0914d4d69605090f623b7ac329fea66e',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||||
u"title": u"Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary",
|
'ext': 'flv',
|
||||||
u"uploader": u"4v4l0n42",
|
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
|
||||||
u'description': u'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
|
'uploader': '4v4l0n42',
|
||||||
u'thumbnail': u'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
|
||||||
u'upload_date': u'20101213',
|
'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
||||||
|
'upload_date': '20101213',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group('id')
|
||||||
info_url = "https://dotsub.com/api/media/%s/metadata" %(video_id)
|
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
|
||||||
webpage = self._download_webpage(info_url, video_id)
|
info = self._download_json(info_url, video_id)
|
||||||
info = json.loads(webpage)
|
|
||||||
date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds
|
date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': info['mediaURI'],
|
'url': info['mediaURI'],
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'thumbnail': info['screenshotURI'],
|
'thumbnail': info['screenshotURI'],
|
||||||
'description': info['description'],
|
'description': info['description'],
|
||||||
'uploader': info['user'],
|
'uploader': info['user'],
|
||||||
'view_count': info['numberOfViews'],
|
'view_count': info['numberOfViews'],
|
||||||
'upload_date': u'%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
|
'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
|
||||||
}]
|
}
|
||||||
|
@ -10,11 +10,12 @@ from .common import InfoExtractor
|
|||||||
class DropboxIE(InfoExtractor):
|
class DropboxIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
|
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.dropbox.com/s/mcnzehi9wo55th4/20131219_085616.mp4',
|
'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4',
|
||||||
'file': 'mcnzehi9wo55th4.mp4',
|
'md5': '8ae17c51172fb7f93bdd6a214cc8c896',
|
||||||
'md5': 'f6d65b1b326e82fd7ab7720bea3dacae',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': '20131219_085616'
|
'id': '0qr9sai2veej4f8',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'THE_DOCTOR_GAMES'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
58
youtube_dl/extractor/elpais.py
Normal file
58
youtube_dl/extractor/elpais.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
|
class ElPaisIE(InfoExtractor):
    """Extractor for videos embedded in elpais.com pages.

    The page defines a JavaScript ``url_cache`` prefix; the video and
    thumbnail locations are given as suffixes appended to that prefix.
    """

    _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
    IE_DESC = 'El País'

    _TEST = {
        'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
        'md5': '98406f301f19562170ec071b83433d55',
        'info_dict': {
            'id': 'tiempo-nuevo-recetas-viejas',
            'ext': 'mp4',
            'title': 'Tiempo nuevo, recetas viejas',
            'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
            'upload_date': '20140206',
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the video info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Common prefix shared by the media file and its still image.
        base = self._html_search_regex(
            r'var url_cache = "([^"]+)";', page, 'URL prefix')

        media_path = self._search_regex(
            r"URLMediaFile = url_cache \+ '([^']+)'", page, 'video URL')
        media_url = base + media_path

        # The thumbnail is optional: a failed (non-fatal) search yields None.
        still_path = self._search_regex(
            r"URLMediaStill = url_cache \+ '([^']+)'", page, 'thumbnail URL',
            fatal=False)
        if still_path is None:
            still_url = None
        else:
            still_url = base + still_path

        headline = self._html_search_regex(
            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
            page, 'title')

        # The upload date is optional as well.
        raw_date = self._search_regex(
            r'<p class="date-header date-int updated"\s+title="([^"]+)">',
            page, 'upload date', fatal=False)
        if raw_date is None:
            normalized_date = None
        else:
            normalized_date = unified_strdate(raw_date)

        summary = self._og_search_description(page)

        return {
            'id': video_id,
            'url': media_url,
            'title': headline,
            'description': summary,
            'thumbnail': still_url,
            'upload_date': normalized_date,
        }
|
@ -1,9 +1,9 @@
|
|||||||
import json
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -11,70 +11,68 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class EscapistIE(InfoExtractor):
|
class EscapistIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||||
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
|
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
||||||
u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '6618',
|
||||||
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
'ext': 'mp4',
|
||||||
u"uploader": u"the-escapist-presents",
|
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||||
u"title": u"Breaking Down Baldur's Gate"
|
'uploader': 'the-escapist-presents',
|
||||||
|
'title': "Breaking Down Baldur's Gate",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
showName = mobj.group('showname')
|
showName = mobj.group('showname')
|
||||||
videoId = mobj.group('episode')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
self.report_extraction(videoId)
|
self.report_extraction(video_id)
|
||||||
webpage = self._download_webpage(url, videoId)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
videoDesc = self._html_search_regex(
|
videoDesc = self._html_search_regex(
|
||||||
r'<meta name="description" content="([^"]*)"',
|
r'<meta name="description" content="([^"]*)"',
|
||||||
webpage, u'description', fatal=False)
|
webpage, 'description', fatal=False)
|
||||||
|
|
||||||
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
|
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<meta name="title" content="([^"]*)"',
|
r'<meta name="title" content="([^"]*)"',
|
||||||
webpage, u'title').split(' : ')[-1]
|
webpage, 'title').split(' : ')[-1]
|
||||||
|
|
||||||
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
|
configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
|
||||||
configUrl = compat_urllib_parse.unquote(configUrl)
|
configUrl = compat_urllib_parse.unquote(configUrl)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
def _add_format(name, cfgurl):
|
def _add_format(name, cfgurl, quality):
|
||||||
configJSON = self._download_webpage(
|
config = self._download_json(
|
||||||
cfgurl, videoId,
|
cfgurl, video_id,
|
||||||
u'Downloading ' + name + ' configuration',
|
'Downloading ' + name + ' configuration',
|
||||||
u'Unable to download ' + name + ' configuration')
|
'Unable to download ' + name + ' configuration',
|
||||||
|
transform_source=lambda s: s.replace("'", '"'))
|
||||||
|
|
||||||
# Technically, it's JavaScript, not JSON
|
|
||||||
configJSON = configJSON.replace("'", '"')
|
|
||||||
|
|
||||||
try:
|
|
||||||
config = json.loads(configJSON)
|
|
||||||
except (ValueError,) as err:
|
|
||||||
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
|
|
||||||
playlist = config['playlist']
|
playlist = config['playlist']
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': playlist[1]['url'],
|
'url': playlist[1]['url'],
|
||||||
'format_id': name,
|
'format_id': name,
|
||||||
|
'quality': quality,
|
||||||
})
|
})
|
||||||
|
|
||||||
_add_format(u'normal', configUrl)
|
_add_format('normal', configUrl, quality=0)
|
||||||
hq_url = (configUrl +
|
hq_url = (configUrl +
|
||||||
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
|
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
|
||||||
try:
|
try:
|
||||||
_add_format(u'hq', hq_url)
|
_add_format('hq', hq_url, quality=1)
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
pass # That's fine, we'll just use normal quality
|
pass # That's fine, we'll just use normal quality
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': videoId,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': showName,
|
'uploader': showName,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -1,56 +1,58 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class ExfmIE(InfoExtractor):
|
class ExfmIE(InfoExtractor):
|
||||||
IE_NAME = u'exfm'
|
IE_NAME = 'exfm'
|
||||||
IE_DESC = u'ex.fm'
|
IE_DESC = 'ex.fm'
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
|
_VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
|
||||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
_SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
u'url': u'http://ex.fm/song/eh359',
|
'url': 'http://ex.fm/song/eh359',
|
||||||
u'file': u'44216187.mp3',
|
'md5': 'e45513df5631e6d760970b14cc0c11e7',
|
||||||
u'md5': u'e45513df5631e6d760970b14cc0c11e7',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '44216187',
|
||||||
u"title": u"Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive",
|
'ext': 'mp3',
|
||||||
u"uploader": u"deadjournalist",
|
'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive',
|
||||||
u'upload_date': u'20120424',
|
'uploader': 'deadjournalist',
|
||||||
u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
|
'upload_date': '20120424',
|
||||||
|
'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
|
||||||
},
|
},
|
||||||
u'note': u'Soundcloud song',
|
'note': 'Soundcloud song',
|
||||||
u'skip': u'The site is down too often',
|
'skip': 'The site is down too often',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://ex.fm/song/wddt8',
|
'url': 'http://ex.fm/song/wddt8',
|
||||||
u'file': u'wddt8.mp3',
|
'md5': '966bd70741ac5b8570d8e45bfaed3643',
|
||||||
u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'wddt8',
|
||||||
u'title': u'Safe and Sound',
|
'ext': 'mp3',
|
||||||
u'uploader': u'Capital Cities',
|
'title': 'Safe and Sound',
|
||||||
|
'uploader': 'Capital Cities',
|
||||||
},
|
},
|
||||||
u'skip': u'The site is down too often',
|
'skip': 'The site is down too often',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
song_id = mobj.group(1)
|
song_id = mobj.group('id')
|
||||||
info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
|
info_url = "http://ex.fm/api/v3/song/%s" % song_id
|
||||||
webpage = self._download_webpage(info_url, song_id)
|
info = self._download_json(info_url, song_id)['song']
|
||||||
info = json.loads(webpage)
|
song_url = info['url']
|
||||||
song_url = info['song']['url']
|
|
||||||
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
||||||
self.to_screen('Soundcloud song detected')
|
self.to_screen('Soundcloud song detected')
|
||||||
return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
|
return self.url_result(song_url.replace('/stream', ''), 'Soundcloud')
|
||||||
return [{
|
return {
|
||||||
'id': song_id,
|
'id': song_id,
|
||||||
'url': song_url,
|
'url': song_url,
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': info['song']['title'],
|
'title': info['title'],
|
||||||
'thumbnail': info['song']['image']['large'],
|
'thumbnail': info['image']['large'],
|
||||||
'uploader': info['song']['artist'],
|
'uploader': info['artist'],
|
||||||
'view_count': info['song']['loved_count'],
|
'view_count': info['loved_count'],
|
||||||
}]
|
}
|
||||||
|
38
youtube_dl/extractor/firstpost.py
Normal file
38
youtube_dl/extractor/firstpost.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class FirstpostIE(InfoExtractor):
    """Information extractor for video articles on firstpost.com."""

    IE_NAME = 'Firstpost.com'
    # The video id is the trailing number of the article slug, before ".html".
    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'

    _TEST = {
        'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
        'md5': 'ee9114957692f01fb1263ed87039112a',
        'info_dict': {
            'id': '1025403',
            'ext': 'mp4',
            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
            'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
        }
    }

    def _real_extract(self, url):
        """Fetch the article page and return the info dict for its video.

        The media URL is read from the ``flashvars`` attribute of the
        ``div_video`` element; title, description and thumbnail come from
        the page's Open Graph metadata.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Extraction order is kept: media URL first, then the OG properties.
        media_url = self._html_search_regex(
            r'<div.*?name="div_video".*?flashvars="([^"]+)">',
            page, 'video URL')
        og_title = self._og_search_title(page)
        og_description = self._og_search_description(page)
        og_thumbnail = self._og_search_thumbnail(page)

        return {
            'id': video_id,
            'url': media_url,
            'title': og_title,
            'description': og_description,
            'thumbnail': og_thumbnail,
        }
|
60
youtube_dl/extractor/firsttv.py
Normal file
60
youtube_dl/extractor/firsttv.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class FirstTVIE(InfoExtractor):
    """Information extractor for the 1tv.ru video archive."""

    IE_NAME = 'firsttv'
    IE_DESC = 'Видеоархив - Первый канал'
    _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.1tv.ru/videoarchive/73390',
        'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
        'info_dict': {
            'id': '73390',
            'ext': 'mp4',
            'title': 'Олимпийские канатные дороги',
            'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
            'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
            'duration': 149,
        },
        'skip': 'Only works from Russia',
    }

    def _real_extract(self, url):
        """Download the archive page and collect the video's metadata.

        Only the video URL and the title are mandatory; every other field
        is looked up with ``fatal=False`` and passed through
        ``int_or_none`` where a number is expected.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id, 'Downloading page')

        # The media file is declared in the jwplayer setup call of the page.
        media_url = self._html_search_regex(
            r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''',
            page, 'video URL')

        video_title = self._html_search_regex(
            r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
            page, 'title')
        video_description = self._html_search_regex(
            r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
            page, 'description', fatal=False)

        og_thumbnail = self._og_search_thumbnail(page)
        raw_duration = self._og_search_property(
            'video:duration', page, 'video duration', fatal=False)

        # Like/dislike counters are printed in square brackets after the labels.
        raw_likes = self._html_search_regex(
            r'title="Понравилось".*?/></label> \[(\d+)\]',
            page, 'like count', fatal=False)
        raw_dislikes = self._html_search_regex(
            r'title="Не понравилось".*?/></label> \[(\d+)\]',
            page, 'dislike count', fatal=False)

        result = {
            'id': video_id,
            'url': media_url,
            'thumbnail': og_thumbnail,
            'title': video_title,
            'description': video_description,
        }
        result['duration'] = int_or_none(raw_duration)
        result['like_count'] = int_or_none(raw_likes)
        result['dislike_count'] = int_or_none(raw_dislikes)
        return result
|
@ -1,18 +1,21 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
|
||||||
|
|
||||||
class FreesoundIE(InfoExtractor):
|
class FreesoundIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.freesound.org/people/miklovan/sounds/194503/',
|
'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
|
||||||
u'file': u'194503.mp3',
|
'md5': '12280ceb42c81f19a515c745eae07650',
|
||||||
u'md5': u'12280ceb42c81f19a515c745eae07650',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '194503',
|
||||||
u"title": u"gulls in the city.wav",
|
'ext': 'mp3',
|
||||||
u"uploader" : u"miklovan",
|
'title': 'gulls in the city.wav',
|
||||||
u'description': u'the sounds of seagulls in the city',
|
'uploader': 'miklovan',
|
||||||
|
'description': 'the sounds of seagulls in the city',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -20,17 +23,17 @@ class FreesoundIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
music_id = mobj.group('id')
|
music_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, music_id)
|
webpage = self._download_webpage(url, music_id)
|
||||||
title = self._html_search_regex(r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
|
title = self._html_search_regex(
|
||||||
webpage, 'music title', flags=re.DOTALL)
|
r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
|
||||||
music_url = self._og_search_property('audio', webpage, 'music url')
|
webpage, 'music title', flags=re.DOTALL)
|
||||||
description = self._html_search_regex(r'<div id="sound_description">(.*?)</div>',
|
description = self._html_search_regex(
|
||||||
webpage, 'description', fatal=False, flags=re.DOTALL)
|
r'<div id="sound_description">(.*?)</div>', webpage, 'description',
|
||||||
|
fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': music_id,
|
'id': music_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': music_url,
|
'url': self._og_search_property('audio', webpage, 'music url'),
|
||||||
'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'),
|
'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'),
|
||||||
'ext': determine_ext(music_url),
|
|
||||||
'description': description,
|
'description': description,
|
||||||
}]
|
}
|
||||||
|
@ -7,10 +7,11 @@ class GametrailersIE(MTVServicesInfoExtractor):
|
|||||||
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
|
'url': 'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
|
||||||
'file': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
|
|
||||||
'md5': '4c8e67681a0ea7ec241e8c09b3ea8cf7',
|
'md5': '4c8e67681a0ea7ec241e8c09b3ea8cf7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Mirror\'s Edge 2|E3 2013: Debut Trailer',
|
'id': '70e9a5d7-cf25-4a10-9104-6f3e7342ae0d',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'E3 2013: Debut Trailer',
|
||||||
'description': 'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
|
'description': 'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
@ -10,32 +11,28 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class GooglePlusIE(InfoExtractor):
|
class GooglePlusIE(InfoExtractor):
|
||||||
IE_DESC = u'Google Plus'
|
IE_DESC = 'Google Plus'
|
||||||
_VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
|
_VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
|
||||||
IE_NAME = u'plus.google'
|
IE_NAME = 'plus.google'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
|
'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
|
||||||
u"file": u"ZButuJc6CtH.flv",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': 'ZButuJc6CtH',
|
||||||
u"upload_date": u"20120613",
|
'ext': 'flv',
|
||||||
u"uploader": u"井上ヨシマサ",
|
'upload_date': '20120613',
|
||||||
u"title": u"嘆きの天使 降臨"
|
'uploader': '井上ヨシマサ',
|
||||||
|
'title': '嘆きの天使 降臨',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract id from URL
|
# Extract id from URL
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
post_url = mobj.group(0)
|
video_id = mobj.group('id')
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
video_extension = 'flv'
|
|
||||||
|
|
||||||
# Step 1, Retrieve post webpage to extract further information
|
# Step 1, Retrieve post webpage to extract further information
|
||||||
webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')
|
webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
@ -43,7 +40,7 @@ class GooglePlusIE(InfoExtractor):
|
|||||||
upload_date = self._html_search_regex(
|
upload_date = self._html_search_regex(
|
||||||
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
|
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
|
||||||
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
|
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
|
||||||
webpage, u'upload date', fatal=False, flags=re.VERBOSE)
|
webpage, 'upload date', fatal=False, flags=re.VERBOSE)
|
||||||
if upload_date:
|
if upload_date:
|
||||||
# Convert timestring to a format suitable for filename
|
# Convert timestring to a format suitable for filename
|
||||||
upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
|
upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
|
||||||
@ -51,28 +48,27 @@ class GooglePlusIE(InfoExtractor):
|
|||||||
|
|
||||||
# Extract uploader
|
# Extract uploader
|
||||||
uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
|
uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
|
||||||
webpage, u'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
# Extract title
|
# Extract title
|
||||||
# Get the first line for title
|
# Get the first line for title
|
||||||
video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
|
video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
|
||||||
webpage, 'title', default=u'NA')
|
webpage, 'title', default='NA')
|
||||||
|
|
||||||
# Step 2, Simulate clicking the image box to launch video
|
# Step 2, Simulate clicking the image box to launch video
|
||||||
DOMAIN = 'https://plus.google.com/'
|
DOMAIN = 'https://plus.google.com/'
|
||||||
video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
|
video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
|
||||||
webpage, u'video page URL')
|
webpage, 'video page URL')
|
||||||
if not video_page.startswith(DOMAIN):
|
if not video_page.startswith(DOMAIN):
|
||||||
video_page = DOMAIN + video_page
|
video_page = DOMAIN + video_page
|
||||||
|
|
||||||
webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
|
webpage = self._download_webpage(video_page, video_id, 'Downloading video page')
|
||||||
|
|
||||||
# Extract video links on video page
|
# Extract video links all sizes
|
||||||
"""Extract video links of all sizes"""
|
|
||||||
pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
|
pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
|
||||||
mobj = re.findall(pattern, webpage)
|
mobj = re.findall(pattern, webpage)
|
||||||
if len(mobj) == 0:
|
if len(mobj) == 0:
|
||||||
raise ExtractorError(u'Unable to extract video links')
|
raise ExtractorError('Unable to extract video links')
|
||||||
|
|
||||||
# Sort in resolution
|
# Sort in resolution
|
||||||
links = sorted(mobj)
|
links = sorted(mobj)
|
||||||
@ -87,12 +83,11 @@ class GooglePlusIE(InfoExtractor):
|
|||||||
except AttributeError: # Python 3
|
except AttributeError: # Python 3
|
||||||
video_url = bytes(video_url, 'ascii').decode('unicode-escape')
|
video_url = bytes(video_url, 'ascii').decode('unicode-escape')
|
||||||
|
|
||||||
|
return {
|
||||||
return [{
|
'id': video_id,
|
||||||
'id': video_id,
|
'url': video_url,
|
||||||
'url': video_url,
|
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_extension,
|
'ext': 'flv',
|
||||||
}]
|
}
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -8,32 +10,42 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class GoogleSearchIE(SearchInfoExtractor):
|
class GoogleSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = u'Google Video search'
|
IE_DESC = 'Google Video search'
|
||||||
_MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
|
|
||||||
_MAX_RESULTS = 1000
|
_MAX_RESULTS = 1000
|
||||||
IE_NAME = u'video.google:search'
|
IE_NAME = 'video.google:search'
|
||||||
_SEARCH_KEY = 'gvsearch'
|
_SEARCH_KEY = 'gvsearch'
|
||||||
|
|
||||||
def _get_n_results(self, query, n):
|
def _get_n_results(self, query, n):
|
||||||
"""Get a specified number of results for a query"""
|
"""Get a specified number of results for a query"""
|
||||||
|
|
||||||
|
entries = []
|
||||||
res = {
|
res = {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'id': query,
|
'id': query,
|
||||||
'entries': []
|
'title': query,
|
||||||
}
|
}
|
||||||
|
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count():
|
||||||
result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
|
result_url = (
|
||||||
webpage = self._download_webpage(result_url, u'gvsearch:' + query,
|
'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
|
||||||
note='Downloading result page ' + str(pagenum))
|
% (compat_urllib_parse.quote_plus(query), pagenum * 10))
|
||||||
|
|
||||||
for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
|
webpage = self._download_webpage(
|
||||||
e = {
|
result_url, 'gvsearch:' + query,
|
||||||
|
note='Downloading result page ' + str(pagenum + 1))
|
||||||
|
|
||||||
|
for hit_idx, mobj in enumerate(re.finditer(
|
||||||
|
r'<h3 class="r"><a href="([^"]+)"', webpage)):
|
||||||
|
|
||||||
|
# Skip playlists
|
||||||
|
if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
|
||||||
|
continue
|
||||||
|
|
||||||
|
entries.append({
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': mobj.group(1)
|
'url': mobj.group(1)
|
||||||
}
|
})
|
||||||
res['entries'].append(e)
|
|
||||||
|
|
||||||
if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage):
|
if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage):
|
||||||
|
res['entries'] = entries[:n]
|
||||||
return res
|
return res
|
||||||
|
@ -1,17 +1,20 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class HowcastIE(InfoExtractor):
|
class HowcastIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||||
u'file': u'390161.mp4',
|
'md5': '8b743df908c42f60cf6496586c7f12c3',
|
||||||
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '390161',
|
||||||
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
'ext': 'mp4',
|
||||||
u"title": u"How to Tie a Square Knot Properly"
|
'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
|
||||||
|
'title': 'How to Tie a Square Knot Properly',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -24,22 +27,15 @@ class HowcastIE(InfoExtractor):
|
|||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
|
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
|
||||||
webpage, u'video URL')
|
webpage, 'video URL')
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
|
|
||||||
webpage, u'title')
|
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
|
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
|
||||||
webpage, u'description', fatal=False)
|
webpage, 'description', fatal=False)
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
|
return {
|
||||||
webpage, u'thumbnail', fatal=False)
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
return [{
|
'title': self._og_search_title(webpage),
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': video_title,
|
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
}]
|
}
|
||||||
|
@ -7,7 +7,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class InaIE(InfoExtractor):
|
class InaIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
|
_VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||||
|
@ -1,35 +1,39 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class InstagramIE(InfoExtractor):
|
class InstagramIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/'
|
_VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||||
u'file': u'aye83DjauH.mp4',
|
'md5': '0d2da106a9d2631273e192b372806516',
|
||||||
u'md5': u'0d2da106a9d2631273e192b372806516',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'aye83DjauH',
|
||||||
u"uploader_id": u"naomipq",
|
'ext': 'mp4',
|
||||||
u"title": u"Video by naomipq",
|
'uploader_id': 'naomipq',
|
||||||
u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
'title': 'Video by naomipq',
|
||||||
|
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||||
webpage, u'uploader id', fatal=False)
|
webpage, 'uploader id', fatal=False)
|
||||||
desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
|
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
|
||||||
fatal=False)
|
fatal=False)
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': self._og_search_video_url(webpage, secure=False),
|
'url': self._og_search_video_url(webpage, secure=False),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': u'Video by %s' % uploader_id,
|
'title': 'Video by %s' % uploader_id,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id' : uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'description': desc,
|
'description': desc,
|
||||||
}]
|
}
|
||||||
|
@ -14,15 +14,16 @@ from ..utils import (
|
|||||||
class IviIE(InfoExtractor):
|
class IviIE(InfoExtractor):
|
||||||
IE_DESC = 'ivi.ru'
|
IE_DESC = 'ivi.ru'
|
||||||
IE_NAME = 'ivi'
|
IE_NAME = 'ivi'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Single movie
|
# Single movie
|
||||||
{
|
{
|
||||||
'url': 'http://www.ivi.ru/watch/53141',
|
'url': 'http://www.ivi.ru/watch/53141',
|
||||||
'file': '53141.mp4',
|
|
||||||
'md5': '6ff5be2254e796ed346251d117196cf4',
|
'md5': '6ff5be2254e796ed346251d117196cf4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '53141',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Иван Васильевич меняет профессию',
|
'title': 'Иван Васильевич меняет профессию',
|
||||||
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
||||||
'duration': 5498,
|
'duration': 5498,
|
||||||
@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
|
|||||||
# Serial's serie
|
# Serial's serie
|
||||||
{
|
{
|
||||||
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||||
'file': '74791.mp4',
|
|
||||||
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '74791',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Дежурный ангел - 1 серия',
|
'title': 'Дежурный ангел - 1 серия',
|
||||||
'duration': 2490,
|
'duration': 2490,
|
||||||
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||||
@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
|
|||||||
class IviCompilationIE(InfoExtractor):
|
class IviCompilationIE(InfoExtractor):
|
||||||
IE_DESC = 'ivi.ru compilations'
|
IE_DESC = 'ivi.ru compilations'
|
||||||
IE_NAME = 'ivi:compilation'
|
IE_NAME = 'ivi:compilation'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||||
|
|
||||||
def _extract_entries(self, html, compilation_id):
|
def _extract_entries(self, html, compilation_id):
|
||||||
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
||||||
|
48
youtube_dl/extractor/jadorecettepub.py
Normal file
48
youtube_dl/extractor/jadorecettepub.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
|
class JadoreCettePubIE(InfoExtractor):
    """Information extractor for jadorecettepub.com posts embedding a video."""

    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'

    _TEST = {
        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
        'md5': '401286a06067c70b44076044b66515de',
        'info_dict': {
            'id': 'jLMja3tr7a4',
            'ext': 'mp4',
            'title': 'La pire utilisation de Star Wars',
            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
        },
    }

    def _real_extract(self, url):
        """Return a transparent URL result pointing at the embedded video.

        The post's own title and description are passed along with the
        result; the id is obtained from the embedded URL through
        ``YoutubeIE.extract_id``.
        """
        # The URL slug is only used as a display id while downloading.
        slug = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, slug)

        post_title = self._html_search_regex(
            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
            page, 'title')
        post_description = self._html_search_regex(
            r'(?s)<div id="fb-root">(.*?)<script>', page, 'description',
            fatal=False)
        embedded_url = self._search_regex(
            r'\[/postlink\](.*)endofvid', page, 'video URL')

        result = {'_type': 'url_transparent', 'url': embedded_url}
        result['id'] = YoutubeIE.extract_id(embedded_url)
        result['title'] = post_title
        result['description'] = post_description
        return result
|
||||||
|
|
@ -1,5 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -10,12 +12,13 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||||
u'file': u'5182.mp4',
|
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||||
u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '5182',
|
||||||
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
'ext': 'mp4',
|
||||||
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||||
|
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -25,14 +28,14 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
xml_link = self._html_search_regex(
|
xml_link = self._html_search_regex(
|
||||||
r'<param name="flashvars" value="config=(.*?)" />',
|
r'<param name="flashvars" value="config=(.*?)" />',
|
||||||
webpage, u'config URL')
|
webpage, 'config URL')
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||||
xml_link, u'video ID')
|
xml_link, 'video ID')
|
||||||
|
|
||||||
config = self._download_xml(
|
config = self._download_xml(
|
||||||
xml_link, title, u'Downloading XML config')
|
xml_link, title, 'Downloading XML config')
|
||||||
info_json = config.find('format.json').text
|
info_json = config.find('format.json').text
|
||||||
info = json.loads(info_json)['versions'][0]
|
info = json.loads(info_json)['versions'][0]
|
||||||
|
|
||||||
|
66
youtube_dl/extractor/kontrtube.py
Normal file
66
youtube_dl/extractor/kontrtube.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class KontrTubeIE(InfoExtractor):
    """Information extractor for videos hosted on kontrtube.ru."""

    IE_NAME = 'kontrtube'
    IE_DESC = 'KontrTube.ru - Труба зовёт'
    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'

    _TEST = {
        'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
        'md5': '975a991a4926c9a85f383a736a2e6b80',
        'info_dict': {
            'id': '2678',
            'ext': 'mp4',
            'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
            'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
            'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
            'duration': 270,
        }
    }

    def _real_extract(self, url):
        """Download the video page and build the info dict.

        The video URL and the title are mandatory. Thumbnail, view count
        and comment count are looked up with ``fatal=False``; duration,
        view count and comment count are left as ``None`` when the page
        does not provide them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
        title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
            'video title')
        description = self._html_search_meta('description', webpage, 'video description')

        # Duration is displayed as "<minutes>м:<seconds>с"; convert to seconds.
        mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
            webpage)
        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None

        view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
            'view count', fatal=False)
        view_count = int(view_count) if view_count is not None else None

        comment_count = None
        comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
            fatal=False)
        # The lookup is non-fatal, so the text may be missing; only inspect
        # it when it was actually found instead of failing on None.
        if comment_str is not None:
            if comment_str.startswith('комментариев нет'):
                # The page states explicitly that there are no comments.
                comment_count = 0
            else:
                # Text of the form "<shown> из <total> комментариев".
                mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
                if mobj:
                    comment_count = int(mobj.group('total'))

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
        }
|
@ -4,19 +4,23 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_strdate
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class LifeNewsIE(InfoExtractor):
|
class LifeNewsIE(InfoExtractor):
|
||||||
IE_NAME = 'lifenews'
|
IE_NAME = 'lifenews'
|
||||||
IE_DESC = 'LIFE | NEWS'
|
IE_DESC = 'LIFE | NEWS'
|
||||||
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
|
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://lifenews.ru/news/126342',
|
'url': 'http://lifenews.ru/news/126342',
|
||||||
'file': '126342.mp4',
|
|
||||||
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
|
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '126342',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
|
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
|
||||||
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
|
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
|
||||||
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
|
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
|
||||||
@ -32,7 +36,7 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
|
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
||||||
|
|
||||||
@ -44,12 +48,14 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
|
|
||||||
view_count = self._html_search_regex(
|
view_count = self._html_search_regex(
|
||||||
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
|
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
|
||||||
comment_count = self._html_search_regex(
|
comment_count = self._html_search_regex(
|
||||||
r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
|
r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count', fatal=False)
|
||||||
|
|
||||||
upload_date = self._html_search_regex(
|
upload_date = self._html_search_regex(
|
||||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
|
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False)
|
||||||
|
if upload_date is not None:
|
||||||
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -57,7 +63,7 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'view_count': view_count,
|
'view_count': int_or_none(view_count),
|
||||||
'comment_count': comment_count,
|
'comment_count': int_or_none(comment_count),
|
||||||
'upload_date': unified_strdate(upload_date),
|
'upload_date': upload_date,
|
||||||
}
|
}
|
56
youtube_dl/extractor/m6.py
Normal file
56
youtube_dl/extractor/m6.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class M6IE(InfoExtractor):
    """Extractor for m6.fr video pages; metadata comes from an XML feed."""

    IE_NAME = 'm6'
    _VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'

    _TEST = {
        'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',
        'md5': '242994a87de2c316891428e0176bcb77',
        'info_dict': {
            'id': '11323908',
            'ext': 'mp4',
            'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »',
            'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2',
            'duration': 100,
        }
    }

    def _real_extract(self, url):
        """Fetch the XML info document for the video id in *url* and
        build the info dict (one format per available quality node)."""
        video_id = re.match(self._VALID_URL, url).group('id')

        rss = self._download_xml(
            'http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id,
            video_id, 'Downloading video RSS')

        def item_text(tag):
            # Text of a child element of the single <item> in the feed.
            return rss.find('./channel/item/%s' % tag).text

        title = item_text('title')
        description = item_text('description')
        thumbnail = item_text('visuel_clip_big')
        duration = int(item_text('duration'))
        view_count = int(item_text('nombre_vues'))

        # Probe each known quality suffix; absent nodes are simply skipped.
        formats = []
        for format_id in ['lq', 'sd', 'hq', 'hd']:
            url_node = rss.find('./channel/item/url_video_%s' % format_id)
            if url_node is not None:
                formats.append({
                    'url': url_node.text,
                    'format_id': format_id,
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
|
@ -61,7 +61,7 @@ class MooshareIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
'http://mooshare.biz/8dqtk4bjbp8g', compat_urllib_parse.urlencode(download_form))
|
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
|
|
||||||
self.to_screen('%s: Waiting for timeout' % video_id)
|
self.to_screen('%s: Waiting for timeout' % video_id)
|
||||||
@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
@ -86,6 +86,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
||||||
if title_el is None:
|
if title_el is None:
|
||||||
title_el = itemdoc.find('.//title')
|
title_el = itemdoc.find('.//title')
|
||||||
|
if title_el.text is None:
|
||||||
|
title_el = None
|
||||||
|
|
||||||
title = title_el.text
|
title = title_el.text
|
||||||
if title is None:
|
if title is None:
|
||||||
raise ExtractorError('Could not find video title')
|
raise ExtractorError('Could not find video title')
|
||||||
|
89
youtube_dl/extractor/ndr.py
Normal file
89
youtube_dl/extractor/ndr.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class NDRIE(InfoExtractor):
    """Extractor for audio and video pages on www.ndr.de."""

    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Mediathek'
    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'

    _TESTS = [
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html',
            'md5': 'e7a6079ca39d3568f4996cb858dd6708',
            'note': 'Video file',
            'info_dict': {
                'id': '7959',
                'ext': 'mp4',
                'title': 'Markt - die ganze Sendung',
                'description': 'md5:af9179cf07f67c5c12dc6d9997e05725',
                'duration': 2655,
            },
        },
        {
            'url': 'http://www.ndr.de/903/audio191719.html',
            'md5': '41ed601768534dd18a9ae34d84798129',
            'note': 'Audio file',
            'info_dict': {
                'id': '191719',
                'ext': 'mp3',
                'title': '"Es war schockierend"',
                'description': 'md5:ed7ff8364793545021a6355b97e95f10',
                'duration': 112,
            }
        }
    ]

    def _real_extract(self, url):
        """Download the page at *url* and collect its mp3 and/or mp4 links.

        Raises ExtractorError when the page exposes neither kind of link.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        title = self._og_search_title(page)
        description = self._og_search_description(page)

        # Duration is shown as separate minute / second spans.
        duration_match = re.search(
            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
            page)
        if duration_match:
            duration = int(duration_match.group('minutes')) * 60 + int(duration_match.group('seconds'))
        else:
            duration = None

        formats = []

        audio_match = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
        if audio_match:
            formats.append({
                'url': audio_match.group('audio'),
                'format_id': 'mp3',
            })

        thumbnail = None

        video_match = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
        if video_match:
            thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
                page, 'thumbnail', fatal=False)
            if thumbnail:
                thumbnail = 'http://www.ndr.de' + thumbnail
            # The matched ".hi.mp4" URL supplies the common prefix; the
            # other variants are derived by swapping the quality infix.
            url_prefix = video_match.group('video')
            formats.extend(
                {
                    'url': '%s.%s.mp4' % (url_prefix, format_id),
                    'format_id': format_id,
                }
                for format_id in ['lo', 'hi', 'hq'])

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
93
youtube_dl/extractor/nfb.py
Normal file
93
youtube_dl/extractor/nfb.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NFBIE(InfoExtractor):
    """Extractor for film pages on nfb.ca / onf.ca."""

    IE_NAME = 'nfb'
    IE_DESC = 'National Film Board of Canada'
    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'

    _TEST = {
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
        'info_dict': {
            'id': 'qallunaat_why_white_people_are_funny',
            'ext': 'mp4',
            'title': 'Qallunaat! Why White People Are Funny ',
            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
            'duration': 3128,
            'uploader': 'Mark Sandiford',
            'uploader_id': 'mark-sandiford',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Read director details from the film page and the stream/poster
        data from the player config XML, then return the info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')

        uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
            page, 'director id', fatal=False)
        uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
            page, 'director name', fatal=False)

        # The player config is requested with a form-encoded body and a
        # custom referer header.
        post_data = compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')
        request = compat_urllib_request.Request(
            'https://www.nfb.ca/film/%s/player_config' % video_id, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

        config = self._download_xml(request, video_id, 'Downloading player config XML')

        title = None
        description = None
        thumbnail = None
        duration = None
        formats = []

        def extract_thumbnail(media):
            # Map quality -> URL; prefer 'high', else take whichever comes first.
            by_quality = dict(
                (asset.get('quality'), asset.find('default/url').text)
                for asset in media.findall('assets/asset'))
            if not by_quality:
                return None
            if 'high' in by_quality:
                return by_quality['high']
            return list(by_quality.values())[0]

        for media in config.findall('./player/stream/media'):
            media_type = media.get('type')
            if media_type == 'posterImage':
                thumbnail = extract_thumbnail(media)
            elif media_type == 'video':
                duration = int(media.get('duration'))
                title = media.find('title').text
                description = media.find('description').text
                # It seems assets always go from lower to better quality, so no need to sort
                formats = []
                for asset in media.findall('assets/asset'):
                    formats.append({
                        'url': asset.find('default/streamerURI').text + '/',
                        'play_path': asset.find('default/url').text,
                        'rtmp_live': False,
                        'ext': 'mp4',
                        'format_id': asset.get('quality'),
                    })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }
|
@ -1,7 +1,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -10,7 +9,7 @@ class PBSIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?:
|
(?:
|
||||||
# Direct video URL
|
# Direct video URL
|
||||||
video\.pbs\.org/video/(?P<id>[0-9]+)/? |
|
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
||||||
# Article with embedded player
|
# Article with embedded player
|
||||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||||
# Player
|
# Player
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@ -12,11 +14,12 @@ class SlideshareIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
|
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
||||||
u'file': u'25665706.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '25665706',
|
||||||
u'title': u'Managing Scale and Complexity',
|
'ext': 'mp4',
|
||||||
u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
|
'title': 'Managing Scale and Complexity',
|
||||||
|
'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -26,15 +29,17 @@ class SlideshareIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, page_title)
|
webpage = self._download_webpage(url, page_title)
|
||||||
slideshare_obj = self._search_regex(
|
slideshare_obj = self._search_regex(
|
||||||
r'var slideshare_object = ({.*?}); var user_info =',
|
r'var slideshare_object = ({.*?}); var user_info =',
|
||||||
webpage, u'slideshare object')
|
webpage, 'slideshare object')
|
||||||
info = json.loads(slideshare_obj)
|
info = json.loads(slideshare_obj)
|
||||||
if info['slideshow']['type'] != u'video':
|
if info['slideshow']['type'] != 'video':
|
||||||
raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
||||||
|
|
||||||
doc = info['doc']
|
doc = info['doc']
|
||||||
bucket = info['jsplayer']['video_bucket']
|
bucket = info['jsplayer']['video_bucket']
|
||||||
ext = info['jsplayer']['video_extension']
|
ext = info['jsplayer']['video_extension']
|
||||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
@ -43,5 +48,5 @@ class SlideshareIE(InfoExtractor):
|
|||||||
'ext': ext,
|
'ext': ext,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'thumbnail': info['slideshow']['pin_image_url'],
|
'thumbnail': info['slideshow']['pin_image_url'],
|
||||||
'description': self._og_search_description(webpage),
|
'description': description,
|
||||||
}
|
}
|
||||||
|
67
youtube_dl/extractor/streamcz.py
Normal file
67
youtube_dl/extractor/streamcz.py
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class StreamCZIE(InfoExtractor):
    """Extractor for episode pages on stream.cz."""

    _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)'

    _TEST = {
        'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
        'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
        'info_dict': {
            'id': '765767',
            'ext': 'mp4',
            'title': 'Peklo na talíři: Éčka pro děti',
            'description': 'md5:49ace0df986e95e331d0fe239d421519',
            'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
            'duration': 256,
        },
    }

    # Ranking of the quality labels this extractor knows about (higher is better).
    _QUALITY_RANK = {
        '240p': 0,
        '360p': 1,
        '480p': 2,
        '720p': 3,
    }

    def _real_extract(self, url):
        """Parse the ``Stream.Data.Episode(...)`` JSON embedded in the page
        at *url* and return the info dict with one format per media source.

        Unrecognised quality labels are ranked -1 (below every known one),
        so a new label cannot raise UnboundLocalError or silently inherit
        the rank of the previously processed format.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        data = self._html_search_regex(r'Stream\.Data\.Episode\((.+?)\);', webpage, 'stream data')

        jsonData = json.loads(data)

        formats = []
        # 'instances' is nested: each outer entry holds its own 'instances' list.
        for video in jsonData['instances']:
            for video_format in video['instances']:
                format_id = video_format['quality']
                quality = self._QUALITY_RANK.get(format_id, -1)

                formats.append({
                    # 'type' looks like "<major>/<minor>"; the minor part names the format.
                    'format_id': '%s-%s' % (video_format['type'].split('/')[1], format_id),
                    'url': video_format['source'],
                    'quality': quality,
                })

        self._sort_formats(formats)

        return {
            'id': str(jsonData['id']),
            'title': self._og_search_title(webpage),
            # NOTE(review): presumably the image URL is protocol-relative ("//host/...");
            # the replace would mangle an absolute URL - confirm against the site.
            'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'),
            'formats': formats,
            'description': self._og_search_description(webpage),
            'duration': int_or_none(jsonData['duration']),
            'view_count': int_or_none(jsonData['stats_total']),
        }
|
@ -1,22 +1,23 @@
|
|||||||
#coding: utf-8
|
#coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import determine_ext
|
||||||
determine_ext,
|
|
||||||
)
|
|
||||||
|
|
||||||
class ThisAVIE(InfoExtractor):
|
class ThisAVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
|
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
|
'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
|
||||||
u"file": u"47734.flv",
|
'md5': '0480f1ef3932d901f0e0e719f188f19b',
|
||||||
u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': '47734',
|
||||||
u"title": u"高樹マリア - Just fit",
|
'ext': 'flv',
|
||||||
u"uploader": u"dj7970",
|
'title': '高樹マリア - Just fit',
|
||||||
u"uploader_id": u"dj7970"
|
'uploader': 'dj7970',
|
||||||
|
'uploader_id': 'dj7970'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -25,19 +26,18 @@ class ThisAVIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
|
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r"addVariable\('file','([^']+)'\);", webpage, u'video url')
|
r"addVariable\('file','([^']+)'\);", webpage, 'video url')
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
|
||||||
webpage, u'uploader name', fatal=False)
|
webpage, 'uploader name', fatal=False)
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
|
||||||
webpage, u'uploader id', fatal=False)
|
webpage, 'uploader id', fatal=False)
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -9,25 +11,25 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class TouTvIE(InfoExtractor):
|
class TouTvIE(InfoExtractor):
|
||||||
IE_NAME = u'tou.tv'
|
IE_NAME = 'tou.tv'
|
||||||
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
|
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.tou.tv/30-vies/S04E41',
|
'url': 'http://www.tou.tv/30-vies/S04E41',
|
||||||
u'file': u'30-vies_S04E41.mp4',
|
'file': '30-vies_S04E41.mp4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'30 vies Saison 4 / Épisode 41',
|
'title': '30 vies Saison 4 / Épisode 41',
|
||||||
u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
|
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||||
u'age_limit': 8,
|
'age_limit': 8,
|
||||||
u'uploader': u'Groupe des Nouveaux Médias',
|
'uploader': 'Groupe des Nouveaux Médias',
|
||||||
u'duration': 1296,
|
'duration': 1296,
|
||||||
u'upload_date': u'20131118',
|
'upload_date': '20131118',
|
||||||
u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
u'skip_download': True, # Requires rtmpdump
|
'skip_download': True, # Requires rtmpdump
|
||||||
},
|
},
|
||||||
u'skip': 'Only available in Canada'
|
'skip': 'Only available in Canada'
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -36,25 +38,25 @@ class TouTvIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
mediaId = self._search_regex(
|
mediaId = self._search_regex(
|
||||||
r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
|
r'"idMedia":\s*"([^"]+)"', webpage, 'media ID')
|
||||||
|
|
||||||
streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
|
streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||||
streams_doc = self._download_xml(
|
streams_doc = self._download_xml(
|
||||||
streams_url, video_id, note=u'Downloading stream list')
|
streams_url, video_id, note='Downloading stream list')
|
||||||
|
|
||||||
video_url = next(n.text
|
video_url = next(n.text
|
||||||
for n in streams_doc.findall('.//choice/url')
|
for n in streams_doc.findall('.//choice/url')
|
||||||
if u'//ad.doubleclick' not in n.text)
|
if '//ad.doubleclick' not in n.text)
|
||||||
if video_url.endswith('/Unavailable.flv'):
|
if video_url.endswith('/Unavailable.flv'):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
u'Access to this video is blocked from outside of Canada',
|
'Access to this video is blocked from outside of Canada',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
duration_str = self._html_search_meta(
|
duration_str = self._html_search_meta(
|
||||||
'video:duration', webpage, u'duration')
|
'video:duration', webpage, 'duration')
|
||||||
duration = int(duration_str) if duration_str else None
|
duration = int(duration_str) if duration_str else None
|
||||||
upload_date_str = self._html_search_meta(
|
upload_date_str = self._html_search_meta(
|
||||||
'video:release_date', webpage, u'upload date')
|
'video:release_date', webpage, 'upload date')
|
||||||
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
|
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -11,7 +11,7 @@ from ..aes import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class Tube8IE(InfoExtractor):
|
class Tube8IE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||||
u'file': u'229795.mp4',
|
u'file': u'229795.mp4',
|
||||||
|
170
youtube_dl/extractor/vesti.py
Normal file
170
youtube_dl/extractor/vesti.py
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VestiIE(InfoExtractor):
|
||||||
|
IE_NAME = 'vesti'
|
||||||
|
IE_DESC = 'Вести.Ru'
|
||||||
|
_VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.vesti.ru/videos?vid=575582&cid=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '765035',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Вести.net: биткоины в России не являются законными',
|
||||||
|
'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b',
|
||||||
|
'duration': 302,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.vesti.ru/only_video.html?vid=576180',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '766048',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'США заморозило, Британию затопило',
|
||||||
|
'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1',
|
||||||
|
'duration': 87,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '766403',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы',
|
||||||
|
'description': 'md5:55805dfd35763a890ff50fa9e35e31b3',
|
||||||
|
'duration': 271,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '51499',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
|
||||||
|
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Translation has finished'
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
|
mobj = re.search(r'<meta property="og:video" content=".+?\.swf\?v?id=(?P<id>\d+).*?" />', page)
|
||||||
|
if mobj:
|
||||||
|
video_type = 'video'
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
else:
|
||||||
|
mobj = re.search(
|
||||||
|
r'<div.+?id="current-video-holder".*?>\s*<iframe src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*"',
|
||||||
|
page)
|
||||||
|
|
||||||
|
if not mobj:
|
||||||
|
raise ExtractorError('No media found')
|
||||||
|
|
||||||
|
video_type = mobj.group('type')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
json_data = self._download_json(
|
||||||
|
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
|
||||||
|
video_id, 'Downloading JSON')
|
||||||
|
|
||||||
|
if json_data['errors']:
|
||||||
|
raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True)
|
||||||
|
|
||||||
|
playlist = json_data['data']['playlist']
|
||||||
|
medialist = playlist['medialist']
|
||||||
|
media = medialist[0]
|
||||||
|
|
||||||
|
if media['errors']:
|
||||||
|
raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True)
|
||||||
|
|
||||||
|
view_count = playlist.get('count_views')
|
||||||
|
priority_transport = playlist['priority_transport']
|
||||||
|
|
||||||
|
thumbnail = media['picture']
|
||||||
|
width = media['width']
|
||||||
|
height = media['height']
|
||||||
|
description = media['anons']
|
||||||
|
title = media['title']
|
||||||
|
duration = int_or_none(media.get('duration'))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
for transport, links in media['sources'].items():
|
||||||
|
for quality, url in links.items():
|
||||||
|
if transport == 'rtmp':
|
||||||
|
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
|
||||||
|
if not mobj:
|
||||||
|
continue
|
||||||
|
fmt = {
|
||||||
|
'url': mobj.group('url'),
|
||||||
|
'play_path': mobj.group('playpath'),
|
||||||
|
'app': mobj.group('app'),
|
||||||
|
'page_url': 'http://player.rutv.ru',
|
||||||
|
'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
|
||||||
|
'rtmp_live': True,
|
||||||
|
'ext': 'flv',
|
||||||
|
'vbr': int(quality),
|
||||||
|
}
|
||||||
|
elif transport == 'm3u8':
|
||||||
|
fmt = {
|
||||||
|
'url': url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
fmt = {
|
||||||
|
'url': url
|
||||||
|
}
|
||||||
|
fmt.update({
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'format_id': '%s-%s' % (transport, quality),
|
||||||
|
'preference': -1 if priority_transport == transport else -2,
|
||||||
|
})
|
||||||
|
formats.append(fmt)
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
raise ExtractorError('No media links available for %s' % video_id)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'view_count': view_count,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -10,14 +12,14 @@ from ..utils import (
|
|||||||
class XTubeIE(InfoExtractor):
|
class XTubeIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.xtube.com/watch.php?v=kVTUy_G222_',
|
'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
|
||||||
u'file': u'kVTUy_G222_.mp4',
|
'file': 'kVTUy_G222_.mp4',
|
||||||
u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab',
|
'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"title": u"strange erotica",
|
"title": "strange erotica",
|
||||||
u"description": u"surreal gay themed erotica...almost an ET kind of thing",
|
"description": "surreal gay themed erotica...almost an ET kind of thing",
|
||||||
u"uploader": u"greenshowers",
|
"uploader": "greenshowers",
|
||||||
u"age_limit": 18,
|
"age_limit": 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -30,10 +32,10 @@ class XTubeIE(InfoExtractor):
|
|||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, u'title')
|
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title')
|
||||||
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, u'uploader', fatal=False)
|
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
|
||||||
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, u'description', fatal=False)
|
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False)
|
||||||
video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/')
|
video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
extension = os.path.splitext(path)[1][1:]
|
extension = os.path.splitext(path)[1][1:]
|
||||||
format = path.split('/')[5].split('_')[:2]
|
format = path.split('/')[5].split('_')[:2]
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
@ -12,25 +14,25 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class YahooIE(InfoExtractor):
|
class YahooIE(InfoExtractor):
|
||||||
IE_DESC = u'Yahoo screen'
|
IE_DESC = 'Yahoo screen'
|
||||||
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
|
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||||
u'file': u'214727115.mp4',
|
'file': '214727115.mp4',
|
||||||
u'md5': u'4962b075c08be8690a922ee026d05e69',
|
'md5': '4962b075c08be8690a922ee026d05e69',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
|
'title': 'Julian Smith & Travis Legg Watch Julian Smith',
|
||||||
u'description': u'Julian and Travis watch Julian Smith',
|
'description': 'Julian and Travis watch Julian Smith',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
|
'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
|
||||||
u'file': u'103000935.mp4',
|
'file': '103000935.mp4',
|
||||||
u'md5': u'd6e6fc6e1313c608f316ddad7b82b306',
|
'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
|
'title': 'Codefellas - The Cougar Lies with Spanish Moss',
|
||||||
u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
|
'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -41,7 +43,7 @@ class YahooIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
items_json = self._search_regex(r'mediaItems: ({.*?})$',
|
items_json = self._search_regex(r'mediaItems: ({.*?})$',
|
||||||
webpage, u'items', flags=re.MULTILINE)
|
webpage, 'items', flags=re.MULTILINE)
|
||||||
items = json.loads(items_json)
|
items = json.loads(items_json)
|
||||||
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
info = items['mediaItems']['query']['results']['mediaObj'][0]
|
||||||
# The 'meta' field is not always in the video webpage, we request it
|
# The 'meta' field is not always in the video webpage, we request it
|
||||||
@ -60,7 +62,7 @@ class YahooIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
query_result_json = self._download_webpage(
|
query_result_json = self._download_webpage(
|
||||||
'http://video.query.yahoo.com/v1/public/yql?' + data,
|
'http://video.query.yahoo.com/v1/public/yql?' + data,
|
||||||
video_id, u'Downloading video info')
|
video_id, 'Downloading video info')
|
||||||
query_result = json.loads(query_result_json)
|
query_result = json.loads(query_result_json)
|
||||||
info = query_result['query']['results']['mediaObj'][0]
|
info = query_result['query']['results']['mediaObj'][0]
|
||||||
meta = info['meta']
|
meta = info['meta']
|
||||||
@ -103,13 +105,13 @@ class YahooNewsIE(YahooIE):
|
|||||||
_VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
|
_VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
|
||||||
u'md5': u'67010fdf3a08d290e060a4dd96baa07b',
|
'md5': '67010fdf3a08d290e060a4dd96baa07b',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'104538833',
|
'id': '104538833',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'China Moses Is Crazy About the Blues',
|
'title': 'China Moses Is Crazy About the Blues',
|
||||||
u'description': u'md5:9900ab8cd5808175c7b3fe55b979bed0',
|
'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -120,14 +122,14 @@ class YahooNewsIE(YahooIE):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, u'long id')
|
long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, 'long id')
|
||||||
return self._get_info(long_id, video_id)
|
return self._get_info(long_id, video_id)
|
||||||
|
|
||||||
|
|
||||||
class YahooSearchIE(SearchInfoExtractor):
|
class YahooSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = u'Yahoo screen search'
|
IE_DESC = 'Yahoo screen search'
|
||||||
_MAX_RESULTS = 1000
|
_MAX_RESULTS = 1000
|
||||||
IE_NAME = u'screen.yahoo:search'
|
IE_NAME = 'screen.yahoo:search'
|
||||||
_SEARCH_KEY = 'yvsearch'
|
_SEARCH_KEY = 'yvsearch'
|
||||||
|
|
||||||
def _get_n_results(self, query, n):
|
def _get_n_results(self, query, n):
|
||||||
@ -139,12 +141,12 @@ class YahooSearchIE(SearchInfoExtractor):
|
|||||||
'entries': []
|
'entries': []
|
||||||
}
|
}
|
||||||
for pagenum in itertools.count(0):
|
for pagenum in itertools.count(0):
|
||||||
result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
|
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
|
||||||
webpage = self._download_webpage(result_url, query,
|
webpage = self._download_webpage(result_url, query,
|
||||||
note='Downloading results page '+str(pagenum+1))
|
note='Downloading results page '+str(pagenum+1))
|
||||||
info = json.loads(webpage)
|
info = json.loads(webpage)
|
||||||
m = info[u'm']
|
m = info['m']
|
||||||
results = info[u'results']
|
results = info['results']
|
||||||
|
|
||||||
for (i, r) in enumerate(results):
|
for (i, r) in enumerate(results):
|
||||||
if (pagenum * 30) +i >= n:
|
if (pagenum * 30) +i >= n:
|
||||||
@ -152,7 +154,7 @@ class YahooSearchIE(SearchInfoExtractor):
|
|||||||
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
|
mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
|
||||||
e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
|
e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
|
||||||
res['entries'].append(e)
|
res['entries'].append(e)
|
||||||
if (pagenum * 30 +i >= n) or (m[u'last'] >= (m[u'total'] -1)):
|
if (pagenum * 30 +i >= n) or (m['last'] >= (m['total'] -1)):
|
||||||
break
|
break
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
@ -34,6 +34,7 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
|
uppercase_escape,
|
||||||
)
|
)
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
@ -136,7 +137,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
||||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
||||||
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
||||||
(?:www\.)?pwnyoutube\.com|
|
(?:www\.)?pwnyoutube\.com/|
|
||||||
tube\.majestyc\.net/|
|
tube\.majestyc\.net/|
|
||||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
@ -1085,8 +1086,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
self._downloader.report_warning(err_msg)
|
self._downloader.report_warning(err_msg)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _extract_id(self, url):
|
@classmethod
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
def extract_id(cls, url):
|
||||||
|
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
video_id = mobj.group(2)
|
video_id = mobj.group(2)
|
||||||
@ -1115,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
mobj = re.search(self._NEXT_URL_RE, url)
|
mobj = re.search(self._NEXT_URL_RE, url)
|
||||||
if mobj:
|
if mobj:
|
||||||
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
||||||
video_id = self._extract_id(url)
|
video_id = self.extract_id(url)
|
||||||
|
|
||||||
# Get video webpage
|
# Get video webpage
|
||||||
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||||
@ -1422,7 +1424,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com playlists'
|
IE_DESC = u'YouTube.com playlists'
|
||||||
_VALID_URL = r"""(?:
|
_VALID_URL = r"""(?x)(?:
|
||||||
(?:https?://)?
|
(?:https?://)?
|
||||||
(?:\w+\.)?
|
(?:\w+\.)?
|
||||||
youtube\.com/
|
youtube\.com/
|
||||||
@ -1431,7 +1433,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
\? (?:.*?&)*? (?:p|a|list)=
|
\? (?:.*?&)*? (?:p|a|list)=
|
||||||
| p/
|
| p/
|
||||||
)
|
)
|
||||||
((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
|
(
|
||||||
|
(?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
|
||||||
|
# Top tracks, they can also include dots
|
||||||
|
|(?:MC)[\w\.]*
|
||||||
|
)
|
||||||
.*
|
.*
|
||||||
|
|
|
|
||||||
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
||||||
@ -1441,11 +1447,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
@ -1469,7 +1470,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract playlist id
|
# Extract playlist id
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
playlist_id = mobj.group(1) or mobj.group(2)
|
playlist_id = mobj.group(1) or mobj.group(2)
|
||||||
@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
# Download all channel pages using the json-based channel_ajax query
|
# Download all channel pages using the json-based channel_ajax query
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||||
page = self._download_webpage(url, channel_id,
|
page = self._download_json(
|
||||||
u'Downloading page #%s' % pagenum)
|
url, channel_id, note=u'Downloading page #%s' % pagenum,
|
||||||
|
transform_source=uppercase_escape)
|
||||||
page = json.loads(page)
|
|
||||||
|
|
||||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||||
video_ids.extend(ids_in_page)
|
video_ids.extend(ids_in_page)
|
||||||
|
|
||||||
|
@ -751,13 +751,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
https_request = http_request
|
https_request = http_request
|
||||||
https_response = http_response
|
https_response = http_response
|
||||||
|
|
||||||
|
|
||||||
def unified_strdate(date_str):
|
def unified_strdate(date_str):
|
||||||
"""Return a string with the date in the format YYYYMMDD"""
|
"""Return a string with the date in the format YYYYMMDD"""
|
||||||
upload_date = None
|
upload_date = None
|
||||||
#Replace commas
|
#Replace commas
|
||||||
date_str = date_str.replace(',',' ')
|
date_str = date_str.replace(',', ' ')
|
||||||
# %z (UTC offset) is only supported in python>=3.2
|
# %z (UTC offset) is only supported in python>=3.2
|
||||||
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
|
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
|
||||||
format_expressions = [
|
format_expressions = [
|
||||||
'%d %B %Y',
|
'%d %B %Y',
|
||||||
'%B %d %Y',
|
'%B %d %Y',
|
||||||
@ -771,11 +772,12 @@ def unified_strdate(date_str):
|
|||||||
'%Y-%m-%dT%H:%M:%S.%fZ',
|
'%Y-%m-%dT%H:%M:%S.%fZ',
|
||||||
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
||||||
'%Y-%m-%dT%H:%M:%S',
|
'%Y-%m-%dT%H:%M:%S',
|
||||||
|
'%Y-%m-%dT%H:%M',
|
||||||
]
|
]
|
||||||
for expression in format_expressions:
|
for expression in format_expressions:
|
||||||
try:
|
try:
|
||||||
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
||||||
except:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
if upload_date is None:
|
if upload_date is None:
|
||||||
timetuple = email.utils.parsedate_tz(date_str)
|
timetuple = email.utils.parsedate_tz(date_str)
|
||||||
@ -1212,3 +1214,9 @@ class PagedList(object):
|
|||||||
if end == nextfirstid:
|
if end == nextfirstid:
|
||||||
break
|
break
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def uppercase_escape(s):
|
||||||
|
return re.sub(
|
||||||
|
r'\\U([0-9a-fA-F]{8})',
|
||||||
|
lambda m: compat_chr(int(m.group(1), base=16)), s)
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.02.04.1'
|
__version__ = '2014.02.13'
|
||||||
|
Reference in New Issue
Block a user