Compare commits
45 Commits
2014.02.06
...
2014.02.10
Author | SHA1 | Date | |
---|---|---|---|
2e20bba708 | |||
e70dc1d14b | |||
026fcc0495 | |||
81c2f20b53 | |||
1afe753462 | |||
524c2c716a | |||
b542d4bbd7 | |||
17968e444c | |||
2e3fd9ec2f | |||
d6a283b025 | |||
9766538124 | |||
98dbee8681 | |||
e421491b3b | |||
6828d37c41 | |||
bf5f610099 | |||
8b7f73404a | |||
85cacb2f51 | |||
b3fa3917e2 | |||
082c6c867a | |||
03fcf1ab57 | |||
3b00dea5eb | |||
8bc6c8e3c0 | |||
79bc27b53a | |||
84dd703199 | |||
c6fdba23a6 | |||
b19fe521a9 | |||
c1e672d121 | |||
f4371f4784 | |||
d914d9d187 | |||
845d14d377 | |||
4a9540b6d2 | |||
9f31be7000 | |||
41fa1b627d | |||
c0c4e66b29 | |||
cd8662de22 | |||
3587159614 | |||
d67cc9fa7c | |||
bf3a2fe923 | |||
e9ea0bf123 | |||
63424b6233 | |||
0bf35c5cf5 | |||
95c29381eb | |||
94c4abce7f | |||
f2dffe55f8 | |||
46a073bfac |
@ -1,5 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
@ -13,6 +15,7 @@ from youtube_dl.extractor import (
|
|||||||
FacebookIE,
|
FacebookIE,
|
||||||
gen_extractors,
|
gen_extractors,
|
||||||
JustinTVIE,
|
JustinTVIE,
|
||||||
|
PBSIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -29,18 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
|
|
||||||
def test_youtube_playlist_matching(self):
|
def test_youtube_playlist_matching(self):
|
||||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||||
assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||||
assertPlaylist(u'PL63F0C78739B09958')
|
assertPlaylist('PL63F0C78739B09958')
|
||||||
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||||
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
|
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||||
|
# Top tracks
|
||||||
|
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||||
|
|
||||||
def test_youtube_matching(self):
|
def test_youtube_matching(self):
|
||||||
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||||
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||||
@ -80,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
def test_youtube_extract(self):
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
@ -89,7 +94,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
|
||||||
def test_facebook_matching(self):
|
def test_facebook_matching(self):
|
||||||
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
@ -124,5 +129,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
||||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
||||||
|
|
||||||
|
def test_pbs(self):
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/2350
|
||||||
|
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -127,6 +127,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
||||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||||
|
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||||
|
|
||||||
def test_find_xpath_attr(self):
|
def test_find_xpath_attr(self):
|
||||||
testxml = u'''<root>
|
testxml = u'''<root>
|
||||||
|
@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['title'], 'ytdl test PL')
|
self.assertEqual(result['title'], 'ytdl test PL')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
||||||
|
|
||||||
def test_youtube_playlist_noplaylist(self):
|
def test_youtube_playlist_noplaylist(self):
|
||||||
@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertEqual(result['_type'], 'url')
|
self.assertEqual(result['_type'], 'url')
|
||||||
self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
|
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||||
|
|
||||||
def test_issue_673(self):
|
def test_issue_673(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||||
|
|
||||||
@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
# TODO find a > 100 (paginating?) videos course
|
# TODO find a > 100 (paginating?) videos course
|
||||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||||
|
|
||||||
def test_youtube_channel(self):
|
def test_youtube_channel(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
original_video = entries[0]
|
original_video = entries[0]
|
||||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||||
|
|
||||||
|
def test_youtube_toptracks(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = YoutubePlaylistIE(dl)
|
||||||
|
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
||||||
|
entries = result['entries']
|
||||||
|
self.assertEqual(len(entries), 100)
|
||||||
|
|
||||||
def test_youtube_toplist(self):
|
def test_youtube_toplist(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeTopListIE(dl)
|
ie = YoutubeTopListIE(dl)
|
||||||
|
@ -41,6 +41,7 @@ __authors__ = (
|
|||||||
'Chris Gahan',
|
'Chris Gahan',
|
||||||
'Saimadhav Heblikar',
|
'Saimadhav Heblikar',
|
||||||
'Mike Col',
|
'Mike Col',
|
||||||
|
'Andreas Schmitz',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
|
|||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
player_url = info_dict.get('player_url', None)
|
player_url = info_dict.get('player_url', None)
|
||||||
page_url = info_dict.get('page_url', None)
|
page_url = info_dict.get('page_url', None)
|
||||||
|
app = info_dict.get('app', None)
|
||||||
play_path = info_dict.get('play_path', None)
|
play_path = info_dict.get('play_path', None)
|
||||||
tc_url = info_dict.get('tc_url', None)
|
tc_url = info_dict.get('tc_url', None)
|
||||||
|
flash_version = info_dict.get('flash_version', None)
|
||||||
live = info_dict.get('rtmp_live', False)
|
live = info_dict.get('rtmp_live', False)
|
||||||
conn = info_dict.get('rtmp_conn', None)
|
conn = info_dict.get('rtmp_conn', None)
|
||||||
|
|
||||||
@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
|
|||||||
basic_args += ['--swfVfy', player_url]
|
basic_args += ['--swfVfy', player_url]
|
||||||
if page_url is not None:
|
if page_url is not None:
|
||||||
basic_args += ['--pageUrl', page_url]
|
basic_args += ['--pageUrl', page_url]
|
||||||
|
if app is not None:
|
||||||
|
basic_args += ['--app', app]
|
||||||
if play_path is not None:
|
if play_path is not None:
|
||||||
basic_args += ['--playpath', play_path]
|
basic_args += ['--playpath', play_path]
|
||||||
if tc_url is not None:
|
if tc_url is not None:
|
||||||
basic_args += ['--tcUrl', url]
|
basic_args += ['--tcUrl', url]
|
||||||
if test:
|
if test:
|
||||||
basic_args += ['--stop', '1']
|
basic_args += ['--stop', '1']
|
||||||
|
if flash_version is not None:
|
||||||
|
basic_args += ['--flashVer', flash_version]
|
||||||
if live:
|
if live:
|
||||||
basic_args += ['--live']
|
basic_args += ['--live']
|
||||||
if conn:
|
if conn:
|
||||||
|
@ -15,6 +15,7 @@ from .arte import (
|
|||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
|
from .bbccouk import BBCCoUkIE
|
||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
@ -25,6 +26,7 @@ from .canalplus import CanalplusIE
|
|||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
|
from .chilloutzone import ChilloutzoneIE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
@ -103,6 +105,7 @@ from .ivi import (
|
|||||||
IviIE,
|
IviIE,
|
||||||
IviCompilationIE
|
IviCompilationIE
|
||||||
)
|
)
|
||||||
|
from .jadorecettepub import JadoreCettePubIE
|
||||||
from .jeuxvideo import JeuxVideoIE
|
from .jeuxvideo import JeuxVideoIE
|
||||||
from .jukebox import JukeboxIE
|
from .jukebox import JukeboxIE
|
||||||
from .justintv import JustinTVIE
|
from .justintv import JustinTVIE
|
||||||
@ -112,6 +115,7 @@ from .keezmovies import KeezMoviesIE
|
|||||||
from .khanacademy import KhanAcademyIE
|
from .khanacademy import KhanAcademyIE
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
|
from .kontrtube import KontrTubeIE
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
from .lifenews import LifeNewsIE
|
from .lifenews import LifeNewsIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
@ -142,8 +146,10 @@ from .myvideo import MyVideoIE
|
|||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import NBCNewsIE
|
from .nbc import NBCNewsIE
|
||||||
|
from .ndr import NDRIE
|
||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
|
from .nfb import NFBIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
|
217
youtube_dl/extractor/bbccouk.py
Normal file
217
youtube_dl/extractor/bbccouk.py
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||||
|
IE_NAME = 'bbc.co.uk'
|
||||||
|
IE_DESC = 'BBC iPlayer'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p01q7wz4',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
|
||||||
|
'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
|
||||||
|
'duration': 1936,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b00yng1d',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'The Man in Black: Series 3: The Printed Name',
|
||||||
|
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
|
||||||
|
'duration': 1800,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b00yng1d',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
||||||
|
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
||||||
|
'duration': 5100,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _extract_asx_playlist(self, connection, programme_id):
|
||||||
|
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||||
|
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||||
|
|
||||||
|
def _extract_connection(self, connection, programme_id):
|
||||||
|
formats = []
|
||||||
|
protocol = connection.get('protocol')
|
||||||
|
supplier = connection.get('supplier')
|
||||||
|
if protocol == 'http':
|
||||||
|
href = connection.get('href')
|
||||||
|
# ASX playlist
|
||||||
|
if supplier == 'asx':
|
||||||
|
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||||
|
formats.append({
|
||||||
|
'url': ref,
|
||||||
|
'format_id': 'ref%s_%s' % (i, supplier),
|
||||||
|
})
|
||||||
|
# Direct link
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': href,
|
||||||
|
'format_id': supplier,
|
||||||
|
})
|
||||||
|
elif protocol == 'rtmp':
|
||||||
|
application = connection.get('application', 'ondemand')
|
||||||
|
auth_string = connection.get('authString')
|
||||||
|
identifier = connection.get('identifier')
|
||||||
|
server = connection.get('server')
|
||||||
|
formats.append({
|
||||||
|
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||||
|
'play_path': identifier,
|
||||||
|
'app': '%s?%s' % (application, auth_string),
|
||||||
|
'page_url': 'http://www.bbc.co.uk',
|
||||||
|
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||||
|
'rtmp_live': False,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': supplier,
|
||||||
|
})
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_items(self, playlist):
|
||||||
|
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||||
|
|
||||||
|
def _extract_medias(self, media_selection):
|
||||||
|
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||||
|
|
||||||
|
def _extract_connections(self, media):
|
||||||
|
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
||||||
|
|
||||||
|
def _extract_video(self, media, programme_id):
|
||||||
|
formats = []
|
||||||
|
vbr = int(media.get('bitrate'))
|
||||||
|
vcodec = media.get('encoding')
|
||||||
|
service = media.get('service')
|
||||||
|
width = int(media.get('width'))
|
||||||
|
height = int(media.get('height'))
|
||||||
|
file_size = int(media.get('media_file_size'))
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
conn_formats = self._extract_connection(connection, programme_id)
|
||||||
|
for format in conn_formats:
|
||||||
|
format.update({
|
||||||
|
'format_id': '%s_%s' % (service, format['format_id']),
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'vbr': vbr,
|
||||||
|
'vcodec': vcodec,
|
||||||
|
'filesize': file_size,
|
||||||
|
})
|
||||||
|
formats.extend(conn_formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_audio(self, media, programme_id):
|
||||||
|
formats = []
|
||||||
|
abr = int(media.get('bitrate'))
|
||||||
|
acodec = media.get('encoding')
|
||||||
|
service = media.get('service')
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
conn_formats = self._extract_connection(connection, programme_id)
|
||||||
|
for format in conn_formats:
|
||||||
|
format.update({
|
||||||
|
'format_id': '%s_%s' % (service, format['format_id']),
|
||||||
|
'abr': abr,
|
||||||
|
'acodec': acodec,
|
||||||
|
})
|
||||||
|
formats.extend(conn_formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_captions(self, media, programme_id):
|
||||||
|
subtitles = {}
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||||
|
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||||
|
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
||||||
|
srt = ''
|
||||||
|
for pos, p in enumerate(ps):
|
||||||
|
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
|
||||||
|
p.text.strip() if p.text is not None else '')
|
||||||
|
subtitles[lang] = srt
|
||||||
|
return subtitles
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
group_id = mobj.group('id')
|
||||||
|
|
||||||
|
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
|
||||||
|
'Downloading playlist XML')
|
||||||
|
|
||||||
|
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||||
|
if no_items is not None:
|
||||||
|
reason = no_items.get('reason')
|
||||||
|
if reason == 'preAvailability':
|
||||||
|
msg = 'Episode %s is not yet available' % group_id
|
||||||
|
elif reason == 'postAvailability':
|
||||||
|
msg = 'Episode %s is no longer available' % group_id
|
||||||
|
else:
|
||||||
|
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
||||||
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
subtitles = None
|
||||||
|
|
||||||
|
for item in self._extract_items(playlist):
|
||||||
|
kind = item.get('kind')
|
||||||
|
if kind != 'programme' and kind != 'radioProgramme':
|
||||||
|
continue
|
||||||
|
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||||
|
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||||
|
|
||||||
|
programme_id = item.get('identifier')
|
||||||
|
duration = int(item.get('duration'))
|
||||||
|
|
||||||
|
media_selection = self._download_xml(
|
||||||
|
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||||
|
programme_id, 'Downloading media selection XML')
|
||||||
|
|
||||||
|
for media in self._extract_medias(media_selection):
|
||||||
|
kind = media.get('kind')
|
||||||
|
if kind == 'audio':
|
||||||
|
formats.extend(self._extract_audio(media, programme_id))
|
||||||
|
elif kind == 'video':
|
||||||
|
formats.extend(self._extract_video(media, programme_id))
|
||||||
|
elif kind == 'captions':
|
||||||
|
subtitles = self._extract_captions(media, programme_id)
|
||||||
|
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(programme_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': programme_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
@ -24,5 +24,7 @@ class BloombergIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
ooyala_url = self._twitter_search_player(webpage)
|
embed_code = self._search_regex(
|
||||||
return self.url_result(ooyala_url, OoyalaIE.ie_key())
|
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
|
||||||
|
'embed code')
|
||||||
|
return OoyalaIE._build_url_result(embed_code)
|
||||||
|
@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
|
|||||||
'''
|
'''
|
||||||
IE_DESC = 'Channel 9'
|
IE_DESC = 'Channel 9'
|
||||||
IE_NAME = 'channel9'
|
IE_NAME = 'channel9'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||||
'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
|
|
||||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||||
'duration': 4576,
|
'duration': 4576,
|
||||||
@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
|
|
||||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||||
'duration': 1540,
|
'duration': 1540,
|
||||||
|
97
youtube_dl/extractor/chilloutzone.py
Normal file
97
youtube_dl/extractor/chilloutzone.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
ExtractorError
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ChilloutzoneIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
|
||||||
|
'md5': 'a76f3457e813ea0037e5244f509e66d1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'enemene-meck-alle-katzen-weg',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Enemene Meck - Alle Katzen weg',
|
||||||
|
'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'Video hosted at YouTube',
|
||||||
|
'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1YVQaAgHyRU',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '16 Photos Taken 1 Second Before Disaster',
|
||||||
|
'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
|
||||||
|
'uploader': 'BuzzFeedVideo',
|
||||||
|
'uploader_id': 'BuzzFeedVideo',
|
||||||
|
'upload_date': '20131105',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'Video hosted at Vimeo',
|
||||||
|
'url': 'http://www.chilloutzone.net/video/icon-blending.html',
|
||||||
|
'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '85523671',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Sunday Times - Icons',
|
||||||
|
'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
|
||||||
|
'uploader': 'Us',
|
||||||
|
'uploader_id': 'usfilms',
|
||||||
|
'upload_date': '20140131'
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
base64_video_info = self._html_search_regex(
|
||||||
|
r'var cozVidData = "(.+?)";', webpage, 'video data')
|
||||||
|
decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
|
||||||
|
video_info_dict = json.loads(decoded_video_info)
|
||||||
|
|
||||||
|
# get video information from dict
|
||||||
|
video_url = video_info_dict['mediaUrl']
|
||||||
|
description = clean_html(video_info_dict.get('description'))
|
||||||
|
title = video_info_dict['title']
|
||||||
|
native_platform = video_info_dict['nativePlatform']
|
||||||
|
native_video_id = video_info_dict['nativeVideoId']
|
||||||
|
source_priority = video_info_dict['sourcePriority']
|
||||||
|
|
||||||
|
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
|
||||||
|
if native_platform is None:
|
||||||
|
youtube_url = self._html_search_regex(
|
||||||
|
r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
||||||
|
webpage, 'fallback video URL', default=None)
|
||||||
|
if youtube_url is not None:
|
||||||
|
return self.url_result(youtube_url, ie='Youtube')
|
||||||
|
|
||||||
|
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
|
||||||
|
# the own CDN
|
||||||
|
if source_priority == 'native':
|
||||||
|
if native_platform == 'youtube':
|
||||||
|
return self.url_result(native_video_id, ie='Youtube')
|
||||||
|
if native_platform == 'vimeo':
|
||||||
|
return self.url_result(
|
||||||
|
'http://vimeo.com/' + native_video_id, ie='Vimeo')
|
||||||
|
|
||||||
|
if not video_url:
|
||||||
|
raise ExtractorError('No video found')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
}
|
@ -271,8 +271,11 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _download_json(self, url_or_request, video_id,
|
def _download_json(self, url_or_request, video_id,
|
||||||
note=u'Downloading JSON metadata',
|
note=u'Downloading JSON metadata',
|
||||||
errnote=u'Unable to download JSON metadata'):
|
errnote=u'Unable to download JSON metadata',
|
||||||
|
transform_source=None):
|
||||||
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
if transform_source:
|
||||||
|
json_string = transform_source(json_string)
|
||||||
try:
|
try:
|
||||||
return json.loads(json_string)
|
return json.loads(json_string)
|
||||||
except ValueError as ve:
|
except ValueError as ve:
|
||||||
|
@ -9,7 +9,7 @@ from ..utils import unified_strdate
|
|||||||
|
|
||||||
class ElPaisIE(InfoExtractor):
|
class ElPaisIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
|
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
|
||||||
IE_DESCR = 'El País'
|
IE_DESC = 'El País'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
|
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
|
||||||
|
@ -14,15 +14,16 @@ from ..utils import (
|
|||||||
class IviIE(InfoExtractor):
|
class IviIE(InfoExtractor):
|
||||||
IE_DESC = 'ivi.ru'
|
IE_DESC = 'ivi.ru'
|
||||||
IE_NAME = 'ivi'
|
IE_NAME = 'ivi'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Single movie
|
# Single movie
|
||||||
{
|
{
|
||||||
'url': 'http://www.ivi.ru/watch/53141',
|
'url': 'http://www.ivi.ru/watch/53141',
|
||||||
'file': '53141.mp4',
|
|
||||||
'md5': '6ff5be2254e796ed346251d117196cf4',
|
'md5': '6ff5be2254e796ed346251d117196cf4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '53141',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Иван Васильевич меняет профессию',
|
'title': 'Иван Васильевич меняет профессию',
|
||||||
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
||||||
'duration': 5498,
|
'duration': 5498,
|
||||||
@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
|
|||||||
# Serial's serie
|
# Serial's serie
|
||||||
{
|
{
|
||||||
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||||
'file': '74791.mp4',
|
|
||||||
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '74791',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Дежурный ангел - 1 серия',
|
'title': 'Дежурный ангел - 1 серия',
|
||||||
'duration': 2490,
|
'duration': 2490,
|
||||||
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||||
@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
|
|||||||
class IviCompilationIE(InfoExtractor):
|
class IviCompilationIE(InfoExtractor):
|
||||||
IE_DESC = 'ivi.ru compilations'
|
IE_DESC = 'ivi.ru compilations'
|
||||||
IE_NAME = 'ivi:compilation'
|
IE_NAME = 'ivi:compilation'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||||
|
|
||||||
def _extract_entries(self, html, compilation_id):
|
def _extract_entries(self, html, compilation_id):
|
||||||
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
||||||
|
49
youtube_dl/extractor/jadorecettepub.py
Normal file
49
youtube_dl/extractor/jadorecettepub.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
|
class JadoreCettePubIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
|
||||||
|
'md5': '401286a06067c70b44076044b66515de',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'jLMja3tr7a4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'La pire utilisation de Star Wars',
|
||||||
|
'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
|
||||||
|
webpage, 'title')
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'(?s)<div id="fb-root">(.*?)<script>', webpage, 'description',
|
||||||
|
fatal=False)
|
||||||
|
real_url = self._search_regex(
|
||||||
|
r'\[/postlink\](.*)endofvid', webpage, 'video URL')
|
||||||
|
video_id = YoutubeIE.extract_id(real_url)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': real_url,
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
}
|
||||||
|
|
@ -1,5 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -10,12 +12,13 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||||
u'file': u'5182.mp4',
|
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||||
u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '5182',
|
||||||
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
'ext': 'mp4',
|
||||||
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||||
|
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -25,14 +28,14 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
xml_link = self._html_search_regex(
|
xml_link = self._html_search_regex(
|
||||||
r'<param name="flashvars" value="config=(.*?)" />',
|
r'<param name="flashvars" value="config=(.*?)" />',
|
||||||
webpage, u'config URL')
|
webpage, 'config URL')
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||||
xml_link, u'video ID')
|
xml_link, 'video ID')
|
||||||
|
|
||||||
config = self._download_xml(
|
config = self._download_xml(
|
||||||
xml_link, title, u'Downloading XML config')
|
xml_link, title, 'Downloading XML config')
|
||||||
info_json = config.find('format.json').text
|
info_json = config.find('format.json').text
|
||||||
info = json.loads(info_json)['versions'][0]
|
info = json.loads(info_json)['versions'][0]
|
||||||
|
|
||||||
|
66
youtube_dl/extractor/kontrtube.py
Normal file
66
youtube_dl/extractor/kontrtube.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class KontrTubeIE(InfoExtractor):
|
||||||
|
IE_NAME = 'kontrtube'
|
||||||
|
IE_DESC = 'KontrTube.ru - Труба зовёт'
|
||||||
|
_VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
|
||||||
|
'md5': '975a991a4926c9a85f383a736a2e6b80',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2678',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
|
||||||
|
'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
|
||||||
|
'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
|
||||||
|
'duration': 270,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
|
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
|
||||||
|
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||||
|
title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
|
||||||
|
'video title')
|
||||||
|
description = self._html_search_meta('description', webpage, 'video description')
|
||||||
|
|
||||||
|
mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
|
||||||
|
webpage)
|
||||||
|
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||||
|
|
||||||
|
view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
|
||||||
|
'view count', fatal=False)
|
||||||
|
view_count = int(view_count) if view_count is not None else None
|
||||||
|
|
||||||
|
comment_count = None
|
||||||
|
comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
|
||||||
|
fatal=False)
|
||||||
|
if comment_str.startswith('комментариев нет'):
|
||||||
|
comment_count = 0
|
||||||
|
else:
|
||||||
|
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
|
||||||
|
if mobj:
|
||||||
|
comment_count = int(mobj.group('total'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
}
|
@ -61,7 +61,7 @@ class MooshareIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
'http://mooshare.biz/8dqtk4bjbp8g', compat_urllib_parse.urlencode(download_form))
|
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
|
|
||||||
self.to_screen('%s: Waiting for timeout' % video_id)
|
self.to_screen('%s: Waiting for timeout' % video_id)
|
||||||
@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
89
youtube_dl/extractor/ndr.py
Normal file
89
youtube_dl/extractor/ndr.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class NDRIE(InfoExtractor):
|
||||||
|
IE_NAME = 'ndr'
|
||||||
|
IE_DESC = 'NDR.de - Mediathek'
|
||||||
|
_VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
# video
|
||||||
|
{
|
||||||
|
'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
|
||||||
|
'md5': '20eba151ff165f386643dad9c1da08f7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '19925',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Hallo Niedersachsen ',
|
||||||
|
'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
|
||||||
|
'duration': 1722,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# audio
|
||||||
|
{
|
||||||
|
'url': 'http://www.ndr.de/903/audio191719.html',
|
||||||
|
'md5': '41ed601768534dd18a9ae34d84798129',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '191719',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '"Es war schockierend"',
|
||||||
|
'description': 'md5:ed7ff8364793545021a6355b97e95f10',
|
||||||
|
'duration': 112,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
|
title = self._og_search_title(page)
|
||||||
|
description = self._og_search_description(page)
|
||||||
|
|
||||||
|
mobj = re.search(
|
||||||
|
r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
|
||||||
|
page)
|
||||||
|
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
|
||||||
|
if mp3_url:
|
||||||
|
formats.append({
|
||||||
|
'url': mp3_url.group('audio'),
|
||||||
|
'format_id': 'mp3',
|
||||||
|
})
|
||||||
|
|
||||||
|
thumbnail = None
|
||||||
|
|
||||||
|
video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
|
||||||
|
if video_url:
|
||||||
|
thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
|
||||||
|
page, 'thumbnail', fatal=False)
|
||||||
|
if thumbnail:
|
||||||
|
thumbnail = 'http://www.ndr.de' + thumbnail
|
||||||
|
for format_id in ['lo', 'hi', 'hq']:
|
||||||
|
formats.append({
|
||||||
|
'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
raise ExtractorError('No media links available for %s' % video_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
93
youtube_dl/extractor/nfb.py
Normal file
93
youtube_dl/extractor/nfb.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NFBIE(InfoExtractor):
|
||||||
|
IE_NAME = 'nfb'
|
||||||
|
IE_DESC = 'National Film Board of Canada'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'qallunaat_why_white_people_are_funny',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Qallunaat! Why White People Are Funny ',
|
||||||
|
'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
|
||||||
|
'duration': 3128,
|
||||||
|
'uploader': 'Mark Sandiford',
|
||||||
|
'uploader_id': 'mark-sandiford',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
|
||||||
|
|
||||||
|
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
|
||||||
|
page, 'director id', fatal=False)
|
||||||
|
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
|
||||||
|
page, 'director name', fatal=False)
|
||||||
|
|
||||||
|
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||||
|
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
||||||
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
|
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
|
||||||
|
|
||||||
|
config = self._download_xml(request, video_id, 'Downloading player config XML')
|
||||||
|
|
||||||
|
title = None
|
||||||
|
description = None
|
||||||
|
thumbnail = None
|
||||||
|
duration = None
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
def extract_thumbnail(media):
|
||||||
|
thumbnails = {}
|
||||||
|
for asset in media.findall('assets/asset'):
|
||||||
|
thumbnails[asset.get('quality')] = asset.find('default/url').text
|
||||||
|
if not thumbnails:
|
||||||
|
return None
|
||||||
|
if 'high' in thumbnails:
|
||||||
|
return thumbnails['high']
|
||||||
|
return list(thumbnails.values())[0]
|
||||||
|
|
||||||
|
for media in config.findall('./player/stream/media'):
|
||||||
|
if media.get('type') == 'posterImage':
|
||||||
|
thumbnail = extract_thumbnail(media)
|
||||||
|
elif media.get('type') == 'video':
|
||||||
|
duration = int(media.get('duration'))
|
||||||
|
title = media.find('title').text
|
||||||
|
description = media.find('description').text
|
||||||
|
# It seems assets always go from lower to better quality, so no need to sort
|
||||||
|
formats = [{
|
||||||
|
'url': x.find('default/streamerURI').text + '/',
|
||||||
|
'play_path': x.find('default/url').text,
|
||||||
|
'rtmp_live': False,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': x.get('quality'),
|
||||||
|
} for x in media.findall('assets/asset')]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -9,7 +9,7 @@ class PBSIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?:
|
(?:
|
||||||
# Direct video URL
|
# Direct video URL
|
||||||
video\.pbs\.org/video/(?P<id>[0-9]+)/? |
|
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
||||||
# Article with embedded player
|
# Article with embedded player
|
||||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||||
# Player
|
# Player
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@ -12,11 +14,12 @@ class SlideshareIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
|
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
||||||
u'file': u'25665706.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '25665706',
|
||||||
u'title': u'Managing Scale and Complexity',
|
'ext': 'mp4',
|
||||||
u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
|
'title': 'Managing Scale and Complexity',
|
||||||
|
'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -26,15 +29,17 @@ class SlideshareIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, page_title)
|
webpage = self._download_webpage(url, page_title)
|
||||||
slideshare_obj = self._search_regex(
|
slideshare_obj = self._search_regex(
|
||||||
r'var slideshare_object = ({.*?}); var user_info =',
|
r'var slideshare_object = ({.*?}); var user_info =',
|
||||||
webpage, u'slideshare object')
|
webpage, 'slideshare object')
|
||||||
info = json.loads(slideshare_obj)
|
info = json.loads(slideshare_obj)
|
||||||
if info['slideshow']['type'] != u'video':
|
if info['slideshow']['type'] != 'video':
|
||||||
raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
||||||
|
|
||||||
doc = info['doc']
|
doc = info['doc']
|
||||||
bucket = info['jsplayer']['video_bucket']
|
bucket = info['jsplayer']['video_bucket']
|
||||||
ext = info['jsplayer']['video_extension']
|
ext = info['jsplayer']['video_extension']
|
||||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
@ -43,5 +48,5 @@ class SlideshareIE(InfoExtractor):
|
|||||||
'ext': ext,
|
'ext': ext,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'thumbnail': info['slideshow']['pin_image_url'],
|
'thumbnail': info['slideshow']['pin_image_url'],
|
||||||
'description': self._og_search_description(webpage),
|
'description': description,
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,7 @@ from ..aes import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class Tube8IE(InfoExtractor):
|
class Tube8IE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||||
u'file': u'229795.mp4',
|
u'file': u'229795.mp4',
|
||||||
|
@ -34,6 +34,7 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
|
uppercase_escape,
|
||||||
)
|
)
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
@ -136,7 +137,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
||||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
||||||
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
||||||
(?:www\.)?pwnyoutube\.com|
|
(?:www\.)?pwnyoutube\.com/|
|
||||||
tube\.majestyc\.net/|
|
tube\.majestyc\.net/|
|
||||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
@ -1085,8 +1086,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
self._downloader.report_warning(err_msg)
|
self._downloader.report_warning(err_msg)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _extract_id(self, url):
|
@classmethod
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
def extract_id(cls, url):
|
||||||
|
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
video_id = mobj.group(2)
|
video_id = mobj.group(2)
|
||||||
@ -1115,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
mobj = re.search(self._NEXT_URL_RE, url)
|
mobj = re.search(self._NEXT_URL_RE, url)
|
||||||
if mobj:
|
if mobj:
|
||||||
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
||||||
video_id = self._extract_id(url)
|
video_id = self.extract_id(url)
|
||||||
|
|
||||||
# Get video webpage
|
# Get video webpage
|
||||||
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||||
@ -1422,7 +1424,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com playlists'
|
IE_DESC = u'YouTube.com playlists'
|
||||||
_VALID_URL = r"""(?:
|
_VALID_URL = r"""(?x)(?:
|
||||||
(?:https?://)?
|
(?:https?://)?
|
||||||
(?:\w+\.)?
|
(?:\w+\.)?
|
||||||
youtube\.com/
|
youtube\.com/
|
||||||
@ -1431,7 +1433,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
\? (?:.*?&)*? (?:p|a|list)=
|
\? (?:.*?&)*? (?:p|a|list)=
|
||||||
| p/
|
| p/
|
||||||
)
|
)
|
||||||
((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
|
(
|
||||||
|
(?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
|
||||||
|
# Top tracks, they can also include dots
|
||||||
|
|(?:MC)[\w\.]*
|
||||||
|
)
|
||||||
.*
|
.*
|
||||||
|
|
|
|
||||||
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
||||||
@ -1441,11 +1447,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
@ -1469,7 +1470,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract playlist id
|
# Extract playlist id
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
playlist_id = mobj.group(1) or mobj.group(2)
|
playlist_id = mobj.group(1) or mobj.group(2)
|
||||||
@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
# Download all channel pages using the json-based channel_ajax query
|
# Download all channel pages using the json-based channel_ajax query
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||||
page = self._download_webpage(url, channel_id,
|
page = self._download_json(
|
||||||
u'Downloading page #%s' % pagenum)
|
url, channel_id, note=u'Downloading page #%s' % pagenum,
|
||||||
|
transform_source=uppercase_escape)
|
||||||
page = json.loads(page)
|
|
||||||
|
|
||||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||||
video_ids.extend(ids_in_page)
|
video_ids.extend(ids_in_page)
|
||||||
|
|
||||||
|
@ -756,9 +756,9 @@ def unified_strdate(date_str):
|
|||||||
"""Return a string with the date in the format YYYYMMDD"""
|
"""Return a string with the date in the format YYYYMMDD"""
|
||||||
upload_date = None
|
upload_date = None
|
||||||
#Replace commas
|
#Replace commas
|
||||||
date_str = date_str.replace(',',' ')
|
date_str = date_str.replace(',', ' ')
|
||||||
# %z (UTC offset) is only supported in python>=3.2
|
# %z (UTC offset) is only supported in python>=3.2
|
||||||
date_str = re.sub(r' ?(\+|-)[0-9:]*$', '', date_str)
|
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
|
||||||
format_expressions = [
|
format_expressions = [
|
||||||
'%d %B %Y',
|
'%d %B %Y',
|
||||||
'%B %d %Y',
|
'%B %d %Y',
|
||||||
@ -1214,3 +1214,9 @@ class PagedList(object):
|
|||||||
if end == nextfirstid:
|
if end == nextfirstid:
|
||||||
break
|
break
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def uppercase_escape(s):
|
||||||
|
return re.sub(
|
||||||
|
r'\\U([0-9a-fA-F]{8})',
|
||||||
|
lambda m: compat_chr(int(m.group(1), base=16)), s)
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.02.06.1'
|
__version__ = '2014.02.10'
|
||||||
|
Reference in New Issue
Block a user