Compare commits
45 Commits
2014.02.06
...
2014.02.10
Author | SHA1 | Date | |
---|---|---|---|
2e20bba708 | |||
e70dc1d14b | |||
026fcc0495 | |||
81c2f20b53 | |||
1afe753462 | |||
524c2c716a | |||
b542d4bbd7 | |||
17968e444c | |||
2e3fd9ec2f | |||
d6a283b025 | |||
9766538124 | |||
98dbee8681 | |||
e421491b3b | |||
6828d37c41 | |||
bf5f610099 | |||
8b7f73404a | |||
85cacb2f51 | |||
b3fa3917e2 | |||
082c6c867a | |||
03fcf1ab57 | |||
3b00dea5eb | |||
8bc6c8e3c0 | |||
79bc27b53a | |||
84dd703199 | |||
c6fdba23a6 | |||
b19fe521a9 | |||
c1e672d121 | |||
f4371f4784 | |||
d914d9d187 | |||
845d14d377 | |||
4a9540b6d2 | |||
9f31be7000 | |||
41fa1b627d | |||
c0c4e66b29 | |||
cd8662de22 | |||
3587159614 | |||
d67cc9fa7c | |||
bf3a2fe923 | |||
e9ea0bf123 | |||
63424b6233 | |||
0bf35c5cf5 | |||
95c29381eb | |||
94c4abce7f | |||
f2dffe55f8 | |||
46a073bfac |
@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
@ -13,6 +15,7 @@ from youtube_dl.extractor import (
|
||||
FacebookIE,
|
||||
gen_extractors,
|
||||
JustinTVIE,
|
||||
PBSIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
@ -29,18 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
|
||||
def test_youtube_playlist_matching(self):
|
||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||
assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||
assertPlaylist(u'PL63F0C78739B09958')
|
||||
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
|
||||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||
# Top tracks
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||
|
||||
def test_youtube_matching(self):
|
||||
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
||||
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
|
||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
|
||||
@ -80,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
||||
|
||||
def test_youtube_extract(self):
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||
@ -89,7 +94,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||
|
||||
def test_facebook_matching(self):
|
||||
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
@ -124,5 +129,9 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
|
||||
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
|
||||
|
||||
def test_pbs(self):
|
||||
# https://github.com/rg3/youtube-dl/issues/2350
|
||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -127,6 +127,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = u'''<root>
|
||||
|
@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'ytdl test PL')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
||||
|
||||
def test_youtube_playlist_noplaylist(self):
|
||||
@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertEqual(result['_type'], 'url')
|
||||
self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
|
||||
|
||||
def test_issue_673(self):
|
||||
dl = FakeYDL()
|
||||
@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
|
||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||
|
||||
@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
entries = result['entries']
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
||||
def test_youtube_channel(self):
|
||||
dl = FakeYDL()
|
||||
@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
original_video = entries[0]
|
||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||
|
||||
def test_youtube_toptracks(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
|
||||
def test_youtube_toplist(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeTopListIE(dl)
|
||||
|
@ -41,6 +41,7 @@ __authors__ = (
|
||||
'Chris Gahan',
|
||||
'Saimadhav Heblikar',
|
||||
'Mike Col',
|
||||
'Andreas Schmitz',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
|
||||
url = info_dict['url']
|
||||
player_url = info_dict.get('player_url', None)
|
||||
page_url = info_dict.get('page_url', None)
|
||||
app = info_dict.get('app', None)
|
||||
play_path = info_dict.get('play_path', None)
|
||||
tc_url = info_dict.get('tc_url', None)
|
||||
flash_version = info_dict.get('flash_version', None)
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
|
||||
@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
if app is not None:
|
||||
basic_args += ['--app', app]
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
    # Pass the tcUrl supplied in info_dict; the stream URL itself is a
    # different value and must not be substituted here.
    basic_args += ['--tcUrl', tc_url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if flash_version is not None:
|
||||
basic_args += ['--flashVer', flash_version]
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
|
@ -15,6 +15,7 @@ from .arte import (
|
||||
from .auengine import AUEngineIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .bloomberg import BloombergIE
|
||||
@ -25,6 +26,7 @@ from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cbs import CBSIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
@ -103,6 +105,7 @@ from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
from .jadorecettepub import JadoreCettePubIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
@ -112,6 +115,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .la7 import LA7IE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .liveleak import LiveLeakIE
|
||||
@ -142,8 +146,10 @@ from .myvideo import MyVideoIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import NBCNewsIE
|
||||
from .ndr import NDRIE
|
||||
from .ndtv import NDTVIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .nfb import NFBIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
from .ninegag import NineGagIE
|
||||
|
217
youtube_dl/extractor/bbccouk.py
Normal file
217
youtube_dl/extractor/bbccouk.py
Normal file
@ -0,0 +1,217 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
    """Extractor for bbc.co.uk programme and iPlayer episode pages.

    The page id is resolved to a playlist XML; every programme item in it is
    then resolved through the media selector to audio/video formats and
    captions.
    """

    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'

    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
            'info_dict': {
                'id': 'p01q7wz4',
                'ext': 'flv',
                'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
                'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
                'duration': 1936,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Man in Black: Series 3: The Printed Name',
                'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
                'duration': 1800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
            'info_dict': {
                # NOTE(review): same id as the previous test case; looks
                # copy-pasted. The test is skipped, so confirm before enabling.
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Voice UK: Series 3: Blind Auditions 5',
                'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
                'duration': 5100,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        }
    ]

    @staticmethod
    def _int_or_none(value):
        """Return int(value), or None when the XML attribute was absent."""
        return None if value is None else int(value)

    def _extract_asx_playlist(self, connection, programme_id):
        """Download the ASX playlist of a connection and return its ref URLs."""
        asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
        return [ref.get('href') for ref in asx.findall('./Entry/ref')]

    def _extract_connection(self, connection, programme_id):
        """Turn one <connection> element into a list of format dicts.

        Handles the 'http' protocol (direct link or ASX playlist) and 'rtmp';
        any other protocol yields an empty list.
        """
        formats = []
        protocol = connection.get('protocol')
        supplier = connection.get('supplier')
        if protocol == 'http':
            href = connection.get('href')
            # ASX playlist
            if supplier == 'asx':
                for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
                    formats.append({
                        'url': ref,
                        'format_id': 'ref%s_%s' % (i, supplier),
                    })
            # Direct link
            else:
                formats.append({
                    'url': href,
                    'format_id': supplier,
                })
        elif protocol == 'rtmp':
            application = connection.get('application', 'ondemand')
            auth_string = connection.get('authString')
            identifier = connection.get('identifier')
            server = connection.get('server')
            formats.append({
                'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
                'play_path': identifier,
                'app': '%s?%s' % (application, auth_string),
                'page_url': 'http://www.bbc.co.uk',
                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                'rtmp_live': False,
                'ext': 'flv',
                'format_id': supplier,
            })
        return formats

    def _extract_items(self, playlist):
        """Return the <item> children of the playlist document."""
        return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')

    def _extract_medias(self, media_selection):
        """Return the <media> children of the media selection document."""
        return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')

    def _extract_connections(self, media):
        """Return the <connection> children of a <media> element."""
        return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')

    def _extract_video(self, media, programme_id):
        """Build format dicts for a video <media> element.

        Numeric attributes missing from the element become None instead of
        raising TypeError.
        """
        formats = []
        vbr = self._int_or_none(media.get('bitrate'))
        vcodec = media.get('encoding')
        service = media.get('service')
        width = self._int_or_none(media.get('width'))
        height = self._int_or_none(media.get('height'))
        file_size = self._int_or_none(media.get('media_file_size'))
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for fmt in conn_formats:
                fmt.update({
                    'format_id': '%s_%s' % (service, fmt['format_id']),
                    'width': width,
                    'height': height,
                    'vbr': vbr,
                    'vcodec': vcodec,
                    'filesize': file_size,
                })
            formats.extend(conn_formats)
        return formats

    def _extract_audio(self, media, programme_id):
        """Build format dicts for an audio <media> element.

        A missing bitrate attribute becomes None instead of raising TypeError.
        """
        formats = []
        abr = self._int_or_none(media.get('bitrate'))
        acodec = media.get('encoding')
        service = media.get('service')
        for connection in self._extract_connections(media):
            conn_formats = self._extract_connection(connection, programme_id)
            for fmt in conn_formats:
                fmt.update({
                    'format_id': '%s_%s' % (service, fmt['format_id']),
                    'abr': abr,
                    'acodec': acodec,
                })
            formats.extend(conn_formats)
        return formats

    def _extract_captions(self, media, programme_id):
        """Download each caption document and return {lang: subtitle text}.

        Each <p> element becomes one numbered cue; numbering starts at 0 and
        the begin/end attribute values are copied through unchanged.
        """
        subtitles = {}
        for connection in self._extract_connections(media):
            captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
            ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
            subtitles[lang] = ''.join(
                '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (
                    str(pos), p.get('begin'), p.get('end'),
                    p.text.strip() if p.text is not None else '')
                for pos, p in enumerate(ps))
        return subtitles

    def _real_extract(self, url):
        """Extract the info dict for a programme or episode URL.

        Raises ExtractorError when the playlist reports that the episode is
        unavailable, or when it contains no programme item at all. Returns
        None after listing subtitles when 'listsubtitles' is requested.
        """
        mobj = re.match(self._VALID_URL, url)
        group_id = mobj.group('id')

        playlist = self._download_xml(
            'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
            'Downloading playlist XML')

        no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
        if no_items is not None:
            reason = no_items.get('reason')
            if reason == 'preAvailability':
                msg = 'Episode %s is not yet available' % group_id
            elif reason == 'postAvailability':
                msg = 'Episode %s is no longer available' % group_id
            else:
                msg = 'Episode %s is not available: %s' % (group_id, reason)
            raise ExtractorError(msg, expected=True)

        formats = []
        subtitles = None
        # Pre-bind the per-programme values so an empty playlist is reported
        # explicitly below rather than failing on an unbound local.
        programme_id = title = description = duration = None

        for item in self._extract_items(playlist):
            item_kind = item.get('kind')
            if item_kind != 'programme' and item_kind != 'radioProgramme':
                continue
            title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
            description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text

            programme_id = item.get('identifier')
            duration = self._int_or_none(item.get('duration'))

            media_selection = self._download_xml(
                'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
                programme_id, 'Downloading media selection XML')

            for media in self._extract_medias(media_selection):
                media_kind = media.get('kind')
                if media_kind == 'audio':
                    formats.extend(self._extract_audio(media, programme_id))
                elif media_kind == 'video':
                    formats.extend(self._extract_video(media, programme_id))
                elif media_kind == 'captions':
                    subtitles = self._extract_captions(media, programme_id)

        if programme_id is None:
            raise ExtractorError('No programme found in playlist for %s' % group_id)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(programme_id, subtitles)
            return

        self._sort_formats(formats)

        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@ -24,5 +24,7 @@ class BloombergIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
ooyala_url = self._twitter_search_player(webpage)
|
||||
return self.url_result(ooyala_url, OoyalaIE.ie_key())
|
||||
embed_code = self._search_regex(
|
||||
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
|
||||
'embed code')
|
||||
return OoyalaIE._build_url_result(embed_code)
|
||||
|
@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
|
||||
'''
|
||||
IE_DESC = 'Channel 9'
|
||||
IE_NAME = 'channel9'
|
||||
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
|
||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
'info_dict': {
|
||||
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||
'ext': 'mp4',
|
||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
'duration': 4576,
|
||||
@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
|
||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
'info_dict': {
|
||||
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'ext': 'mp4',
|
||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'duration': 1540,
|
||||
|
97
youtube_dl/extractor/chilloutzone.py
Normal file
97
youtube_dl/extractor/chilloutzone.py
Normal file
@ -0,0 +1,97 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class ChilloutzoneIE(InfoExtractor):
    """Extractor for chilloutzone.net video pages.

    The page embeds a base64-encoded JSON blob describing the video. Depending
    on that data the result is a redirect to YouTube or Vimeo, or a direct
    mp4 URL.
    """

    # NOTE(review): the "|" inside the character class matches a literal pipe,
    # not an alternation; confirm that is intended.
    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
    _TESTS = [{
        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
        'md5': 'a76f3457e813ea0037e5244f509e66d1',
        'info_dict': {
            'id': 'enemene-meck-alle-katzen-weg',
            'ext': 'mp4',
            'title': 'Enemene Meck - Alle Katzen weg',
            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
        },
    }, {
        'note': 'Video hosted at YouTube',
        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
        'info_dict': {
            'id': '1YVQaAgHyRU',
            'ext': 'mp4',
            'title': '16 Photos Taken 1 Second Before Disaster',
            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
            'uploader': 'BuzzFeedVideo',
            'uploader_id': 'BuzzFeedVideo',
            'upload_date': '20131105',
        },
    }, {
        'note': 'Video hosted at Vimeo',
        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
        'info_dict': {
            'id': '85523671',
            'ext': 'mp4',
            'title': 'The Sunday Times - Icons',
            'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
            'uploader': 'Us',
            'uploader_id': 'usfilms',
            'upload_date': '20140131'
        },
    }]

    def _real_extract(self, url):
        """Return a URL result for the native host, or a direct mp4 info dict.

        Raises ExtractorError when neither a native source nor a media URL is
        available.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        base64_video_info = self._html_search_regex(
            r'var cozVidData = "(.+?)";', webpage, 'video data')
        decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
        video_info_dict = json.loads(decoded_video_info)

        # get video information from dict
        # Only the title is mandatory. The other keys are read with .get() so
        # a missing key reaches the fallback / "No video found" handling below
        # instead of aborting with a KeyError.
        video_url = video_info_dict.get('mediaUrl')
        description = clean_html(video_info_dict.get('description'))
        title = video_info_dict['title']
        native_platform = video_info_dict.get('nativePlatform')
        native_video_id = video_info_dict.get('nativeVideoId')
        source_priority = video_info_dict.get('sourcePriority')

        # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
        if native_platform is None:
            youtube_url = self._html_search_regex(
                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
                webpage, 'fallback video URL', default=None)
            if youtube_url is not None:
                return self.url_result(youtube_url, ie='Youtube')

        # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
        # the own CDN
        if source_priority == 'native' and native_video_id is not None:
            if native_platform == 'youtube':
                return self.url_result(native_video_id, ie='Youtube')
            if native_platform == 'vimeo':
                return self.url_result(
                    'http://vimeo.com/' + native_video_id, ie='Vimeo')

        if not video_url:
            raise ExtractorError('No video found')

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }
|
@ -271,8 +271,11 @@ class InfoExtractor(object):
|
||||
|
||||
def _download_json(self, url_or_request, video_id,
|
||||
note=u'Downloading JSON metadata',
|
||||
errnote=u'Unable to download JSON metadata'):
|
||||
errnote=u'Unable to download JSON metadata',
|
||||
transform_source=None):
|
||||
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||
if transform_source:
|
||||
json_string = transform_source(json_string)
|
||||
try:
|
||||
return json.loads(json_string)
|
||||
except ValueError as ve:
|
||||
|
@ -9,7 +9,7 @@ from ..utils import unified_strdate
|
||||
|
||||
class ElPaisIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
|
||||
IE_DESCR = 'El País'
|
||||
IE_DESC = 'El País'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
|
||||
|
@ -14,15 +14,16 @@ from ..utils import (
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
# Single movie
|
||||
{
|
||||
'url': 'http://www.ivi.ru/watch/53141',
|
||||
'file': '53141.mp4',
|
||||
'md5': '6ff5be2254e796ed346251d117196cf4',
|
||||
'info_dict': {
|
||||
'id': '53141',
|
||||
'ext': 'mp4',
|
||||
'title': 'Иван Васильевич меняет профессию',
|
||||
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
|
||||
'duration': 5498,
|
||||
@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
|
||||
# Serial's serie
|
||||
{
|
||||
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||
'file': '74791.mp4',
|
||||
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
||||
'info_dict': {
|
||||
'id': '74791',
|
||||
'ext': 'mp4',
|
||||
'title': 'Дежурный ангел - 1 серия',
|
||||
'duration': 2490,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||
@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
|
||||
class IviCompilationIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru compilations'
|
||||
IE_NAME = 'ivi:compilation'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||
|
||||
def _extract_entries(self, html, compilation_id):
|
||||
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
||||
|
49
youtube_dl/extractor/jadorecettepub.py
Normal file
49
youtube_dl/extractor/jadorecettepub.py
Normal file
@ -0,0 +1,49 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class JadoreCettePubIE(InfoExtractor):
    """Extractor for jadorecettepub.com posts.

    Reads the title, description and embedded video URL from the post page
    and hands the actual extraction over via a 'url_transparent' result.
    """

    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'

    _TEST = {
        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
        'md5': '401286a06067c70b44076044b66515de',
        'info_dict': {
            'id': 'jLMja3tr7a4',
            'ext': 'mp4',
            'title': 'La pire utilisation de Star Wars',
            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
        },
    }

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the embedded video."""
        display_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, display_id)

        # Title is mandatory; the description is best-effort (fatal=False).
        post_title = self._html_search_regex(
            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
            page, 'title')
        post_description = self._html_search_regex(
            r'(?s)<div id="fb-root">(.*?)<script>', page, 'description',
            fatal=False)

        # The target video URL sits between the two markers in the page.
        target_url = self._search_regex(
            r'\[/postlink\](.*)endofvid', page, 'video URL')

        result = {'_type': 'url_transparent', 'url': target_url}
        result['id'] = YoutubeIE.extract_id(target_url)
        result['title'] = post_title
        result['description'] = post_description
        return result
|
||||
|
@ -1,5 +1,7 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
@ -10,12 +12,13 @@ class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
u'file': u'5182.mp4',
|
||||
u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
|
||||
u'info_dict': {
|
||||
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||
'info_dict': {
|
||||
'id': '5182',
|
||||
'ext': 'mp4',
|
||||
'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
||||
},
|
||||
}
|
||||
|
||||
@ -25,14 +28,14 @@ class JeuxVideoIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
xml_link = self._html_search_regex(
|
||||
r'<param name="flashvars" value="config=(.*?)" />',
|
||||
webpage, u'config URL')
|
||||
webpage, 'config URL')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||
xml_link, u'video ID')
|
||||
xml_link, 'video ID')
|
||||
|
||||
config = self._download_xml(
|
||||
xml_link, title, u'Downloading XML config')
|
||||
xml_link, title, 'Downloading XML config')
|
||||
info_json = config.find('format.json').text
|
||||
info = json.loads(info_json)['versions'][0]
|
||||
|
||||
|
66
youtube_dl/extractor/kontrtube.py
Normal file
66
youtube_dl/extractor/kontrtube.py
Normal file
@ -0,0 +1,66 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class KontrTubeIE(InfoExtractor):
    """Extractor for video pages on kontrtube.ru.

    All metadata is scraped from the HTML of the video page with regular
    expressions; only the video URL and the title are mandatory.
    """

    IE_NAME = 'kontrtube'
    IE_DESC = 'KontrTube.ru - Труба зовёт'
    _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'

    _TEST = {
        'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
        'md5': '975a991a4926c9a85f383a736a2e6b80',
        'info_dict': {
            'id': '2678',
            'ext': 'mp4',
            'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
            'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
            'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
            'duration': 270,
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and return the info dict of its video.

        The returned dict carries 'id', 'url', 'thumbnail', 'title',
        'description', 'duration', 'view_count' and 'comment_count'.
        Everything except the video URL and the title is looked up
        non-fatally or defaults to None when not found on the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        # The optional "/?" keeps a trailing slash out of the captured URLs.
        video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
        thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
        title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
                                        'video title')
        description = self._html_search_meta('description', webpage, 'video description')

        # Duration is rendered as "<minutes>м:<seconds>с" (minutes / seconds).
        mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
                         webpage)
        duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None

        # "Просмотров" is the view counter.
        view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
                                             'view count', fatal=False)
        view_count = int(view_count) if view_count is not None else None

        # "Комментарии" is the comment counter.  The lookup is non-fatal, so
        # comment_str may be empty/None when the counter is absent; in that
        # case the count is simply left unknown instead of crashing.
        comment_count = None
        comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
                                              fatal=False)
        if comment_str:
            if comment_str.startswith('комментариев нет'):
                # The page says "no comments".
                comment_count = 0
            else:
                # Otherwise the text reads "<shown> of <total> comments".
                mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
                if mobj:
                    comment_count = int(mobj.group('total'))

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
        }
|
@ -61,7 +61,7 @@ class MooshareIE(InfoExtractor):
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'http://mooshare.biz/8dqtk4bjbp8g', compat_urllib_parse.urlencode(download_form))
|
||||
'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
||||
self.to_screen('%s: Waiting for timeout' % video_id)
|
||||
@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
89
youtube_dl/extractor/ndr.py
Normal file
89
youtube_dl/extractor/ndr.py
Normal file
@ -0,0 +1,89 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class NDRIE(InfoExtractor):
    """Extractor for media pages of the NDR.de Mediathek (video and audio)."""

    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Mediathek'
    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'

    _TESTS = [
        # video
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
            'md5': '20eba151ff165f386643dad9c1da08f7',
            'info_dict': {
                'id': '19925',
                'ext': 'mp4',
                'title': 'Hallo Niedersachsen ',
                'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
                'duration': 1722,
            },
        },
        # audio
        {
            'url': 'http://www.ndr.de/903/audio191719.html',
            'md5': '41ed601768534dd18a9ae34d84798129',
            'info_dict': {
                'id': '191719',
                'ext': 'mp3',
                'title': '"Es war schockierend"',
                'description': 'md5:ed7ff8364793545021a6355b97e95f10',
                'duration': 112,
            }
        }
    ]

    def _real_extract(self, url):
        """Scrape the page at *url* and return its info dict.

        The result holds 'id', 'title', 'description', 'thumbnail',
        'duration' and 'formats'.  An mp3 format is added when the page
        references one, followed by the 'lo', 'hi' and 'hq' mp4 variants
        when a video source is found.

        Raises ExtractorError when the page offers neither audio nor video.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        title = self._og_search_title(page)
        description = self._og_search_description(page)

        # The player shows the running time as separate minute/second spans.
        duration = None
        duration_match = re.search(
            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
            page)
        if duration_match:
            minutes = int(duration_match.group('minutes'))
            seconds = int(duration_match.group('seconds'))
            duration = minutes * 60 + seconds

        formats = []

        audio_match = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
        if audio_match:
            formats.append({
                'url': audio_match.group('audio'),
                'format_id': 'mp3',
            })

        thumbnail = None

        video_match = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
        if video_match:
            # The poster path is only looked up for video pages; it is
            # prefixed with the site's host name to form the thumbnail URL.
            thumbnail = self._html_search_regex(
                r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
                page, 'thumbnail', fatal=False)
            if thumbnail:
                thumbnail = 'http://www.ndr.de' + thumbnail
            # Every quality shares the URL stem captured from the ".hi.mp4" entry.
            url_stem = video_match.group('video')
            formats.extend(
                {
                    'url': '%s.%s.mp4' % (url_stem, quality),
                    'format_id': quality,
                }
                for quality in ('lo', 'hi', 'hq'))

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
93
youtube_dl/extractor/nfb.py
Normal file
93
youtube_dl/extractor/nfb.py
Normal file
@ -0,0 +1,93 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class NFBIE(InfoExtractor):
    """Extractor for film pages of the National Film Board of Canada."""

    IE_NAME = 'nfb'
    IE_DESC = 'National Film Board of Canada'
    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'

    _TEST = {
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
        'info_dict': {
            'id': 'qallunaat_why_white_people_are_funny',
            'ext': 'mp4',
            'title': 'Qallunaat! Why White People Are Funny ',
            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
            'duration': 3128,
            'uploader': 'Mark Sandiford',
            'uploader_id': 'mark-sandiford',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the film addressed by *url*.

        The director (reported as uploader) is scraped non-fatally from the
        film page.  Title, description, duration, thumbnail and formats come
        from the player config XML, which is requested with a POST.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # The film page is always fetched from nfb.ca, whichever host matched.
        page = self._download_webpage(
            'https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')

        uploader_id = self._html_search_regex(
            r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
            page, 'director id', fatal=False)
        uploader = self._html_search_regex(
            r'<em class="director-name" itemprop="name">([^<]+)</em>',
            page, 'director name', fatal=False)

        post_data = compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')
        request = compat_urllib_request.Request(
            'https://www.nfb.ca/film/%s/player_config' % video_id, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

        config = self._download_xml(request, video_id, 'Downloading player config XML')

        def extract_thumbnail(media):
            # Map each asset's quality to its URL, then prefer 'high' and
            # otherwise fall back to the first entry.
            by_quality = dict(
                (asset.get('quality'), asset.find('default/url').text)
                for asset in media.findall('assets/asset'))
            if not by_quality:
                return None
            return by_quality['high'] if 'high' in by_quality else list(by_quality.values())[0]

        title = description = thumbnail = duration = None
        formats = []

        for media in config.findall('./player/stream/media'):
            media_type = media.get('type')
            if media_type == 'posterImage':
                thumbnail = extract_thumbnail(media)
                continue
            if media_type != 'video':
                continue
            duration = int(media.get('duration'))
            title = media.find('title').text
            description = media.find('description').text
            # It seems assets always go from lower to better quality, so no need to sort
            formats = []
            for asset in media.findall('assets/asset'):
                formats.append({
                    'url': asset.find('default/streamerURI').text + '/',
                    'play_path': asset.find('default/url').text,
                    'rtmp_live': False,
                    'ext': 'mp4',
                    'format_id': asset.get('quality'),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }
|
@ -9,7 +9,7 @@ class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
# Direct video URL
|
||||
video\.pbs\.org/video/(?P<id>[0-9]+)/? |
|
||||
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
||||
# Article with embedded player
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||
# Player
|
||||
|
@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
@ -12,11 +14,12 @@ class SlideshareIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
||||
u'file': u'25665706.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'Managing Scale and Complexity',
|
||||
u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
|
||||
'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
|
||||
'info_dict': {
|
||||
'id': '25665706',
|
||||
'ext': 'mp4',
|
||||
'title': 'Managing Scale and Complexity',
|
||||
'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.',
|
||||
},
|
||||
}
|
||||
|
||||
@ -26,15 +29,17 @@ class SlideshareIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
slideshare_obj = self._search_regex(
|
||||
r'var slideshare_object = ({.*?}); var user_info =',
|
||||
webpage, u'slideshare object')
|
||||
webpage, 'slideshare object')
|
||||
info = json.loads(slideshare_obj)
|
||||
if info['slideshow']['type'] != u'video':
|
||||
raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
||||
if info['slideshow']['type'] != 'video':
|
||||
raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
|
||||
|
||||
doc = info['doc']
|
||||
bucket = info['jsplayer']['video_bucket']
|
||||
ext = info['jsplayer']['video_extension']
|
||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||
description = self._html_search_regex(
|
||||
r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
@ -43,5 +48,5 @@ class SlideshareIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
'thumbnail': info['slideshow']['pin_image_url'],
|
||||
'description': self._og_search_description(webpage),
|
||||
'description': description,
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ from ..aes import (
|
||||
)
|
||||
|
||||
class Tube8IE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
u'file': u'229795.mp4',
|
||||
|
@ -34,6 +34,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
orderedSet,
|
||||
write_json_file,
|
||||
uppercase_escape,
|
||||
)
|
||||
|
||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
@ -136,7 +137,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
|
||||
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
|
||||
(?:www\.)?deturl\.com/www\.youtube\.com/|
|
||||
(?:www\.)?pwnyoutube\.com|
|
||||
(?:www\.)?pwnyoutube\.com/|
|
||||
tube\.majestyc\.net/|
|
||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||
@ -1085,8 +1086,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
|
||||
def _extract_id(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
@classmethod
|
||||
def extract_id(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group(2)
|
||||
@ -1115,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
mobj = re.search(self._NEXT_URL_RE, url)
|
||||
if mobj:
|
||||
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
||||
video_id = self._extract_id(url)
|
||||
video_id = self.extract_id(url)
|
||||
|
||||
# Get video webpage
|
||||
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||
@ -1422,7 +1424,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = u'YouTube.com playlists'
|
||||
_VALID_URL = r"""(?:
|
||||
_VALID_URL = r"""(?x)(?:
|
||||
(?:https?://)?
|
||||
(?:\w+\.)?
|
||||
youtube\.com/
|
||||
@ -1431,7 +1433,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
\? (?:.*?&)*? (?:p|a|list)=
|
||||
| p/
|
||||
)
|
||||
((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
|
||||
(
|
||||
(?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
|
||||
# Top tracks, they can also include dots
|
||||
|(?:MC)[\w\.]*
|
||||
)
|
||||
.*
|
||||
|
|
||||
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
||||
@ -1441,11 +1447,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||
IE_NAME = u'youtube:playlist'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@ -1469,7 +1470,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract playlist id
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
playlist_id = mobj.group(1) or mobj.group(2)
|
||||
@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
# Download all channel pages using the json-based channel_ajax query
|
||||
for pagenum in itertools.count(1):
|
||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||
page = self._download_webpage(url, channel_id,
|
||||
u'Downloading page #%s' % pagenum)
|
||||
|
||||
page = json.loads(page)
|
||||
|
||||
page = self._download_json(
|
||||
url, channel_id, note=u'Downloading page #%s' % pagenum,
|
||||
transform_source=uppercase_escape)
|
||||
|
||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||
video_ids.extend(ids_in_page)
|
||||
|
||||
|
@ -756,9 +756,9 @@ def unified_strdate(date_str):
|
||||
"""Return a string with the date in the format YYYYMMDD"""
|
||||
upload_date = None
|
||||
#Replace commas
|
||||
date_str = date_str.replace(',',' ')
|
||||
date_str = date_str.replace(',', ' ')
|
||||
# %z (UTC offset) is only supported in python>=3.2
|
||||
date_str = re.sub(r' ?(\+|-)[0-9:]*$', '', date_str)
|
||||
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
|
||||
format_expressions = [
|
||||
'%d %B %Y',
|
||||
'%B %d %Y',
|
||||
@ -1214,3 +1214,9 @@ class PagedList(object):
|
||||
if end == nextfirstid:
|
||||
break
|
||||
return res
|
||||
|
||||
|
||||
def uppercase_escape(s):
    """Expand 8-digit uppercase-U escapes in *s* into the characters they denote.

    An escape is a backslash, a capital ``U`` and exactly eight hexadecimal
    digits.  Anything else, including lowercase-u escapes and sequences with
    fewer digits, is returned untouched.

    The escape is decoded with the ``unicode_escape`` codec rather than by
    converting the number with chr()/unichr(), because the latter cannot
    produce code points above 0xFFFF on narrow Python 2 builds.
    """
    # Imported locally so the function does not depend on the module's import block.
    import codecs

    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        # The decoder returns (decoded_text, consumed_length); keep the text.
        lambda m: unicode_escape(m.group(0))[0],
        s)
|
||||
|
@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.02.06.1'
|
||||
__version__ = '2014.02.10'
|
||||
|
Reference in New Issue
Block a user