Compare commits

...

32 Commits

Author SHA1 Message Date
d6a283b025 release 2014.02.08.2 2014-02-08 19:20:35 +01:00
9766538124 [jadorecettepub] Add extractor (Fixes #2148) 2014-02-08 19:20:23 +01:00
98dbee8681 [jeuxvideo] Modernize 2014-02-08 18:43:12 +01:00
e421491b3b release 2014.02.08.1 2014-02-08 18:38:05 +01:00
6828d37c41 Merge branch 'master' of github.com:rg3/youtube-dl 2014-02-08 18:37:53 +01:00
bf5f610099 [pbs] Add support for viralplayer links (Fixes #2350) 2014-02-08 18:37:33 +01:00
8b7f73404a [bbc.co.uk] Fix regex 2014-02-08 22:55:43 +07:00
85cacb2f51 [bbc.co.uk] Add one more link format 2014-02-08 22:54:05 +07:00
b3fa3917e2 release 2014.02.08 2014-02-08 16:25:03 +01:00
082c6c867a [bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184) 2014-02-08 21:55:28 +07:00
03fcf1ab57 Merge pull request #2342 from MikeCol/tube8
[Tube8] Extended valid urls schema
2014-02-08 04:00:50 +01:00
3b00dea5eb Extended valid urls schema 2014-02-08 00:09:26 +01:00
8bc6c8e3c0 [chilloutzone] Add additional tests (#2340) 2014-02-07 15:42:31 +01:00
79bc27b53a [channel9] Simplify 2014-02-07 19:41:18 +07:00
84dd703199 [ivi] Simplify 2014-02-07 19:36:50 +07:00
c6fdba23a6 [nfb] Add workaround for python2.6 2014-02-07 19:23:53 +07:00
b19fe521a9 Merge pull request #2340 from Fnordlab/master
[chilloutzone] Fixes refactoring bug
2014-02-07 12:46:56 +01:00
c1e672d121 [chilloutzone] fixes bug with youtube extraction
The id used for extracting the video from YouTube is stored in
native_video_id, not video_id; video_id is only used on chilloutzone.net.
2014-02-07 12:29:58 +01:00
f4371f4784 Merge remote-tracking branch 'upstream/master' 2014-02-07 12:20:58 +01:00
d914d9d187 [chilloutzone] Add import 2014-02-07 12:03:19 +01:00
845d14d377 credit @Fnordlab for chilloutzone 2014-02-07 12:00:58 +01:00
4a9540b6d2 [chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00
9f31be7000 Merge remote-tracking branch 'Fnordlab/chilloutzone' 2014-02-07 11:50:26 +01:00
41fa1b627d release 2014.02.06.3 2014-02-07 01:41:01 +01:00
c0c4e66b29 Merge branch 'chilloutzone' 2014-02-06 21:33:16 +01:00
cd8662de22 [chilloutzone] Bug fix, runs against tests
Fixes a bug with python3.3 and made the extractor run successfully
against tox
2014-02-06 21:31:04 +01:00
3587159614 [nfb] Add encode POST data 2014-02-07 02:13:04 +07:00
d67cc9fa7c [youtube:playlist] Recognize ‘top tracks’ urls (closes #2332)
The list parameter starts with ‘MC’ and can have more characters after it, including dots
2014-02-06 19:46:26 +01:00
bf3a2fe923 [elpais] Fix typo 2014-02-07 00:38:29 +07:00
e9ea0bf123 [ndr] Add support for ndr.de (Closes #2325) 2014-02-07 00:35:26 +07:00
f2dffe55f8 Merge branch 'chilloutzone' 2014-02-06 11:49:38 +01:00
46a073bfac [chilloutzone] Added support for chilloutzone.net
Added support for chilloutzone.net videos, including embedded YouTube
and Vimeo movies. If you find a movie that does not work, drop me an
email.
2014-02-06 11:44:44 +01:00
18 changed files with 464 additions and 62 deletions

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
@ -13,6 +15,7 @@ from youtube_dl.extractor import (
FacebookIE,
gen_extractors,
JustinTVIE,
PBSIE,
YoutubeIE,
)
@ -29,18 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_playlist_matching(self):
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
assertPlaylist(u'PL63F0C78739B09958')
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
assertPlaylist('PL63F0C78739B09958')
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
# Top tracks
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
@ -80,7 +85,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
@ -89,7 +94,7 @@ class TestAllURLsMatching(unittest.TestCase):
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable(u'https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
def test_no_duplicates(self):
ies = gen_extractors()
@ -124,5 +129,9 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
def test_pbs(self):
# https://github.com/rg3/youtube-dl/issues/2350
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
if __name__ == '__main__':
unittest.main()

View File

@ -30,7 +30,7 @@ class TestYoutubeLists(unittest.TestCase):
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'ytdl test PL')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
def test_youtube_playlist_noplaylist(self):
@ -39,7 +39,7 @@ class TestYoutubeLists(unittest.TestCase):
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertEqual(result['_type'], 'url')
self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
def test_issue_673(self):
dl = FakeYDL()
@ -59,7 +59,7 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
self.assertFalse('pElCt5oNDuI' in ytie_results)
self.assertFalse('KdPEApIVdWM' in ytie_results)
@ -76,9 +76,9 @@ class TestYoutubeLists(unittest.TestCase):
# TODO find a > 100 (paginating?) videos course
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
entries = result['entries']
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
self.assertEqual(len(entries), 25)
self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
def test_youtube_channel(self):
dl = FakeYDL()
@ -117,6 +117,13 @@ class TestYoutubeLists(unittest.TestCase):
original_video = entries[0]
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
def test_youtube_toptracks(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
entries = result['entries']
self.assertEqual(len(entries), 100)
def test_youtube_toplist(self):
dl = FakeYDL()
ie = YoutubeTopListIE(dl)

View File

@ -41,6 +41,7 @@ __authors__ = (
'Chris Gahan',
'Saimadhav Heblikar',
'Mike Col',
'Andreas Schmitz',
)
__license__ = 'Public Domain'

View File

@ -87,8 +87,10 @@ class RtmpFD(FileDownloader):
url = info_dict['url']
player_url = info_dict.get('player_url', None)
page_url = info_dict.get('page_url', None)
app = info_dict.get('app', None)
play_path = info_dict.get('play_path', None)
tc_url = info_dict.get('tc_url', None)
flash_version = info_dict.get('flash_version', None)
live = info_dict.get('rtmp_live', False)
conn = info_dict.get('rtmp_conn', None)
@ -111,12 +113,16 @@ class RtmpFD(FileDownloader):
basic_args += ['--swfVfy', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
if app is not None:
basic_args += ['--app', app]
if play_path is not None:
basic_args += ['--playpath', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', url]
if test:
basic_args += ['--stop', '1']
if flash_version is not None:
basic_args += ['--flashVer', flash_version]
if live:
basic_args += ['--live']
if conn:

View File

@ -15,6 +15,7 @@ from .arte import (
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
@ -25,6 +26,7 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
@ -103,6 +105,7 @@ from .ivi import (
IviIE,
IviCompilationIE
)
from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
@ -142,6 +145,7 @@ from .myvideo import MyVideoIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import NBCNewsIE
from .ndr import NDRIE
from .ndtv import NDTVIE
from .newgrounds import NewgroundsIE
from .nfb import NFBIE

View File

@ -0,0 +1,116 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class BBCCoUkIE(InfoExtractor):
    """Extractor for bbc.co.uk programme and iPlayer episode pages.

    The 8-character id taken from the URL is resolved through the iPlayer
    playlist XML to the identifier of the playable item; that identifier is
    then looked up in the media selector XML to collect the available
    HTTP, ASX-playlist and RTMP formats.
    """
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC - iPlayer Radio'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/p01q7wz1',
        'info_dict': {
            'id': 'p01q7wz4',
            'ext': 'flv',
            'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix',
            'description': 'Blu Mar Ten deliver a Guest Mix for Friction.',
            'duration': 1936,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    # XML namespaces of the two documents parsed below
    _PLAYLIST_NS = '{http://bbc.co.uk/2008/emp/playlist}'
    _MEDIASELECTION_NS = '{http://bbc.co.uk/2008/mp/mediaselection}'

    @staticmethod
    def _parse_int(value):
        """Return int(value), or None when value is missing or not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Return the info dict for the programme/episode at url.

        Raises ExtractorError (expected=True) when the playlist contains no
        playable item; the message reflects the 'reason' attribute of the
        noItems element when that element is present.
        """
        mobj = re.match(self._VALID_URL, url)
        group_id = mobj.group('id')

        playlist_ns = self._PLAYLIST_NS
        ms_ns = self._MEDIASELECTION_NS

        playlist = self._download_xml(
            'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
            'Downloading playlist XML')

        item = playlist.find('./%sitem' % playlist_ns)
        if item is None:
            no_items = playlist.find('./%snoItems' % playlist_ns)
            if no_items is not None:
                reason = no_items.get('reason')
                if reason == 'preAvailability':
                    msg = 'Episode %s is not yet available' % group_id
                elif reason == 'postAvailability':
                    msg = 'Episode %s is no longer available' % group_id
                else:
                    msg = 'Episode %s is not available: %s' % (group_id, reason)
                raise ExtractorError(msg, expected=True)
            raise ExtractorError(
                'Failed to extract media for episode %s' % group_id, expected=True)

        # The title is mandatory; a missing element still fails loudly here.
        title = playlist.find('./%stitle' % playlist_ns).text
        # The summary is treated as optional so its absence does not abort
        # the whole extraction.
        summary = playlist.find('./%ssummary' % playlist_ns)
        description = summary.text if summary is not None else None

        radio_programme_id = item.get('identifier')
        # A missing or malformed duration yields None instead of a TypeError.
        duration = self._parse_int(item.get('duration'))

        media_selection = self._download_xml(
            'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % radio_programme_id,
            radio_programme_id, 'Downloading media selection XML')

        formats = []
        for media in media_selection.findall('./%smedia' % ms_ns):
            connection = media.find('./%sconnection' % ms_ns)
            if connection is None:
                # No connection means there is nothing to download for this
                # media entry; skip it rather than crash on None.
                continue

            bitrate = self._parse_int(media.get('bitrate'))
            encoding = media.get('encoding')
            service = media.get('service')

            protocol = connection.get('protocol')
            # NOTE(review): priority is the raw attribute string and is passed
            # through unchanged as 'preference' -- confirm that the format
            # sorter handles string preferences as intended.
            priority = connection.get('priority')
            supplier = connection.get('supplier')

            if protocol == 'http':
                href = connection.get('href')
                # ASX playlist
                if supplier == 'asx':
                    asx = self._download_xml(
                        href, radio_programme_id,
                        'Downloading %s ASX playlist' % service)
                    for i, ref in enumerate(asx.findall('./Entry/ref')):
                        formats.append({
                            'url': ref.get('href'),
                            'format_id': '%s_ref%s' % (service, i),
                            'abr': bitrate,
                            'acodec': encoding,
                            'preference': priority,
                        })
                    continue
                # Direct link
                formats.append({
                    'url': href,
                    'format_id': service,
                    'abr': bitrate,
                    'acodec': encoding,
                    'preference': priority,
                })
            elif protocol == 'rtmp':
                application = connection.get('application', 'ondemand')
                auth_string = connection.get('authString')
                identifier = connection.get('identifier')
                server = connection.get('server')
                formats.append({
                    'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
                    'play_path': identifier,
                    'app': '%s?%s' % (application, auth_string),
                    'rtmp_live': False,
                    'ext': 'flv',
                    'format_id': service,
                    'abr': bitrate,
                    'acodec': encoding,
                    'preference': priority,
                })

        self._sort_formats(formats)

        return {
            'id': radio_programme_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }

View File

@ -15,14 +15,15 @@ class Channel9IE(InfoExtractor):
'''
IE_DESC = 'Channel 9'
IE_NAME = 'channel9'
_VALID_URL = r'^https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
_TESTS = [
{
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
'file': 'Events_TechEd_Australia_2013_KOS002.mp4',
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
'info_dict': {
'id': 'Events/TechEd/Australia/2013/KOS002',
'ext': 'mp4',
'title': 'Developer Kick-Off Session: Stuff We Love',
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
'duration': 4576,
@ -35,9 +36,10 @@ class Channel9IE(InfoExtractor):
},
{
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
'file': 'posts_Self-service-BI-with-Power-BI-nuclear-testing.mp4',
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
'info_dict': {
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
'ext': 'mp4',
'title': 'Self-service BI with Power BI - nuclear testing',
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
'duration': 1540,

View File

@ -0,0 +1,97 @@
from __future__ import unicode_literals
import re
import base64
import json
from .common import InfoExtractor
from ..utils import (
clean_html,
ExtractorError
)
class ChilloutzoneIE(InfoExtractor):
    """Extractor for chilloutzone.net video pages.

    The page embeds a base64-encoded JSON blob (``cozVidData``) describing
    the video. Depending on that data the video is delegated to the Youtube
    or Vimeo extractor, or served from the URL given in ``mediaUrl``.
    """
    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
    _TESTS = [{
        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
        'md5': 'a76f3457e813ea0037e5244f509e66d1',
        'info_dict': {
            'id': 'enemene-meck-alle-katzen-weg',
            'ext': 'mp4',
            'title': 'Enemene Meck - Alle Katzen weg',
            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
        },
    }, {
        'note': 'Video hosted at YouTube',
        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
        'info_dict': {
            'id': '1YVQaAgHyRU',
            'ext': 'mp4',
            'title': '16 Photos Taken 1 Second Before Disaster',
            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
            'uploader': 'BuzzFeedVideo',
            'uploader_id': 'BuzzFeedVideo',
            'upload_date': '20131105',
        },
    }, {
        'note': 'Video hosted at Vimeo',
        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
        'info_dict': {
            'id': '85523671',
            'ext': 'mp4',
            'title': 'The Sunday Times - Icons',
            'description': 'md5:3e5e8e839f076a637c6b9406c8f25c4c',
            'uploader': 'Us',
            'uploader_id': 'usfilms',
            'upload_date': '20140131',
        },
    }]

    def _real_extract(self, url):
        """Return a url_result or an info dict for the video page at url.

        Raises ExtractorError('No video found') when no native source
        applies and the page data carries no media URL.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        base64_video_info = self._html_search_regex(
            r'var cozVidData = "(.+?)";', webpage, 'video data')
        # b64decode only accepts text input from Python 3.3 on; encoding to
        # bytes first works on every supported interpreter.
        decoded_video_info = base64.b64decode(
            base64_video_info.encode('ascii')).decode("utf-8")
        video_info_dict = json.loads(decoded_video_info)

        # get video information from dict
        # Only the title is mandatory; the other keys are read with .get() so
        # that a missing key reaches the fallback / "No video found" logic
        # below instead of aborting with a KeyError.
        title = video_info_dict['title']
        video_url = video_info_dict.get('mediaUrl')
        description = clean_html(video_info_dict.get('description'))
        native_platform = video_info_dict.get('nativePlatform')
        native_video_id = video_info_dict.get('nativeVideoId')
        source_priority = video_info_dict.get('sourcePriority')

        # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
        if native_platform is None:
            youtube_url = self._html_search_regex(
                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
                webpage, 'fallback video URL', default=None)
            if youtube_url is not None:
                return self.url_result(youtube_url, ie='Youtube')

        # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
        # the own CDN
        if source_priority == 'native':
            if native_platform == 'youtube':
                return self.url_result(native_video_id, ie='Youtube')
            if native_platform == 'vimeo':
                return self.url_result(
                    'http://vimeo.com/' + native_video_id, ie='Vimeo')

        if not video_url:
            raise ExtractorError('No video found')

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }

View File

@ -9,7 +9,7 @@ from ..utils import unified_strdate
class ElPaisIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
IE_DESCR = 'El País'
IE_DESC = 'El País'
_TEST = {
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',

View File

@ -14,15 +14,16 @@ from ..utils import (
class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
_TESTS = [
# Single movie
{
'url': 'http://www.ivi.ru/watch/53141',
'file': '53141.mp4',
'md5': '6ff5be2254e796ed346251d117196cf4',
'info_dict': {
'id': '53141',
'ext': 'mp4',
'title': 'Иван Васильевич меняет профессию',
'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
'duration': 5498,
@ -33,9 +34,10 @@ class IviIE(InfoExtractor):
# Serial's serie
{
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
'file': '74791.mp4',
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
'info_dict': {
'id': '74791',
'ext': 'mp4',
'title': 'Дежурный ангел - 1 серия',
'duration': 2490,
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
@ -124,7 +126,7 @@ class IviIE(InfoExtractor):
class IviCompilationIE(InfoExtractor):
IE_DESC = 'ivi.ru compilations'
IE_NAME = 'ivi:compilation'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
def _extract_entries(self, html, compilation_id):
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')

View File

@ -0,0 +1,49 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from .youtube import YoutubeIE
class JadoreCettePubIE(InfoExtractor):
    """Extractor for jadorecettepub.com posts.

    Title and description are scraped from the post page; the video itself
    is handed over as a ``url_transparent`` result pointing at the URL found
    in the page, with its id computed by ``YoutubeIE.extract_id``.
    """
    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'

    _TEST = {
        'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',
        'md5': '401286a06067c70b44076044b66515de',
        'info_dict': {
            'id': 'jLMja3tr7a4',
            'ext': 'mp4',
            'title': 'La pire utilisation de Star Wars',
            'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...",
        },
    }

    def _real_extract(self, url):
        """Return a url_transparent result for the post at url."""
        # The slug from the URL is only used for progress/error reporting.
        display_id = re.match(self._VALID_URL, url).group('id')
        page_html = self._download_webpage(url, display_id)

        post_title = self._html_search_regex(
            r'<span style="font-size: x-large;"><b>(.*?)</b></span>',
            page_html, 'title')
        # Non-fatal lookup: a missing description does not abort extraction.
        post_description = self._html_search_regex(
            r'(?s)<div id="fb-root">(.*?)<script>',
            page_html, 'description', fatal=False)

        embedded_url = self._search_regex(
            r'\[/postlink\](.*)endofvid', page_html, 'video URL')

        result = {
            '_type': 'url_transparent',
            'url': embedded_url,
            'id': YoutubeIE.extract_id(embedded_url),
            'title': post_title,
            'description': post_description,
        }
        return result

View File

@ -1,5 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
@ -10,12 +12,13 @@ class JeuxVideoIE(InfoExtractor):
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
_TEST = {
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
u'file': u'5182.mp4',
u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
u'info_dict': {
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
'info_dict': {
'id': '5182',
'ext': 'mp4',
'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
},
}
@ -25,14 +28,14 @@ class JeuxVideoIE(InfoExtractor):
webpage = self._download_webpage(url, title)
xml_link = self._html_search_regex(
r'<param name="flashvars" value="config=(.*?)" />',
webpage, u'config URL')
webpage, 'config URL')
video_id = self._search_regex(
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
xml_link, u'video ID')
xml_link, 'video ID')
config = self._download_xml(
xml_link, title, u'Downloading XML config')
xml_link, title, 'Downloading XML config')
info_json = config.find('format.json').text
info = json.loads(info_json)['versions'][0]

View File

@ -0,0 +1,89 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class NDRIE(InfoExtractor):
    """Extractor for www.ndr.de media pages (video and audio).

    Title and description come from the page's OpenGraph data; the duration
    and the media links are scraped from the page's player markup.
    """
    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Mediathek'
    _VALID_URL = r'https?://www\.ndr\.de/.+?(?P<id>\d+)\.html'

    _TESTS = [
        # video
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html',
            'md5': '20eba151ff165f386643dad9c1da08f7',
            'info_dict': {
                'id': '19925',
                'ext': 'mp4',
                'title': 'Hallo Niedersachsen ',
                'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.',
                'duration': 1722,
            },
        },
        # audio
        {
            'url': 'http://www.ndr.de/903/audio191719.html',
            'md5': '41ed601768534dd18a9ae34d84798129',
            'info_dict': {
                'id': '191719',
                'ext': 'mp3',
                'title': '"Es war schockierend"',
                'description': 'md5:ed7ff8364793545021a6355b97e95f10',
                'duration': 112,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the media page at url.

        Raises ExtractorError when neither an mp3 nor an mp4 source is found
        in the page.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        title = self._og_search_title(page)
        description = self._og_search_description(page)

        # Duration is shown as "<min>:<sec>"; it stays None when the markup
        # is not present.
        duration = None
        duration_match = re.search(
            r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
            page)
        if duration_match:
            minutes = int(duration_match.group('minutes'))
            seconds = int(duration_match.group('seconds'))
            duration = minutes * 60 + seconds

        formats = []

        audio_match = re.search(
            r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
        if audio_match:
            formats.append({
                'url': audio_match.group('audio'),
                'format_id': 'mp3',
            })

        thumbnail = None

        video_match = re.search(
            r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
        if video_match:
            thumbnail = self._html_search_regex(
                r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
                page, 'thumbnail', fatal=False)
            if thumbnail:
                # The poster path is prefixed with the site root.
                thumbnail = 'http://www.ndr.de' + thumbnail
            # The matched URL minus its ".hi.mp4" suffix is the common stem
            # from which the three quality variants are built.
            video_stem = video_match.group('video')
            formats.extend([
                {
                    'url': '%s.%s.mp4' % (video_stem, quality),
                    'format_id': quality,
                }
                for quality in ('lo', 'hi', 'hq')
            ])

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }

View File

@ -43,26 +43,43 @@ class NFBIE(InfoExtractor):
page, 'director name', fatal=False)
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}))
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
config = self._download_xml(request, video_id, 'Downloading player config XML')
thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text
video = config.find("./player/stream/media[@type='video']")
duration = int(video.get('duration'))
title = video.find('title').text
description = video.find('description').text
title = None
description = None
thumbnail = None
duration = None
formats = []
# It seems assets always go from lower to better quality, so no need to sort
formats = [{
'url': x.find('default/streamerURI').text + '/',
'play_path': x.find('default/url').text,
'rtmp_live': False,
'ext': 'mp4',
'format_id': x.get('quality'),
} for x in video.findall('assets/asset')]
def extract_thumbnail(media):
thumbnails = {}
for asset in media.findall('assets/asset'):
thumbnails[asset.get('quality')] = asset.find('default/url').text
if not thumbnails:
return None
if 'high' in thumbnails:
return thumbnails['high']
return list(thumbnails.values())[0]
for media in config.findall('./player/stream/media'):
if media.get('type') == 'posterImage':
thumbnail = extract_thumbnail(media)
elif media.get('type') == 'video':
duration = int(media.get('duration'))
title = media.find('title').text
description = media.find('description').text
# It seems assets always go from lower to better quality, so no need to sort
formats = [{
'url': x.find('default/streamerURI').text + '/',
'play_path': x.find('default/url').text,
'rtmp_live': False,
'ext': 'mp4',
'format_id': x.get('quality'),
} for x in media.findall('assets/asset')]
return {
'id': video_id,

View File

@ -9,7 +9,7 @@ class PBSIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:
# Direct video URL
video\.pbs\.org/video/(?P<id>[0-9]+)/? |
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
# Article with embedded player
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
# Player

View File

@ -11,7 +11,7 @@ from ..aes import (
)
class Tube8IE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
_TEST = {
u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
u'file': u'229795.mp4',

View File

@ -1085,8 +1085,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self._downloader.report_warning(err_msg)
return {}
def _extract_id(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@classmethod
def extract_id(cls, url):
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group(2)
@ -1115,7 +1116,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
mobj = re.search(self._NEXT_URL_RE, url)
if mobj:
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
video_id = self._extract_id(url)
video_id = self.extract_id(url)
# Get video webpage
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
@ -1422,7 +1423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com playlists'
_VALID_URL = r"""(?:
_VALID_URL = r"""(?x)(?:
(?:https?://)?
(?:\w+\.)?
youtube\.com/
@ -1431,7 +1432,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
\? (?:.*?&)*? (?:p|a|list)=
| p/
)
((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
(
(?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
.*
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
@ -1441,11 +1446,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
IE_NAME = u'youtube:playlist'
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def _real_initialize(self):
self._login()
@ -1469,7 +1469,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
def _real_extract(self, url):
# Extract playlist id
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
playlist_id = mobj.group(1) or mobj.group(2)

View File

@ -1,2 +1,2 @@
__version__ = '2014.02.06.2'
__version__ = '2014.02.08.2'