Compare commits

...

7 Commits

7 changed files with 78 additions and 30 deletions

View File

@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
RaiIE, RaiIE,
VikiIE, VikiIE,
ThePlatformIE, ThePlatformIE,
RTVEALaCartaIE,
) )
@@ -305,5 +306,19 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle-extraction test for RTVE's 'A la carta' catch-up service."""
    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    def test_allsubtitles(self):
        # Early return deliberately skips the test: the video is
        # geo-restricted, so everything below is unreachable outside Spain.
        print('Skipping, only available from Spain')
        return
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        # Exactly one Spanish track is expected; the md5 pins its content.
        self.assertEqual(set(subtitles.keys()), set(['es']))
        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -6,7 +6,7 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor): class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html' _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
_TEST = { _TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
@@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) name = self._match_id(url)
name = mobj.group('name')
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
f4m_url = self._search_regex( f4m_url = self._search_regex(
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage, r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
'f4m url') 'f4m url')

View File

@@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
params_raw = compat_urllib_parse.unquote(data['params']) params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw) params = json.loads(params_raw)
video_data = params['video_data'][0] video_data = params['video_data'][0]
video_url = video_data.get('hd_src')
if not video_url: formats = []
video_url = video_data['sd_src'] for quality in ['sd', 'hd']:
if not video_url: src = video_data.get('%s_src' % quality)
raise ExtractorError('Cannot find video URL') if src is not None:
formats.append({
'format_id': quality,
'url': src,
})
if not formats:
raise ExtractorError('Cannot find video formats')
video_title = self._html_search_regex( video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title', r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
@@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'url': video_url, 'formats': formats,
'duration': int_or_none(video_data.get('video_duration')), 'duration': int_or_none(video_data.get('video_duration')),
'thumbnail': video_data.get('thumbnail_src'), 'thumbnail': video_data.get('thumbnail_src'),
} }

View File

@@ -1,23 +1,26 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals from __future__ import unicode_literals
import random import random
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import (
ExtractorError,
xpath_text,
)
class Laola1TvIE(InfoExtractor): class Laola1TvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
_TEST = { _TEST = {
'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html', 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': { 'info_dict': {
'id': '250019', 'id': '227883',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Bitburger Open Grand Prix Gold - Court 1', 'title': 'Straubing Tigers - Kölner Haie',
'categories': ['Badminton'], 'categories': ['Eishockey'],
'uploader': 'BWF - Badminton World Federation', 'is_live': False,
'is_live': True,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@@ -43,15 +46,26 @@ class Laola1TvIE(InfoExtractor):
r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe) r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
flashvars = dict((m[0], m[1]) for m in flashvars_m) flashvars = dict((m[0], m[1]) for m in flashvars_m)
partner_id = self._search_regex(
r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
xml_url = ('http://www.laola1.tv/server/hd_video.php?' + xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % ( 'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
video_id, portal, lang)) video_id, partner_id, portal, lang))
hd_doc = self._download_xml(xml_url, video_id) hd_doc = self._download_xml(xml_url, video_id)
title = hd_doc.find('.//video/title').text title = xpath_text(hd_doc, './/video/title', fatal=True)
flash_url = hd_doc.find('.//video/url').text flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
categories = hd_doc.find('.//video/meta_sports').text.split(',') uploader = xpath_text(hd_doc, './/video/meta_organistation')
uploader = hd_doc.find('.//video/meta_organistation').text
is_live = xpath_text(hd_doc, './/video/islive') == 'true'
if is_live:
raise ExtractorError(
'Live streams are not supported by the f4m downloader.')
categories = xpath_text(hd_doc, './/video/meta_sports')
if categories:
categories = categories.split(',')
ident = random.randint(10000000, 99999999) ident = random.randint(10000000, 99999999)
token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % ( token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
@@ -60,15 +74,16 @@ class Laola1TvIE(InfoExtractor):
token_doc = self._download_xml( token_doc = self._download_xml(
token_url, video_id, note='Downloading token') token_url, video_id, note='Downloading token')
token_attrib = token_doc.find('.//token').attrib token_attrib = token_doc.find('.//token').attrib
if token_attrib.get('auth') == 'blocked': if token_attrib.get('auth') in ('blocked', 'restricted'):
raise ExtractorError('Token error: ' % token_attrib.get('comment')) raise ExtractorError(
'Token error: %s' % token_attrib.get('comment'), expected=True)
video_url = '%s?hdnea=%s&hdcore=3.2.0' % ( video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
token_attrib['url'], token_attrib['auth']) token_attrib['url'], token_attrib['auth'])
return { return {
'id': video_id, 'id': video_id,
'is_live': True, 'is_live': is_live,
'title': title, 'title': title,
'url': video_url, 'url': video_url,
'uploader': uploader, 'uploader': uploader,

View File

@@ -102,14 +102,26 @@ class RTVEALaCartaIE(InfoExtractor):
video_url = compat_urlparse.urljoin( video_url = compat_urlparse.urljoin(
'http://mvod1.akcdn.rtve.es/', video_path) 'http://mvod1.akcdn.rtve.es/', video_path)
subtitles = None
if info.get('sbtFile') is not None:
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
return { return {
'id': video_id, 'id': video_id,
'title': info['title'], 'title': info['title'],
'url': video_url, 'url': video_url,
'thumbnail': info.get('image'), 'thumbnail': info.get('image'),
'page_url': url, 'page_url': url,
'subtitles': subtitles,
} }
def _get_subtitles(self, video_id, sub_file):
    """Return a language -> subtitle-track mapping for the given video.

    The listing lives at the subtitle file URL with '.json' appended;
    each entry yields a single WebVTT track for its language.
    """
    info = self._download_json(
        sub_file + '.json', video_id,
        'Downloading subtitles info')
    items = info['page']['items']
    return {
        item['lang']: [{'ext': 'vtt', 'url': item['src']}]
        for item in items
    }
class RTVELiveIE(InfoExtractor): class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live' IE_NAME = 'rtve.es:live'

View File

@@ -1153,13 +1153,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
| p/ | p/
) )
( (
(?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots # Top tracks, they can also include dots
|(?:MC)[\w\.]* |(?:MC)[\w\.]*
) )
.* .*
| |
((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
)""" )"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
@@ -1244,7 +1244,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
for vid_id in ids] for vid_id in ids]
def _extract_mix(self, playlist_id): def _extract_mix(self, playlist_id):
# The mixes are generated from a a single video # The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id # the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
webpage = self._download_webpage( webpage = self._download_webpage(
@@ -1280,7 +1280,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
else: else:
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
if playlist_id.startswith('RD'): if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
# Mixes require a custom extraction process # Mixes require a custom extraction process
return self._extract_mix(playlist_id) return self._extract_mix(playlist_id)

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2015.02.23.1' __version__ = '2015.02.24'