Compare commits

...

17 Commits

Author SHA1 Message Date
34814eb66e release 2015.02.10 2015-02-10 01:19:52 +01:00
3a5bcd0326 [extractor/common] Wrap extractor errors (Fixes #1194)
For now, we just wrap some common errors. More may follow. We do not want to catch actual programming errors in the extractors, such as 1 // 0.
2015-02-10 01:17:23 +01:00
99c2398bc6 [bandcamp] Use our API to get more stable error messages (#1194) 2015-02-09 19:08:51 +01:00
28f1272870 [svtplay] Correct test case 2015-02-09 16:05:01 +01:00
f18e3a2fc0 release 2015.02.09.3 2015-02-09 15:59:19 +01:00
c4c5dc27cb Merge branch 'master' of github.com:rg3/youtube-dl 2015-02-09 15:59:14 +01:00
2caf182f37 [trilulilu] Add support for videos without category in the URL (Closes #4067)
Also, update the testcase, detect private/country restricted videos and modernize a bit.
2015-02-09 17:00:05 +02:00
43f244b6d5 [YoutubeDL] Do not show worst in --list-formats output
Nobody wants to know what the worst possible format is. And if they do, they can still provide -f worst.
2015-02-09 15:57:42 +01:00
1309b396d0 [svtplay] Add new extractor (Fixes #4914) 2015-02-09 15:56:59 +01:00
ba61796458 [youtube] Don't override format info from the dash manifest (fixes #4911) 2015-02-09 15:04:22 +01:00
3255fe7141 release 2015.02.09.2 2015-02-09 14:46:30 +01:00
e98b8e79ea [generic] Improve SBS detection (Fixes #4899) 2015-02-09 14:46:10 +01:00
196121c51b release 2015.02.09.1 2015-02-09 10:49:10 +01:00
5269028951 [rtlnow] Add test for @mmue's extension (#4908) 2015-02-09 10:47:19 +01:00
f7bc056b5a Merge remote-tracking branch 'mmue/fix-rtlnow' 2015-02-09 10:44:55 +01:00
a0f7198544 [generic] Add support for jwPlayer YouTube videos
This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)
2015-02-09 10:43:01 +01:00
bdb186f3b0 fix rtlnow for newer series like "Der Bachelor" season 5 2015-02-08 21:55:39 +01:00
11 changed files with 158 additions and 36 deletions

View File

@ -392,6 +392,7 @@
- **StreamCZ**
- **StreetVoice**
- **SunPorno**
- **SVTPlay**
- **SWRMediathek**
- **Syfy**
- **SztvHu**

View File

@ -1546,7 +1546,6 @@ class YoutubeDL(object):
line(f, idlen) for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1:
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
header_line = line({

View File

@ -428,6 +428,7 @@ from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE
from .sunporno import SunPornoIE
from .svtplay import SVTPlayIE
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE

View File

@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor):
download_link = m_download.group(1)
video_id = self._search_regex(
r'var TralbumData = {.*?id: (?P<id>\d+),?$',
webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
webpage, 'video id')
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
# We get the dictionary of the track from some javascript code
info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
info = json.loads(info)[0]
all_info = self._parse_json(self._search_regex(
r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
info = All_info[0]
# We pick mp3-320 for now, until format selection can be easily implemented.
mp3_info = info['downloads']['mp3-320']
# If we try to use this url it says the link has expired
initial_url = mp3_info['url']
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
m_url = re.match(re_url, initial_url)
m_url = re.match(
r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
initial_url)
# We build the url we will use to get the final track url
# This url is build in Bandcamp in the script download_bunde_*.js
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
# If we could correctly generate the .rand field the url would be
# in the "download_url" key
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
final_url = self._search_regex(
r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL')
return {
'id': video_id,

View File

@ -264,8 +264,15 @@ class InfoExtractor(object):
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
self.initialize()
return self._real_extract(url)
try:
self.initialize()
return self._real_extract(url)
except ExtractorError:
raise
except compat_http_client.IncompleteRead as e:
raise ExtractorError('A network error has occured.', cause=e, expected=True)
except (KeyError,) as e:
raise ExtractorError('An extractor error has occured.', cause=e)
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""

View File

@ -524,6 +524,19 @@ class GenericIE(InfoExtractor):
'upload_date': '20150126',
},
'add_ie': ['Viddler'],
},
# jwplayer YouTube
{
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
'info_dict': {
'id': 'Mrj4DVp2zeA',
'ext': 'mp4',
'upload_date': '20150204',
'uploader': 'The National Archives UK',
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
'uploader_id': 'NationalArchives08',
'title': 'Webinar: Using Discovery, The National Archives online catalogue',
},
}
]
@ -1034,7 +1047,12 @@ class GenericIE(InfoExtractor):
# Look for embedded sbs.com.au player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
r'''(?x)
(?:
<meta\s+property="og:video"\s+content=|
<iframe[^>]+?src=
)
(["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS')
@ -1065,6 +1083,8 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Livestream')
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
@ -1082,7 +1102,8 @@ class GenericIE(InfoExtractor):
JWPlayerOptions|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
)
.*?file\s*:\s*["\'](.*?)["\']''', webpage))
.*?
['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
if not found:
# Broaden the search a little bit
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))

View File

@ -91,6 +91,15 @@ class RTLnowIE(InfoExtractor):
},
},
{
'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5',
'info_dict': {
'id': '188729',
'ext': 'flv',
'upload_date': '20150204',
'description': 'md5:5e1ce23095e61a79c166d134b683cecc',
'title': 'Der Bachelor - Folge 4',
}
}, {
'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
'only_matching': True,
},
@ -134,9 +143,18 @@ class RTLnowIE(InfoExtractor):
'player_url': video_page_url + 'includes/vodplayer.swf',
}
else:
fmt = {
'url': filename.text,
}
mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
if mobj:
fmt = {
'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
'play_path': 'mp4:' + mobj.group('play_path'),
'page_url': url,
'player_url': video_page_url + 'includes/vodplayer.swf',
}
else:
fmt = {
'url': filename.text,
}
fmt.update({
'width': int_or_none(filename.get('width')),
'height': int_or_none(filename.get('height')),

View File

@ -0,0 +1,56 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
class SVTPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
'md5': 'f4a184968bc9c802a9b41316657aaa80',
'info_dict': {
'id': '2609989',
'ext': 'mp4',
'title': 'SM veckan vinter, Örebro - Rally, final',
'duration': 4500,
'thumbnail': 're:^https?://.*[\.-]jpg$',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_json(
'http://www.svtplay.se/video/%s?output=json' % video_id, video_id)
title = info['context']['title']
thumbnail = info['context'].get('thumbnailImage')
video_info = info['video']
formats = []
for vr in video_info['videoReferences']:
vurl = vr['url']
if determine_ext(vurl) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
vurl, video_id,
ext='mp4', entry_protocol='m3u8_native',
m3u8_id=vr.get('playerType')))
else:
formats.append({
'format_id': vr.get('playerType'),
'url': vurl,
})
self._sort_formats(formats)
duration = video_info.get('materialLength')
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@ -1,40 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class TriluliluIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)'
_TEST = {
'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
'md5': 'c1450a00da251e2769b74b9005601cac',
'info_dict': {
'id': 'big-buck-bunny-1',
'id': 'ae2899e124140b',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'description': ':) pentru copilul din noi',
},
# Server ignores Range headers (--test)
'params': {
'skip_download': True
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage):
raise ExtractorError(
'This video is not available in your country.', expected=True)
elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage):
raise ExtractorError('This video is private.', expected=True)
flashvars_str = self._search_regex(
r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None)
if flashvars_str:
flashvars = self._parse_json(flashvars_str, display_id)
else:
raise ExtractorError(
'This page does not contain videos', expected=True)
if flashvars['isMP3'] == 'true':
raise ExtractorError(
'Audio downloads are currently not supported', expected=True)
video_id = flashvars['hash']
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage)
log_str = self._search_regex(
r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info')
log = json.loads(log_str)
description = self._og_search_description(webpage, default=None)
format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
'video-formats2' % log)
'video-formats2' % flashvars)
format_doc = self._download_xml(
format_url, video_id,
note='Downloading formats',
@ -44,10 +59,10 @@ class TriluliluIE(InfoExtractor):
'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
'&source=site&hash=%(hash)s&username=%(userid)s&'
'key=ministhebest&format=%%s&sig=&exp=' %
log)
flashvars)
formats = [
{
'format': fnode.text,
'format_id': fnode.text.partition('-')[2],
'url': video_url_template % fnode.text,
'ext': fnode.text.partition('-')[0]
}
@ -56,8 +71,8 @@ class TriluliluIE(InfoExtractor):
]
return {
'_type': 'video',
'id': video_id,
'display_id': display_id,
'formats': formats,
'title': title,
'description': description,

View File

@ -780,8 +780,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
f.update(self._formats.get(format_id, {}).items())
formats.append(f)
full_info = self._formats.get(format_id, {}).copy()
full_info.update(f)
formats.append(full_info)
else:
existing_format.update(f)
return formats

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.02.09'
__version__ = '2015.02.10'