Compare commits
24 Commits
2014.11.20
...
2014.11.21
Author | SHA1 | Date | |
---|---|---|---|
|
2c64b8ba63 | ||
|
42e12102a9 | ||
|
6127693ed9 | ||
|
71069d2157 | ||
|
f3391db889 | ||
|
9b32eca3ce | ||
|
ec06f0f610 | ||
|
e6c9c8f6ee | ||
|
85b9275517 | ||
|
dfd5313afd | ||
|
be53e2a737 | ||
|
a1c68b9ef2 | ||
|
4d46c1c68c | ||
|
d6f714f321 | ||
|
8569f3d629 | ||
|
fed5d03260 | ||
|
6adeffa7c6 | ||
|
b244b5c3f9 | ||
|
f42c190769 | ||
|
c9bf41145f | ||
|
5239075bb6 | ||
|
84437adfa3 | ||
|
732ea2f09b | ||
|
aff2f4f4f5 |
@@ -624,7 +624,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
return self.process_ie_result(
|
return self.process_ie_result(
|
||||||
new_result, download=download, extra_info=extra_info)
|
new_result, download=download, extra_info=extra_info)
|
||||||
elif result_type == 'playlist':
|
elif result_type == 'playlist' or result_type == 'multi_video':
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||||
@@ -679,6 +679,9 @@ class YoutubeDL(object):
|
|||||||
ie_result['entries'] = playlist_results
|
ie_result['entries'] = playlist_results
|
||||||
return ie_result
|
return ie_result
|
||||||
elif result_type == 'compat_list':
|
elif result_type == 'compat_list':
|
||||||
|
self.report_warning(
|
||||||
|
'Extractor %s returned a compat_list result. '
|
||||||
|
'It needs to be updated.' % ie_result.get('extractor'))
|
||||||
def _fixup(r):
|
def _fixup(r):
|
||||||
self.add_extra_info(r,
|
self.add_extra_info(r,
|
||||||
{
|
{
|
||||||
|
@@ -306,7 +306,7 @@ def workaround_optparse_bug9161():
|
|||||||
og = optparse.OptionGroup(op, 'foo')
|
og = optparse.OptionGroup(op, 'foo')
|
||||||
try:
|
try:
|
||||||
og.add_option('-t')
|
og.add_option('-t')
|
||||||
except TypeError as te:
|
except TypeError:
|
||||||
real_add_option = optparse.OptionGroup.add_option
|
real_add_option = optparse.OptionGroup.add_option
|
||||||
|
|
||||||
def _compat_add_option(self, *args, **kwargs):
|
def _compat_add_option(self, *args, **kwargs):
|
||||||
|
@@ -115,6 +115,7 @@ from .fktv import (
|
|||||||
FKTVPosteckeIE,
|
FKTVPosteckeIE,
|
||||||
)
|
)
|
||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
|
from .folketinget import FolketingetIE
|
||||||
from .fourtube import FourTubeIE
|
from .fourtube import FourTubeIE
|
||||||
from .franceculture import FranceCultureIE
|
from .franceculture import FranceCultureIE
|
||||||
from .franceinter import FranceInterIE
|
from .franceinter import FranceInterIE
|
||||||
|
@@ -5,13 +5,12 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
determine_ext,
|
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
qualities,
|
||||||
)
|
)
|
||||||
|
|
||||||
# There are different sources of video in arte.tv, the extraction process
|
# There are different sources of video in arte.tv, the extraction process
|
||||||
@@ -102,79 +101,54 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
'upload_date': unified_strdate(upload_date_str),
|
'upload_date': unified_strdate(upload_date_str),
|
||||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||||
}
|
}
|
||||||
|
qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
|
||||||
|
|
||||||
all_formats = []
|
formats = []
|
||||||
for format_id, format_dict in player_info['VSR'].items():
|
for format_id, format_dict in player_info['VSR'].items():
|
||||||
fmt = dict(format_dict)
|
f = dict(format_dict)
|
||||||
fmt['format_id'] = format_id
|
versionCode = f.get('versionCode')
|
||||||
all_formats.append(fmt)
|
|
||||||
# Some formats use the m3u8 protocol
|
|
||||||
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
|
|
||||||
def _match_lang(f):
|
|
||||||
if f.get('versionCode') is None:
|
|
||||||
return True
|
|
||||||
# Return true if that format is in the language of the url
|
|
||||||
if lang == 'fr':
|
|
||||||
l = 'F'
|
|
||||||
elif lang == 'de':
|
|
||||||
l = 'A'
|
|
||||||
else:
|
|
||||||
l = lang
|
|
||||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
|
||||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
|
||||||
# Some formats may not be in the same language as the url
|
|
||||||
# TODO: Might want not to drop videos that does not match requested language
|
|
||||||
# but to process those formats with lower precedence
|
|
||||||
formats = filter(_match_lang, all_formats)
|
|
||||||
formats = list(formats) # in python3 filter returns an iterator
|
|
||||||
if not formats:
|
|
||||||
# Some videos are only available in the 'Originalversion'
|
|
||||||
# they aren't tagged as being in French or German
|
|
||||||
# Sometimes there are neither videos of requested lang code
|
|
||||||
# nor original version videos available
|
|
||||||
# For such cases we just take all_formats as is
|
|
||||||
formats = all_formats
|
|
||||||
if not formats:
|
|
||||||
raise ExtractorError('The formats list is empty')
|
|
||||||
|
|
||||||
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
langcode = {
|
||||||
def sort_key(f):
|
'fr': 'F',
|
||||||
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
'de': 'A',
|
||||||
else:
|
}.get(lang, lang)
|
||||||
def sort_key(f):
|
lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode]
|
||||||
versionCode = f.get('versionCode')
|
lang_pref = (
|
||||||
if versionCode is None:
|
None if versionCode is None else (
|
||||||
versionCode = ''
|
10 if any(re.match(r, versionCode) for r in lang_rexs)
|
||||||
return (
|
else -10))
|
||||||
# Sort first by quality
|
source_pref = 0
|
||||||
int(f.get('height', -1)),
|
if versionCode is not None:
|
||||||
int(f.get('bitrate', -1)),
|
# The original version with subtitles has lower relevance
|
||||||
# The original version with subtitles has lower relevance
|
if re.match(r'VO-ST(F|A)', versionCode):
|
||||||
re.match(r'VO-ST(F|A)', versionCode) is None,
|
source_pref -= 10
|
||||||
# The version with sourds/mal subtitles has also lower relevance
|
# The version with sourds/mal subtitles has also lower relevance
|
||||||
re.match(r'VO?(F|A)-STM\1', versionCode) is None,
|
elif re.match(r'VO?(F|A)-STM\1', versionCode):
|
||||||
# Prefer http downloads over m3u8
|
source_pref -= 9
|
||||||
0 if f['url'].endswith('m3u8') else 1,
|
format = {
|
||||||
)
|
'format_id': format_id,
|
||||||
formats = sorted(formats, key=sort_key)
|
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
||||||
def _format(format_info):
|
'language_preference': lang_pref,
|
||||||
info = {
|
'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
|
||||||
'format_id': format_info['format_id'],
|
'width': int_or_none(f.get('width')),
|
||||||
'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
|
'height': int_or_none(f.get('height')),
|
||||||
'width': int_or_none(format_info.get('width')),
|
'tbr': int_or_none(f.get('bitrate')),
|
||||||
'height': int_or_none(format_info.get('height')),
|
'quality': qfunc(f['quality']),
|
||||||
'tbr': int_or_none(format_info.get('bitrate')),
|
'source_preference': source_pref,
|
||||||
}
|
}
|
||||||
if format_info['mediaType'] == 'rtmp':
|
|
||||||
info['url'] = format_info['streamer']
|
|
||||||
info['play_path'] = 'mp4:' + format_info['url']
|
|
||||||
info['ext'] = 'flv'
|
|
||||||
else:
|
|
||||||
info['url'] = format_info['url']
|
|
||||||
info['ext'] = determine_ext(info['url'])
|
|
||||||
return info
|
|
||||||
info_dict['formats'] = [_format(f) for f in formats]
|
|
||||||
|
|
||||||
|
if f.get('mediaType') == 'rtmp':
|
||||||
|
format['url'] = f['streamer']
|
||||||
|
format['play_path'] = 'mp4:' + f['url']
|
||||||
|
format['ext'] = 'flv'
|
||||||
|
else:
|
||||||
|
format['url'] = f['url']
|
||||||
|
|
||||||
|
formats.append(format)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
info_dict['formats'] = formats
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
|
|
||||||
|
@@ -166,9 +166,17 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class BlipTVUserIE(InfoExtractor):
|
class BlipTVUserIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
|
_VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
|
||||||
_PAGE_SIZE = 12
|
_PAGE_SIZE = 12
|
||||||
IE_NAME = 'blip.tv:user'
|
IE_NAME = 'blip.tv:user'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://blip.tv/actone',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'actone',
|
||||||
|
'title': 'Act One: The Series',
|
||||||
|
},
|
||||||
|
'playlist_count': 5,
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -179,6 +187,7 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
page = self._download_webpage(url, username, 'Downloading user page')
|
page = self._download_webpage(url, username, 'Downloading user page')
|
||||||
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
||||||
page_base = page_base % mobj.group(1)
|
page_base = page_base % mobj.group(1)
|
||||||
|
title = self._og_search_title(page)
|
||||||
|
|
||||||
# Download video ids using BlipTV Ajax calls. Result size per
|
# Download video ids using BlipTV Ajax calls. Result size per
|
||||||
# query is limited (currently to 12 videos) so we need to query
|
# query is limited (currently to 12 videos) so we need to query
|
||||||
@@ -215,4 +224,5 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
|
|
||||||
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
||||||
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
||||||
return [self.playlist_result(url_entries, playlist_title=username)]
|
return self.playlist_result(
|
||||||
|
url_entries, playlist_title=title, playlist_id=username)
|
||||||
|
@@ -111,6 +111,8 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
lambda m: m.group(1) + '/>', object_str)
|
lambda m: m.group(1) + '/>', object_str)
|
||||||
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
|
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
|
||||||
object_str = object_str.replace('<--', '<!--')
|
object_str = object_str.replace('<--', '<!--')
|
||||||
|
# remove namespace to simplify extraction
|
||||||
|
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
|
||||||
object_str = fix_xml_ampersands(object_str)
|
object_str = fix_xml_ampersands(object_str)
|
||||||
|
|
||||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||||
@@ -219,7 +221,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
error_msg = self._html_search_regex(
|
error_msg = self._html_search_regex(
|
||||||
r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
|
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||||
'error message', default=None)
|
'error message', default=None)
|
||||||
if error_msg is not None:
|
if error_msg is not None:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
|
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
@@ -110,9 +109,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError('Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
if mobj.group('shortname'):
|
if mobj.group('shortname'):
|
||||||
if mobj.group('shortname') in ('tds', 'thedailyshow'):
|
if mobj.group('shortname') in ('tds', 'thedailyshow'):
|
||||||
|
@@ -43,7 +43,11 @@ class InfoExtractor(object):
|
|||||||
information possibly downloading the video to the file system, among
|
information possibly downloading the video to the file system, among
|
||||||
other possible outcomes.
|
other possible outcomes.
|
||||||
|
|
||||||
The dictionaries must include the following fields:
|
The type field determines the type of the result.
|
||||||
|
By far the most common value (and the default if _type is missing) is
|
||||||
|
"video", which indicates a single video.
|
||||||
|
|
||||||
|
For a video, the dictionaries must include the following fields:
|
||||||
|
|
||||||
id: Video identifier.
|
id: Video identifier.
|
||||||
title: Video title, unescaped.
|
title: Video title, unescaped.
|
||||||
@@ -87,6 +91,11 @@ class InfoExtractor(object):
|
|||||||
by this field, regardless of all other values.
|
by this field, regardless of all other values.
|
||||||
-1 for default (order by other properties),
|
-1 for default (order by other properties),
|
||||||
-2 or smaller for less than default.
|
-2 or smaller for less than default.
|
||||||
|
* language_preference Is this in the correct requested
|
||||||
|
language?
|
||||||
|
10 if it's what the URL is about,
|
||||||
|
-1 for default (don't know),
|
||||||
|
-10 otherwise, other values reserved for now.
|
||||||
* quality Order number of the video quality of this
|
* quality Order number of the video quality of this
|
||||||
format, irrespective of the file format.
|
format, irrespective of the file format.
|
||||||
-1 for default (order by other properties),
|
-1 for default (order by other properties),
|
||||||
@@ -146,6 +155,38 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
Unless mentioned otherwise, None is equivalent to absence of information.
|
Unless mentioned otherwise, None is equivalent to absence of information.
|
||||||
|
|
||||||
|
|
||||||
|
_type "playlist" indicates multiple videos.
|
||||||
|
There must be a key "entries", which is a list or a PagedList object, each
|
||||||
|
element of which is a valid dictionary under this specification.
|
||||||
|
|
||||||
|
Additionally, playlists can have "title" and "id" attributes with the same
|
||||||
|
semantics as videos (see above).
|
||||||
|
|
||||||
|
|
||||||
|
_type "multi_video" indicates that there are multiple videos that
|
||||||
|
form a single show, for example, multiple acts of an opera or TV episode.
|
||||||
|
It must have an entries key like a playlist and contain all the keys
|
||||||
|
required for a video at the same time.
|
||||||
|
|
||||||
|
|
||||||
|
_type "url" indicates that the video must be extracted from another
|
||||||
|
location, possibly by a different extractor. Its only required key is:
|
||||||
|
"url" - the next URL to extract.
|
||||||
|
|
||||||
|
Additionally, it may have properties believed to be identical to the
|
||||||
|
resolved entity, for example "title" if the title of the referred video is
|
||||||
|
known ahead of time.
|
||||||
|
|
||||||
|
|
||||||
|
_type "url_transparent" entities have the same specification as "url", but
|
||||||
|
indicate that the given additional information is more precise than the one
|
||||||
|
associated with the resolved URL.
|
||||||
|
This is useful when a site employs a video service that hosts the video and
|
||||||
|
its technical metadata, but that video service does not embed a useful
|
||||||
|
title, description etc.
|
||||||
|
|
||||||
|
|
||||||
Subclasses of this one should re-define the _real_initialize() and
|
Subclasses of this one should re-define the _real_initialize() and
|
||||||
_real_extract() methods and define a _VALID_URL regexp.
|
_real_extract() methods and define a _VALID_URL regexp.
|
||||||
Probably, they should also be added to the list of extractors.
|
Probably, they should also be added to the list of extractors.
|
||||||
@@ -615,6 +656,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
preference,
|
preference,
|
||||||
|
f.get('language_preference') if f.get('language_preference') is not None else -1,
|
||||||
f.get('quality') if f.get('quality') is not None else -1,
|
f.get('quality') if f.get('quality') is not None else -1,
|
||||||
f.get('height') if f.get('height') is not None else -1,
|
f.get('height') if f.get('height') is not None else -1,
|
||||||
f.get('width') if f.get('width') is not None else -1,
|
f.get('width') if f.get('width') is not None else -1,
|
||||||
|
75
youtube_dl/extractor/folketinget.py
Normal file
75
youtube_dl/extractor/folketinget.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_parse_qs
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FolketingetIE(InfoExtractor):
|
||||||
|
IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1165642',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Åbent samråd i Erhvervsudvalget',
|
||||||
|
'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
|
||||||
|
'view_count': int,
|
||||||
|
'width': 768,
|
||||||
|
'height': 432,
|
||||||
|
'tbr': 928000,
|
||||||
|
'timestamp': 1416493800,
|
||||||
|
'upload_date': '20141120',
|
||||||
|
'duration': 3960,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'rtmpdump required',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
|
||||||
|
player_params = compat_parse_qs(self._search_regex(
|
||||||
|
r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
|
||||||
|
webpage, 'player params'))
|
||||||
|
xml_url = player_params['xml'][0]
|
||||||
|
doc = self._download_xml(xml_url, video_id)
|
||||||
|
|
||||||
|
timestamp = parse_iso8601(xpath_text(doc, './/date'))
|
||||||
|
duration = parse_duration(xpath_text(doc, './/duration'))
|
||||||
|
width = int_or_none(xpath_text(doc, './/width'))
|
||||||
|
height = int_or_none(xpath_text(doc, './/height'))
|
||||||
|
view_count = int_or_none(xpath_text(doc, './/views'))
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'format_id': n.attrib['bitrate'],
|
||||||
|
'url': xpath_text(n, './url', fatal=True),
|
||||||
|
'tbr': int_or_none(n.attrib['bitrate']),
|
||||||
|
} for n in doc.findall('.//streams/stream')]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'description': description,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
}
|
@@ -979,7 +979,7 @@ class GenericIE(InfoExtractor):
|
|||||||
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# HTML5 video
|
# HTML5 video
|
||||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src="([^"]+)"', webpage)
|
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
found = re.search(
|
found = re.search(
|
||||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||||
|
@@ -1,9 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_duration,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -145,7 +145,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
idoc = self._download_xml(
|
idoc = self._download_xml(
|
||||||
feed_url + '?' + data, video_id,
|
feed_url + '?' + data, video_id,
|
||||||
'Downloading info', transform_source=fix_xml_ampersands)
|
'Downloading info', transform_source=fix_xml_ampersands)
|
||||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
return self.playlist_result(
|
||||||
|
[self._get_video_info(item) for item in idoc.findall('.//item')])
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
title = url_basename(url)
|
title = url_basename(url)
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -9,24 +11,23 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class StanfordOpenClassroomIE(InfoExtractor):
|
class StanfordOpenClassroomIE(InfoExtractor):
|
||||||
IE_NAME = u'stanfordoc'
|
IE_NAME = 'stanfordoc'
|
||||||
IE_DESC = u'Stanford Open ClassRoom'
|
IE_DESC = 'Stanford Open ClassRoom'
|
||||||
_VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
|
_VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
|
'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
|
||||||
u'file': u'PracticalUnix_intro-environment.mp4',
|
'md5': '544a9468546059d4e80d76265b0443b8',
|
||||||
u'md5': u'544a9468546059d4e80d76265b0443b8',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'PracticalUnix_intro-environment',
|
||||||
u"title": u"Intro Environment"
|
'ext': 'mp4',
|
||||||
|
'title': 'Intro Environment',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
if mobj.group('course') and mobj.group('video'): # A specific video
|
if mobj.group('course') and mobj.group('video'): # A specific video
|
||||||
course = mobj.group('course')
|
course = mobj.group('course')
|
||||||
video = mobj.group('video')
|
video = mobj.group('video')
|
||||||
info = {
|
info = {
|
||||||
@@ -35,7 +36,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
|||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
}
|
}
|
||||||
|
|
||||||
self.report_extraction(info['id'])
|
|
||||||
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
|
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
|
||||||
xmlUrl = baseUrl + video + '.xml'
|
xmlUrl = baseUrl + video + '.xml'
|
||||||
mdoc = self._download_xml(xmlUrl, info['id'])
|
mdoc = self._download_xml(xmlUrl, info['id'])
|
||||||
@@ -43,63 +43,49 @@ class StanfordOpenClassroomIE(InfoExtractor):
|
|||||||
info['title'] = mdoc.findall('./title')[0].text
|
info['title'] = mdoc.findall('./title')[0].text
|
||||||
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
|
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise ExtractorError(u'Invalid metadata XML file')
|
raise ExtractorError('Invalid metadata XML file')
|
||||||
info['ext'] = info['url'].rpartition('.')[2]
|
return info
|
||||||
return [info]
|
elif mobj.group('course'): # A course page
|
||||||
elif mobj.group('course'): # A course page
|
|
||||||
course = mobj.group('course')
|
course = mobj.group('course')
|
||||||
info = {
|
info = {
|
||||||
'id': course,
|
'id': course,
|
||||||
'type': 'playlist',
|
'_type': 'playlist',
|
||||||
'uploader': None,
|
'uploader': None,
|
||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
}
|
}
|
||||||
|
|
||||||
coursepage = self._download_webpage(url, info['id'],
|
coursepage = self._download_webpage(
|
||||||
note='Downloading course info page',
|
url, info['id'],
|
||||||
errnote='Unable to download course info page')
|
note='Downloading course info page',
|
||||||
|
errnote='Unable to download course info page')
|
||||||
|
|
||||||
info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
|
info['title'] = self._html_search_regex(
|
||||||
|
r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
|
||||||
|
|
||||||
info['description'] = self._html_search_regex('<description>([^<]+)</description>',
|
info['description'] = self._html_search_regex(
|
||||||
coursepage, u'description', fatal=False)
|
r'(?s)<description>([^<]+)</description>',
|
||||||
|
coursepage, 'description', fatal=False)
|
||||||
|
|
||||||
links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
|
links = orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
|
||||||
info['list'] = [
|
info['entries'] = [self.url_result(
|
||||||
{
|
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
|
||||||
'type': 'reference',
|
) for l in links]
|
||||||
'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
|
return info
|
||||||
}
|
else: # Root page
|
||||||
for vpage in links]
|
|
||||||
results = []
|
|
||||||
for entry in info['list']:
|
|
||||||
assert entry['type'] == 'reference'
|
|
||||||
results += self.extract(entry['url'])
|
|
||||||
return results
|
|
||||||
else: # Root page
|
|
||||||
info = {
|
info = {
|
||||||
'id': 'Stanford OpenClassroom',
|
'id': 'Stanford OpenClassroom',
|
||||||
'type': 'playlist',
|
'_type': 'playlist',
|
||||||
'uploader': None,
|
'uploader': None,
|
||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
}
|
}
|
||||||
|
info['title'] = info['id']
|
||||||
|
|
||||||
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
|
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
|
||||||
rootpage = self._download_webpage(rootURL, info['id'],
|
rootpage = self._download_webpage(rootURL, info['id'],
|
||||||
errnote=u'Unable to download course info page')
|
errnote='Unable to download course info page')
|
||||||
|
|
||||||
info['title'] = info['id']
|
|
||||||
|
|
||||||
links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
|
links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
|
||||||
info['list'] = [
|
info['entries'] = [self.url_result(
|
||||||
{
|
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
|
||||||
'type': 'reference',
|
) for l in links]
|
||||||
'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
|
return info
|
||||||
}
|
|
||||||
for cpage in links]
|
|
||||||
|
|
||||||
results = []
|
|
||||||
for entry in info['list']:
|
|
||||||
assert entry['type'] == 'reference'
|
|
||||||
results += self.extract(entry['url'])
|
|
||||||
return results
|
|
||||||
|
@@ -1,27 +1,24 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
|
||||||
|
|
||||||
|
|
||||||
class SztvHuIE(InfoExtractor):
|
class SztvHuIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
|
_VALID_URL = r'http://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
|
'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
|
||||||
u'file': u'20130909.mp4',
|
'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
|
||||||
u'md5': u'a6df607b11fb07d0e9f2ad94613375cb',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '20130909',
|
||||||
u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
|
'ext': 'mp4',
|
||||||
u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
|
'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',
|
||||||
|
'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
|
||||||
},
|
},
|
||||||
u'skip': u'Service temporarily disabled as of 2013-11-20'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_file = self._search_regex(
|
video_file = self._search_regex(
|
||||||
r'file: "...:(.*?)",', webpage, 'video file')
|
r'file: "...:(.*?)",', webpage, 'video file')
|
||||||
@@ -39,7 +36,6 @@ class SztvHuIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': determine_ext(video_url),
|
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
@@ -121,4 +121,7 @@ class VH1IE(MTVIE):
|
|||||||
idoc = self._download_xml(
|
idoc = self._download_xml(
|
||||||
doc_url, video_id,
|
doc_url, video_id,
|
||||||
'Downloading info', transform_source=fix_xml_ampersands)
|
'Downloading info', transform_source=fix_xml_ampersands)
|
||||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
return self.playlist_result(
|
||||||
|
[self._get_video_info(item) for item in idoc.findall('.//item')],
|
||||||
|
playlist_id=video_id,
|
||||||
|
)
|
||||||
|
@@ -61,7 +61,7 @@ class JSInterpreter(object):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
|
r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
|
||||||
expr)
|
expr)
|
||||||
if m:
|
if m:
|
||||||
variable = m.group('var')
|
variable = m.group('var')
|
||||||
|
@@ -420,6 +420,7 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
|
|||||||
pass # Python < 3.4
|
pass # Python < 3.4
|
||||||
return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
|
return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class ExtractorError(Exception):
|
class ExtractorError(Exception):
|
||||||
"""Error during info extraction."""
|
"""Error during info extraction."""
|
||||||
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
|
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
|
||||||
@@ -434,7 +435,13 @@ class ExtractorError(Exception):
|
|||||||
if cause:
|
if cause:
|
||||||
msg += ' (caused by %r)' % cause
|
msg += ' (caused by %r)' % cause
|
||||||
if not expected:
|
if not expected:
|
||||||
msg = msg + '; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
|
if ytdl_is_updateable():
|
||||||
|
update_cmd = 'type youtube-dl -U to update'
|
||||||
|
else:
|
||||||
|
update_cmd = 'see https://yt-dl.org/update on how to update'
|
||||||
|
msg += '; please report this issue on https://yt-dl.org/bug .'
|
||||||
|
msg += ' Make sure you are using the latest version; %s.' % update_cmd
|
||||||
|
msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
|
||||||
super(ExtractorError, self).__init__(msg)
|
super(ExtractorError, self).__init__(msg)
|
||||||
|
|
||||||
self.traceback = tb
|
self.traceback = tb
|
||||||
@@ -1419,3 +1426,10 @@ def is_outdated_version(version, limit, assume_new=True):
|
|||||||
return version_tuple(version) < version_tuple(limit)
|
return version_tuple(version) < version_tuple(limit)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return not assume_new
|
return not assume_new
|
||||||
|
|
||||||
|
|
||||||
|
def ytdl_is_updateable():
|
||||||
|
""" Returns whether youtube-dl can be updated with -U """
|
||||||
|
from zipimport import zipimporter
|
||||||
|
|
||||||
|
return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.11.20'
|
__version__ = '2014.11.21.1'
|
||||||
|
Reference in New Issue
Block a user