Compare commits
23 Commits
2013.07.02
...
2013.07.08
Author | SHA1 | Date | |
---|---|---|---|
|
b04621d155 | ||
|
b227060388 | ||
|
d93e4dcbb7 | ||
|
73e79f2a1b | ||
|
fc79158de2 | ||
|
7763b04e5f | ||
|
9d7b44b4cc | ||
|
897f36d179 | ||
|
94c3637f6d | ||
|
04cc96173c | ||
|
fbaaad49d7 | ||
|
b29f3b250d | ||
|
fa343954d4 | ||
|
2491f5898e | ||
|
b27c856fbc | ||
|
9941ceb331 | ||
|
c536d38059 | ||
|
8de64cac98 | ||
|
6d6d286539 | ||
|
5d2eac9eba | ||
|
9826925a20 | ||
|
24a267b562 | ||
|
d4da3d6116 |
@@ -61,6 +61,17 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
else:
|
||||
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
||||
|
||||
def test_keywords(self):
|
||||
ies = gen_extractors()
|
||||
matching_ies = lambda url: [ie.IE_NAME for ie in ies
|
||||
if ie.suitable(url) and ie.IE_NAME != 'generic']
|
||||
self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
|
||||
self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
|
||||
self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
|
||||
self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
|
||||
self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
|
||||
self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -5,11 +5,13 @@ from .auengine import AUEngineIE
|
||||
from .bandcamp import BandcampIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import BrightcoveIE
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .comedycentral import ComedyCentralIE
|
||||
from .cspan import CSpanIE
|
||||
from .dailymotion import DailymotionIE
|
||||
from .depositfiles import DepositFilesIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .escapist import EscapistIE
|
||||
from .facebook import FacebookIE
|
||||
@@ -68,7 +70,15 @@ from .yahoo import YahooIE, YahooSearchIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
from .youporn import YouPornIE
|
||||
from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -16,8 +17,8 @@ class ArteTvIE(InfoExtractor):
|
||||
www.arte.tv/guide, the extraction process is different for each one.
|
||||
The videos expire in 7 days, so we can't add tests.
|
||||
"""
|
||||
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P<id>.*?).html'
|
||||
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
|
||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||
|
||||
IE_NAME = u'arte.tv'
|
||||
@@ -57,22 +58,24 @@ class ArteTvIE(InfoExtractor):
|
||||
mobj = re.match(self._EMISSION_URL, url)
|
||||
if mobj is not None:
|
||||
name = mobj.group('name')
|
||||
lang = mobj.group('lang')
|
||||
# This is not a real id, it can be for example AJT for the news
|
||||
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
||||
video_id = mobj.group('id')
|
||||
return self._extract_emission(url, video_id)
|
||||
return self._extract_emission(url, video_id, lang)
|
||||
|
||||
mobj = re.match(self._VIDEOS_URL, url)
|
||||
if mobj is not None:
|
||||
id = mobj.group('id')
|
||||
return self._extract_video(url, id)
|
||||
lang = mobj.group('lang')
|
||||
return self._extract_video(url, id, lang)
|
||||
|
||||
if re.search(self._LIVE_URL, video_id) is not None:
|
||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||
# self.extractLiveStream(url)
|
||||
# return
|
||||
|
||||
def _extract_emission(self, url, video_id):
|
||||
def _extract_emission(self, url, video_id, lang):
|
||||
"""Extract from www.arte.tv/guide"""
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
||||
@@ -91,6 +94,16 @@ class ArteTvIE(InfoExtractor):
|
||||
}
|
||||
|
||||
formats = player_info['VSR'].values()
|
||||
def _match_lang(f):
|
||||
# Return true if that format is in the language of the url
|
||||
if lang == 'fr':
|
||||
l = 'F'
|
||||
elif lang == 'de':
|
||||
l = 'A'
|
||||
regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
|
||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||
# Some formats may not be in the same language as the url
|
||||
formats = filter(_match_lang, formats)
|
||||
# We order the formats by quality
|
||||
formats = sorted(formats, key=lambda f: int(f['height']))
|
||||
# Pick the best quality
|
||||
@@ -103,13 +116,15 @@ class ArteTvIE(InfoExtractor):
|
||||
|
||||
return info_dict
|
||||
|
||||
def _extract_video(self, url, video_id):
|
||||
def _extract_video(self, url, video_id, lang):
|
||||
"""Extract from videos.arte.tv"""
|
||||
config_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||
config_xml_url = config_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||
config_xml = self._download_webpage(config_xml_url, video_id)
|
||||
config_xml_url = self._html_search_regex(r'<video lang=".*?" ref="(.*?)"', config_xml, 'config xml url')
|
||||
config_xml = self._download_webpage(config_xml_url, video_id)
|
||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
|
||||
config_node = ref_xml_doc.find('.//video[@lang="%s"]' % lang)
|
||||
config_xml_url = config_node.attrib['ref']
|
||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||
|
||||
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
|
||||
def _key(m):
|
||||
|
@@ -8,6 +8,14 @@ from ..utils import (
|
||||
)
|
||||
|
||||
class AUEngineIE(InfoExtractor):
|
||||
_TEST = {
|
||||
u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
|
||||
u'file': u'lfvlytY6.mp4',
|
||||
u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f',
|
||||
u'info_dict': {
|
||||
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -27,7 +27,7 @@ class BlipTVIE(InfoExtractor):
|
||||
_TEST = {
|
||||
u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
u'file': u'5779306.m4v',
|
||||
u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe',
|
||||
u'md5': u'80baf1ec5c3d2019037c1c707d676b9f',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20111205",
|
||||
u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
|
||||
@@ -103,7 +103,12 @@ class BlipTVIE(InfoExtractor):
|
||||
data = json_data
|
||||
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
video_url = data['media']['url']
|
||||
if 'additionalMedia' in data:
|
||||
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
|
||||
best_format = formats[-1]
|
||||
video_url = best_format['url']
|
||||
else:
|
||||
video_url = data['media']['url']
|
||||
umobj = re.match(self._URL_EXT, video_url)
|
||||
if umobj is None:
|
||||
raise ValueError('Can not determine filename extension')
|
||||
|
32
youtube_dl/extractor/brightcove.py
Normal file
32
youtube_dl/extractor/brightcove.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class BrightcoveIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*brightcove\.com/.*\?(?P<query>.*videoPlayer=(?P<id>\d*).*)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
query = mobj.group('query')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
request_url = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' % query
|
||||
webpage = self._download_webpage(request_url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
|
||||
info = json.loads(info)['data']
|
||||
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
|
||||
renditions = video_info['renditions']
|
||||
renditions = sorted(renditions, key=lambda r: r['size'])
|
||||
best_format = renditions[-1]
|
||||
|
||||
return {'id': video_id,
|
||||
'title': video_info['displayName'],
|
||||
'url': best_format['defaultURL'],
|
||||
'ext': 'mp4',
|
||||
'description': video_info.get('shortDescription'),
|
||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
||||
'uploader': video_info.get('publisherName'),
|
||||
}
|
@@ -3,6 +3,7 @@ import os
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
import netrc
|
||||
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
@@ -36,6 +37,8 @@ class InfoExtractor(object):
|
||||
The following fields are optional:
|
||||
|
||||
format: The video format, defaults to ext (used for --get-format)
|
||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||
"url") for the varying thumbnails
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: One-line video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@@ -161,6 +164,10 @@ class InfoExtractor(object):
|
||||
"""Report attempt to confirm age."""
|
||||
self.to_screen(u'Confirming age')
|
||||
|
||||
def report_login(self):
|
||||
"""Report attempt to log in."""
|
||||
self.to_screen(u'Logging in')
|
||||
|
||||
#Methods for following #608
|
||||
#They set the correct value of the '_type' key
|
||||
def video_result(self, video_info):
|
||||
@@ -225,6 +232,36 @@ class InfoExtractor(object):
|
||||
else:
|
||||
return res
|
||||
|
||||
def _get_login_info(self):
|
||||
"""
|
||||
Get the the login info as (username, password)
|
||||
It will look in the netrc file using the _NETRC_MACHINE value
|
||||
If there's no info available, return (None, None)
|
||||
"""
|
||||
if self._downloader is None:
|
||||
return (None, None)
|
||||
|
||||
username = None
|
||||
password = None
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
# Attempt to use provided username and password or .netrc data
|
||||
if downloader_params.get('username', None) is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
|
||||
|
||||
return (username, password)
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
Base class for paged search queries extractors.
|
||||
|
@@ -1,12 +1,11 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
class DailymotionIE(InfoExtractor):
|
||||
@@ -39,33 +38,10 @@ class DailymotionIE(InfoExtractor):
|
||||
|
||||
# Extract URL, uploader and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
mobj = re.search(r'\s*var flashvars = (.*)', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract media URL')
|
||||
flashvars = compat_urllib_parse.unquote(mobj.group(1))
|
||||
|
||||
for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
|
||||
if key in flashvars:
|
||||
max_quality = key
|
||||
self.to_screen(u'Using %s' % key)
|
||||
break
|
||||
else:
|
||||
raise ExtractorError(u'Unable to extract video URL')
|
||||
video_title = self._html_search_regex(r'<meta property="og:title" content="(.*?)" />',
|
||||
webpage, 'title')
|
||||
|
||||
mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video URL')
|
||||
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/')
|
||||
|
||||
# TODO: support choosing qualities
|
||||
|
||||
mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract title')
|
||||
video_title = unescapeHTML(mobj.group('title'))
|
||||
|
||||
video_uploader = None
|
||||
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
||||
# Looking for official user
|
||||
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
||||
@@ -76,6 +52,25 @@ class DailymotionIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
|
||||
|
||||
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
u'Downloading embed page')
|
||||
info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
|
||||
info = json.loads(info)
|
||||
|
||||
# TODO: support choosing qualities
|
||||
|
||||
for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
|
||||
'stream_h264_hq_url','stream_h264_url',
|
||||
'stream_h264_ld_url']:
|
||||
if info.get(key):#key in info and info[key]:
|
||||
max_quality = key
|
||||
self.to_screen(u'Using %s' % key)
|
||||
break
|
||||
else:
|
||||
raise ExtractorError(u'Unable to extract video URL')
|
||||
video_url = info[max_quality]
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
@@ -83,4 +78,5 @@ class DailymotionIE(InfoExtractor):
|
||||
'upload_date': video_upload_date,
|
||||
'title': video_title,
|
||||
'ext': video_extension,
|
||||
'thumbnail': info['thumbnail_url']
|
||||
}]
|
||||
|
85
youtube_dl/extractor/dreisat.py
Normal file
85
youtube_dl/extractor/dreisat.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TEST = {
|
||||
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
|
||||
u'file': u'36983.webm',
|
||||
u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
|
||||
u'info_dict': {
|
||||
u"title": u"Kaffeeland Schweiz",
|
||||
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
|
||||
u"uploader": u"3sat",
|
||||
u"upload_date": u"20130622"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
|
||||
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
|
||||
|
||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||
thumbnails = [{
|
||||
'width': te.attrib['key'].partition('x')[0],
|
||||
'height': te.attrib['key'].partition('x')[2],
|
||||
'url': te.text,
|
||||
} for te in thumbnail_els]
|
||||
|
||||
information_el = details_doc.find('.//information')
|
||||
video_title = information_el.find('./title').text
|
||||
video_description = information_el.find('./detail').text
|
||||
|
||||
details_el = details_doc.find('.//details')
|
||||
video_uploader = details_el.find('./channel').text
|
||||
upload_date = unified_strdate(details_el.find('./airtime').text)
|
||||
|
||||
format_els = details_doc.findall('.//formitaet')
|
||||
formats = [{
|
||||
'format_id': fe.attrib['basetype'],
|
||||
'width': int(fe.find('./width').text),
|
||||
'height': int(fe.find('./height').text),
|
||||
'url': fe.find('./url').text,
|
||||
'filesize': int(fe.find('./filesize').text),
|
||||
'video_bitrate': int(fe.find('./videoBitrate').text),
|
||||
'3sat_qualityname': fe.find('./quality').text,
|
||||
} for fe in format_els
|
||||
if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]
|
||||
|
||||
def _sortkey(format):
|
||||
qidx = ['low', 'med', 'high', 'veryhigh'].index(format['3sat_qualityname'])
|
||||
prefer_http = 1 if 'rtmp' in format['url'] else 0
|
||||
return (qidx, prefer_http, format['video_bitrate'])
|
||||
formats.sort(key=_sortkey)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'description': video_description,
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnail': thumbnails[-1]['url'],
|
||||
'uploader': video_uploader,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
# TODO: Remove when #980 has been merged
|
||||
info['url'] = formats[-1]['url']
|
||||
info['ext'] = determine_ext(formats[-1]['url'])
|
||||
|
||||
return self.video_result(info)
|
@@ -1,24 +1,34 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TudouIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
|
||||
_TEST = {
|
||||
u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
||||
u'file': u'159447792.f4v',
|
||||
u'md5': u'ad7c358a01541e926a1e413612c6b10a',
|
||||
u'file': u'159448201.f4v',
|
||||
u'md5': u'140a49ed444bd22f93330985d8475fcb',
|
||||
u'info_dict': {
|
||||
u"title": u"\u5361\u9a6c\u4e54\u56fd\u8db3\u5f00\u5927\u811a\u957f\u4f20\u51b2\u540a\u96c6\u9526"
|
||||
u"title": u"卡马乔国足开大脚长传冲吊集锦"
|
||||
}
|
||||
}
|
||||
|
||||
def _url_for_id(self, id, quality = None):
|
||||
info_url = "http://v2.tudou.com/f?id="+str(id)
|
||||
if quality:
|
||||
info_url += '&hd' + quality
|
||||
webpage = self._download_webpage(info_url, id, "Opening the info webpage")
|
||||
final_url = self._html_search_regex('>(.+?)</f>',webpage, 'video url')
|
||||
return final_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(2).replace('.html','')
|
||||
video_id = mobj.group(2)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id = re.search('"k":(.+?),',webpage).group(1)
|
||||
title = re.search(",kw:\"(.+)\"",webpage)
|
||||
if title is None:
|
||||
title = re.search(",kw: \'(.+)\'",webpage)
|
||||
@@ -27,14 +37,27 @@ class TudouIE(InfoExtractor):
|
||||
if thumbnail_url is None:
|
||||
thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
|
||||
thumbnail_url = thumbnail_url.group(1)
|
||||
info_url = "http://v2.tudou.com/f?id="+str(video_id)
|
||||
webpage = self._download_webpage(info_url, video_id, "Opening the info webpage")
|
||||
final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1)
|
||||
ext = (final_url.split('?')[0]).split('.')[-1]
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
}]
|
||||
|
||||
segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
|
||||
segments = json.loads(segs_json)
|
||||
# It looks like the keys are the arguments that have to be passed as
|
||||
# the hd field in the request url, we pick the higher
|
||||
quality = sorted(segments.keys())[-1]
|
||||
parts = segments[quality]
|
||||
result = []
|
||||
len_parts = len(parts)
|
||||
if len_parts > 1:
|
||||
self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
|
||||
for part in parts:
|
||||
part_id = part['k']
|
||||
final_url = self._url_for_id(part_id, quality)
|
||||
ext = (final_url.split('?')[0]).split('.')[-1]
|
||||
part_info = {'id': part_id,
|
||||
'url': final_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
result.append(part_info)
|
||||
|
||||
return result
|
||||
|
@@ -17,6 +17,7 @@ class VimeoIE(InfoExtractor):
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
IE_NAME = u'vimeo'
|
||||
_TEST = {
|
||||
u'url': u'http://vimeo.com/56015672',
|
||||
@@ -31,6 +32,25 @@ class VimeoIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://vimeo.com/log_in'
|
||||
webpage = self._download_webpage(login_url, None, False)
|
||||
token = re.search(r'xsrft: \'(.*?)\'', webpage).group(1)
|
||||
data = compat_urllib_parse.urlencode({'email': username,
|
||||
'password': password,
|
||||
'action': 'login',
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
})
|
||||
login_request = compat_urllib_request.Request(login_url, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||
self._download_webpage(login_request, None, False, u'Wrong login info')
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
@@ -50,6 +70,9 @@ class VimeoIE(InfoExtractor):
|
||||
u'Verifying the password',
|
||||
u'Wrong password')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url, new_video=True):
|
||||
# Extract ID from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -4,6 +4,7 @@ import json
|
||||
import netrc
|
||||
import re
|
||||
import socket
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..utils import (
|
||||
@@ -19,6 +20,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
orderedSet,
|
||||
)
|
||||
|
||||
|
||||
@@ -122,7 +124,7 @@ class YoutubeIE(InfoExtractor):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
if YoutubePlaylistIE.suitable(url): return False
|
||||
if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
|
||||
def report_lang(self):
|
||||
@@ -471,7 +473,12 @@ class YoutubeIE(InfoExtractor):
|
||||
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
|
||||
|
||||
# thumbnail image
|
||||
if 'thumbnail_url' not in video_info:
|
||||
# We try first to get a high quality image:
|
||||
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
|
||||
video_webpage, re.DOTALL)
|
||||
if m_thumb is not None:
|
||||
video_thumbnail = m_thumb.group(1)
|
||||
elif 'thumbnail_url' not in video_info:
|
||||
self._downloader.report_warning(u'unable to extract video thumbnail')
|
||||
video_thumbnail = ''
|
||||
else: # don't panic if we can't find it
|
||||
@@ -864,3 +871,34 @@ class YoutubeShowIE(InfoExtractor):
|
||||
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
||||
self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
|
||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
||||
|
||||
|
||||
class YoutubeSubscriptionsIE(YoutubeIE):
|
||||
"""It's a subclass of YoutubeIE because we need to login"""
|
||||
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
|
||||
IE_NAME = u'youtube:subscriptions'
|
||||
_FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
|
||||
_PAGING_STEP = 30
|
||||
|
||||
# Overwrite YoutubeIE properties we don't want
|
||||
_TESTS = []
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed_entries = []
|
||||
# The step argument is available only in 2.7 or higher
|
||||
for i in itertools.count(0):
|
||||
paging = i*self._PAGING_STEP
|
||||
info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
|
||||
u'Downloading page %s' % i)
|
||||
info = json.loads(info)
|
||||
feed_html = info['feed_html']
|
||||
m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html)
|
||||
ids = orderedSet(m.group(1) for m in m_ids)
|
||||
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
||||
if info['paging'] is None:
|
||||
break
|
||||
return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')
|
||||
|
@@ -623,7 +623,7 @@ def unified_strdate(date_str):
|
||||
date_str = date_str.replace(',',' ')
|
||||
# %z (UTC offset) is only supported in python>=3.2
|
||||
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
|
||||
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
|
||||
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
|
||||
for expression in format_expressions:
|
||||
try:
|
||||
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
||||
@@ -631,6 +631,13 @@ def unified_strdate(date_str):
|
||||
pass
|
||||
return upload_date
|
||||
|
||||
def determine_ext(url):
|
||||
guess = url.partition(u'?')[0].rpartition(u'.')[2]
|
||||
if re.match(r'^[A-Za-z0-9]+$', guess):
|
||||
return guess
|
||||
else:
|
||||
return u'unknown_video'
|
||||
|
||||
def date_from_str(date_str):
|
||||
"""
|
||||
Return a datetime object from a string in the format YYYYMMDD or
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.07.02'
|
||||
__version__ = '2013.07.08'
|
||||
|
Reference in New Issue
Block a user