Compare commits
23 Commits
2013.07.02
...
2013.07.08
Author | SHA1 | Date | |
---|---|---|---|
|
b04621d155 | ||
|
b227060388 | ||
|
d93e4dcbb7 | ||
|
73e79f2a1b | ||
|
fc79158de2 | ||
|
7763b04e5f | ||
|
9d7b44b4cc | ||
|
897f36d179 | ||
|
94c3637f6d | ||
|
04cc96173c | ||
|
fbaaad49d7 | ||
|
b29f3b250d | ||
|
fa343954d4 | ||
|
2491f5898e | ||
|
b27c856fbc | ||
|
9941ceb331 | ||
|
c536d38059 | ||
|
8de64cac98 | ||
|
6d6d286539 | ||
|
5d2eac9eba | ||
|
9826925a20 | ||
|
24a267b562 | ||
|
d4da3d6116 |
@@ -61,6 +61,17 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
else:
|
else:
|
||||||
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
||||||
|
|
||||||
|
def test_keywords(self):
    # Every ':keyword' shortcut must be claimed by exactly the expected
    # extractor once the catch-all 'generic' extractor is left out.
    extractors = gen_extractors()

    def names_for(keyword):
        # IE_NAMEs of all non-generic extractors that accept the keyword
        return [extractor.IE_NAME for extractor in extractors
                if extractor.suitable(keyword) and extractor.IE_NAME != 'generic']

    expectations = (
        (':ytsubs', ['youtube:subscriptions']),
        (':ytsubscriptions', ['youtube:subscriptions']),
        (':thedailyshow', ['ComedyCentral']),
        (':tds', ['ComedyCentral']),
        (':colbertreport', ['ComedyCentral']),
        (':cr', ['ComedyCentral']),
    )
    for keyword, expected_names in expectations:
        self.assertEqual(names_for(keyword), expected_names)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -5,11 +5,13 @@ from .auengine import AUEngineIE
|
|||||||
from .bandcamp import BandcampIE
|
from .bandcamp import BandcampIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
|
from .brightcove import BrightcoveIE
|
||||||
from .collegehumor import CollegeHumorIE
|
from .collegehumor import CollegeHumorIE
|
||||||
from .comedycentral import ComedyCentralIE
|
from .comedycentral import ComedyCentralIE
|
||||||
from .cspan import CSpanIE
|
from .cspan import CSpanIE
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
from .depositfiles import DepositFilesIE
|
from .depositfiles import DepositFilesIE
|
||||||
|
from .dreisat import DreiSatIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
@@ -68,7 +70,15 @@ from .yahoo import YahooIE, YahooSearchIE
|
|||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
from .youku import YoukuIE
|
from .youku import YoukuIE
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE
|
from .youtube import (
|
||||||
|
YoutubeIE,
|
||||||
|
YoutubePlaylistIE,
|
||||||
|
YoutubeSearchIE,
|
||||||
|
YoutubeUserIE,
|
||||||
|
YoutubeChannelIE,
|
||||||
|
YoutubeShowIE,
|
||||||
|
YoutubeSubscriptionsIE,
|
||||||
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -16,8 +17,8 @@ class ArteTvIE(InfoExtractor):
|
|||||||
www.arte.tv/guide, the extraction process is different for each one.
|
www.arte.tv/guide, the extraction process is different for each one.
|
||||||
The videos expire in 7 days, so we can't add tests.
|
The videos expire in 7 days, so we can't add tests.
|
||||||
"""
|
"""
|
||||||
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P<id>.*?).html'
|
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
|
||||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||||
|
|
||||||
IE_NAME = u'arte.tv'
|
IE_NAME = u'arte.tv'
|
||||||
@@ -57,22 +58,24 @@ class ArteTvIE(InfoExtractor):
|
|||||||
mobj = re.match(self._EMISSION_URL, url)
|
mobj = re.match(self._EMISSION_URL, url)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
|
lang = mobj.group('lang')
|
||||||
# This is not a real id, it can be for example AJT for the news
|
# This is not a real id, it can be for example AJT for the news
|
||||||
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
return self._extract_emission(url, video_id)
|
return self._extract_emission(url, video_id, lang)
|
||||||
|
|
||||||
mobj = re.match(self._VIDEOS_URL, url)
|
mobj = re.match(self._VIDEOS_URL, url)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
id = mobj.group('id')
|
id = mobj.group('id')
|
||||||
return self._extract_video(url, id)
|
lang = mobj.group('lang')
|
||||||
|
return self._extract_video(url, id, lang)
|
||||||
|
|
||||||
if re.search(self._LIVE_URL, video_id) is not None:
|
if re.search(self._LIVE_URL, video_id) is not None:
|
||||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||||
# self.extractLiveStream(url)
|
# self.extractLiveStream(url)
|
||||||
# return
|
# return
|
||||||
|
|
||||||
def _extract_emission(self, url, video_id):
|
def _extract_emission(self, url, video_id, lang):
|
||||||
"""Extract from www.arte.tv/guide"""
|
"""Extract from www.arte.tv/guide"""
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
||||||
@@ -91,6 +94,16 @@ class ArteTvIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
formats = player_info['VSR'].values()
|
formats = player_info['VSR'].values()
|
||||||
|
def _match_lang(f):
|
||||||
|
# Return true if that format is in the language of the url
|
||||||
|
if lang == 'fr':
|
||||||
|
l = 'F'
|
||||||
|
elif lang == 'de':
|
||||||
|
l = 'A'
|
||||||
|
regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
|
||||||
|
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||||
|
# Some formats may not be in the same language as the url
|
||||||
|
formats = filter(_match_lang, formats)
|
||||||
# We order the formats by quality
|
# We order the formats by quality
|
||||||
formats = sorted(formats, key=lambda f: int(f['height']))
|
formats = sorted(formats, key=lambda f: int(f['height']))
|
||||||
# Pick the best quality
|
# Pick the best quality
|
||||||
@@ -103,13 +116,15 @@ class ArteTvIE(InfoExtractor):
|
|||||||
|
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
def _extract_video(self, url, video_id):
|
def _extract_video(self, url, video_id, lang):
|
||||||
"""Extract from videos.arte.tv"""
|
"""Extract from videos.arte.tv"""
|
||||||
config_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||||
config_xml_url = config_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id)
|
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||||
config_xml_url = self._html_search_regex(r'<video lang=".*?" ref="(.*?)"', config_xml, 'config xml url')
|
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id)
|
config_node = ref_xml_doc.find('.//video[@lang="%s"]' % lang)
|
||||||
|
config_xml_url = config_node.attrib['ref']
|
||||||
|
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||||
|
|
||||||
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
|
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
|
||||||
def _key(m):
|
def _key(m):
|
||||||
|
@@ -8,6 +8,14 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class AUEngineIE(InfoExtractor):
|
class AUEngineIE(InfoExtractor):
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
|
||||||
|
u'file': u'lfvlytY6.mp4',
|
||||||
|
u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
|
||||||
|
}
|
||||||
|
}
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
|
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -27,7 +27,7 @@ class BlipTVIE(InfoExtractor):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||||
u'file': u'5779306.m4v',
|
u'file': u'5779306.m4v',
|
||||||
u'md5': u'b2d849efcf7ee18917e4b4d9ff37cafe',
|
u'md5': u'80baf1ec5c3d2019037c1c707d676b9f',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"upload_date": u"20111205",
|
u"upload_date": u"20111205",
|
||||||
u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
|
u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
|
||||||
@@ -103,7 +103,12 @@ class BlipTVIE(InfoExtractor):
|
|||||||
data = json_data
|
data = json_data
|
||||||
|
|
||||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||||
video_url = data['media']['url']
|
if 'additionalMedia' in data:
|
||||||
|
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
|
||||||
|
best_format = formats[-1]
|
||||||
|
video_url = best_format['url']
|
||||||
|
else:
|
||||||
|
video_url = data['media']['url']
|
||||||
umobj = re.match(self._URL_EXT, video_url)
|
umobj = re.match(self._URL_EXT, video_url)
|
||||||
if umobj is None:
|
if umobj is None:
|
||||||
raise ValueError('Can not determine filename extension')
|
raise ValueError('Can not determine filename extension')
|
||||||
|
32
youtube_dl/extractor/brightcove.py
Normal file
32
youtube_dl/extractor/brightcove.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
class BrightcoveIE(InfoExtractor):
    """Information extractor for videos embedded through brightcove.com players."""
    _VALID_URL = r'http://.*brightcove\.com/.*\?(?P<query>.*videoPlayer=(?P<id>\d*).*)'

    def _real_extract(self, url):
        """Return the info dictionary of the video referenced by *url*."""
        url_match = re.match(self._VALID_URL, url)
        player_query = url_match.group('query')
        video_id = url_match.group('id')

        # The query string of the original url is forwarded unchanged
        federated_url = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' % player_query
        webpage = self._download_webpage(federated_url, video_id)

        self.report_extraction(video_id)
        experience_json = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
        experience = json.loads(experience_json)['data']
        media = experience['programmedContent']['videoPlayer']['mediaDTO']
        # Ascending by 'size', so the last rendition is the biggest one
        by_size = sorted(media['renditions'], key=lambda rendition: rendition['size'])
        biggest = by_size[-1]

        return {
            'id': video_id,
            'title': media['displayName'],
            'url': biggest['defaultURL'],
            'ext': 'mp4',
            'description': media.get('shortDescription'),
            'thumbnail': media.get('videoStillURL') or media.get('thumbnailURL'),
            'uploader': media.get('publisherName'),
        }
|
@@ -3,6 +3,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
|
import netrc
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
@@ -36,6 +37,8 @@ class InfoExtractor(object):
|
|||||||
The following fields are optional:
|
The following fields are optional:
|
||||||
|
|
||||||
format: The video format, defaults to ext (used for --get-format)
|
format: The video format, defaults to ext (used for --get-format)
|
||||||
|
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||||
|
"url") for the varying thumbnails
|
||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
description: One-line video description.
|
description: One-line video description.
|
||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
@@ -161,6 +164,10 @@ class InfoExtractor(object):
|
|||||||
"""Report attempt to confirm age."""
|
"""Report attempt to confirm age."""
|
||||||
self.to_screen(u'Confirming age')
|
self.to_screen(u'Confirming age')
|
||||||
|
|
||||||
|
def report_login(self):
    """Print a notice that a login attempt is being made."""
    notice = u'Logging in'
    self.to_screen(notice)
|
||||||
|
|
||||||
#Methods for following #608
|
#Methods for following #608
|
||||||
#They set the correct value of the '_type' key
|
#They set the correct value of the '_type' key
|
||||||
def video_result(self, video_info):
|
def video_result(self, video_info):
|
||||||
@@ -225,6 +232,36 @@ class InfoExtractor(object):
|
|||||||
else:
|
else:
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def _get_login_info(self):
|
||||||
|
"""
|
||||||
|
Get the the login info as (username, password)
|
||||||
|
It will look in the netrc file using the _NETRC_MACHINE value
|
||||||
|
If there's no info available, return (None, None)
|
||||||
|
"""
|
||||||
|
if self._downloader is None:
|
||||||
|
return (None, None)
|
||||||
|
|
||||||
|
username = None
|
||||||
|
password = None
|
||||||
|
downloader_params = self._downloader.params
|
||||||
|
|
||||||
|
# Attempt to use provided username and password or .netrc data
|
||||||
|
if downloader_params.get('username', None) is not None:
|
||||||
|
username = downloader_params['username']
|
||||||
|
password = downloader_params['password']
|
||||||
|
elif downloader_params.get('usenetrc', False):
|
||||||
|
try:
|
||||||
|
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||||
|
if info is not None:
|
||||||
|
username = info[0]
|
||||||
|
password = info[2]
|
||||||
|
else:
|
||||||
|
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||||
|
except (IOError, netrc.NetrcParseError) as err:
|
||||||
|
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
|
||||||
|
|
||||||
|
return (username, password)
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
Base class for paged search queries extractors.
|
Base class for paged search queries extractors.
|
||||||
|
@@ -1,12 +1,11 @@
|
|||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
class DailymotionIE(InfoExtractor):
|
class DailymotionIE(InfoExtractor):
|
||||||
@@ -39,33 +38,10 @@ class DailymotionIE(InfoExtractor):
|
|||||||
|
|
||||||
# Extract URL, uploader and title from webpage
|
# Extract URL, uploader and title from webpage
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
mobj = re.search(r'\s*var flashvars = (.*)', webpage)
|
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Unable to extract media URL')
|
|
||||||
flashvars = compat_urllib_parse.unquote(mobj.group(1))
|
|
||||||
|
|
||||||
for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
|
video_title = self._html_search_regex(r'<meta property="og:title" content="(.*?)" />',
|
||||||
if key in flashvars:
|
webpage, 'title')
|
||||||
max_quality = key
|
|
||||||
self.to_screen(u'Using %s' % key)
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
raise ExtractorError(u'Unable to extract video URL')
|
|
||||||
|
|
||||||
mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars)
|
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Unable to extract video URL')
|
|
||||||
|
|
||||||
video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/')
|
|
||||||
|
|
||||||
# TODO: support choosing qualities
|
|
||||||
|
|
||||||
mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
|
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Unable to extract title')
|
|
||||||
video_title = unescapeHTML(mobj.group('title'))
|
|
||||||
|
|
||||||
video_uploader = None
|
|
||||||
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
||||||
# Looking for official user
|
# Looking for official user
|
||||||
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
||||||
@@ -76,6 +52,25 @@ class DailymotionIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
|
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
|
||||||
|
|
||||||
|
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
|
||||||
|
embed_page = self._download_webpage(embed_url, video_id,
|
||||||
|
u'Downloading embed page')
|
||||||
|
info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
|
||||||
|
info = json.loads(info)
|
||||||
|
|
||||||
|
# TODO: support choosing qualities
|
||||||
|
|
||||||
|
for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
|
||||||
|
'stream_h264_hq_url','stream_h264_url',
|
||||||
|
'stream_h264_ld_url']:
|
||||||
|
if info.get(key):#key in info and info[key]:
|
||||||
|
max_quality = key
|
||||||
|
self.to_screen(u'Using %s' % key)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
raise ExtractorError(u'Unable to extract video URL')
|
||||||
|
video_url = info[max_quality]
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
@@ -83,4 +78,5 @@ class DailymotionIE(InfoExtractor):
|
|||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_extension,
|
'ext': video_extension,
|
||||||
|
'thumbnail': info['thumbnail_url']
|
||||||
}]
|
}]
|
||||||
|
85
youtube_dl/extractor/dreisat.py
Normal file
85
youtube_dl/extractor/dreisat.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DreiSatIE(InfoExtractor):
    """Information extractor for the 3sat.de mediathek."""
    IE_NAME = '3sat'
    # The dots of the host and script name are escaped so that they only
    # match literal dots
    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TEST = {
        u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
        u'file': u'36983.webm',
        u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
        u'info_dict': {
            u"title": u"Kaffeeland Schweiz",
            u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
            u"uploader": u"3sat",
            u"upload_date": u"20130622"
        }
    }

    def _real_extract(self, url):
        """Download the details XML of the video and build its info dictionary.

        Raises ExtractorError if the details list no usable format.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
        details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))

        # The 'key' attribute has the form WIDTHxHEIGHT
        thumbnail_els = details_doc.findall('.//teaserimage')
        thumbnails = [{
            'width': te.attrib['key'].partition('x')[0],
            'height': te.attrib['key'].partition('x')[2],
            'url': te.text,
        } for te in thumbnail_els]

        information_el = details_doc.find('.//information')
        video_title = information_el.find('./title').text
        video_description = information_el.find('./detail').text

        details_el = details_doc.find('.//details')
        video_uploader = details_el.find('./channel').text
        upload_date = unified_strdate(details_el.find('./airtime').text)

        # Entries pointing to metafilegenerator.de are left out
        format_els = details_doc.findall('.//formitaet')
        formats = [{
            'format_id': fe.attrib['basetype'],
            'width': int(fe.find('./width').text),
            'height': int(fe.find('./height').text),
            'url': fe.find('./url').text,
            'filesize': int(fe.find('./filesize').text),
            'video_bitrate': int(fe.find('./videoBitrate').text),
            '3sat_qualityname': fe.find('./quality').text,
        } for fe in format_els
            if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]
        if not formats:
            # Fail with a meaningful error instead of an IndexError below
            raise ExtractorError(u'No downloadable formats found')

        def _sortkey(fmt):
            # Worst to best: quality name first, then protocol, then bitrate
            qidx = ['low', 'med', 'high', 'veryhigh'].index(fmt['3sat_qualityname'])
            # NOTE(review): this flag was called "prefer_http", but it ranks
            # rtmp urls higher than the others - confirm which one should win
            rtmp_rank = 1 if 'rtmp' in fmt['url'] else 0
            return (qidx, rtmp_rank, fmt['video_bitrate'])
        formats.sort(key=_sortkey)

        info = {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'thumbnails': thumbnails,
            # The details may come without any teaser image
            'thumbnail': thumbnails[-1]['url'] if thumbnails else None,
            'uploader': video_uploader,
            'upload_date': upload_date,
        }

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])

        return self.video_result(info)
|
@@ -1,24 +1,34 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class TudouIE(InfoExtractor):
|
class TudouIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'
|
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
||||||
u'file': u'159447792.f4v',
|
u'file': u'159448201.f4v',
|
||||||
u'md5': u'ad7c358a01541e926a1e413612c6b10a',
|
u'md5': u'140a49ed444bd22f93330985d8475fcb',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"\u5361\u9a6c\u4e54\u56fd\u8db3\u5f00\u5927\u811a\u957f\u4f20\u51b2\u540a\u96c6\u9526"
|
u"title": u"卡马乔国足开大脚长传冲吊集锦"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _url_for_id(self, id, quality = None):
|
||||||
|
info_url = "http://v2.tudou.com/f?id="+str(id)
|
||||||
|
if quality:
|
||||||
|
info_url += '&hd' + quality
|
||||||
|
webpage = self._download_webpage(info_url, id, "Opening the info webpage")
|
||||||
|
final_url = self._html_search_regex('>(.+?)</f>',webpage, 'video url')
|
||||||
|
return final_url
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(2).replace('.html','')
|
video_id = mobj.group(2)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_id = re.search('"k":(.+?),',webpage).group(1)
|
|
||||||
title = re.search(",kw:\"(.+)\"",webpage)
|
title = re.search(",kw:\"(.+)\"",webpage)
|
||||||
if title is None:
|
if title is None:
|
||||||
title = re.search(",kw: \'(.+)\'",webpage)
|
title = re.search(",kw: \'(.+)\'",webpage)
|
||||||
@@ -27,14 +37,27 @@ class TudouIE(InfoExtractor):
|
|||||||
if thumbnail_url is None:
|
if thumbnail_url is None:
|
||||||
thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
|
thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
|
||||||
thumbnail_url = thumbnail_url.group(1)
|
thumbnail_url = thumbnail_url.group(1)
|
||||||
info_url = "http://v2.tudou.com/f?id="+str(video_id)
|
|
||||||
webpage = self._download_webpage(info_url, video_id, "Opening the info webpage")
|
segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
|
||||||
final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1)
|
segments = json.loads(segs_json)
|
||||||
ext = (final_url.split('?')[0]).split('.')[-1]
|
# It looks like the keys are the arguments that have to be passed as
|
||||||
return [{
|
# the hd field in the request url, we pick the higher
|
||||||
'id': video_id,
|
quality = sorted(segments.keys())[-1]
|
||||||
'url': final_url,
|
parts = segments[quality]
|
||||||
'ext': ext,
|
result = []
|
||||||
'title': title,
|
len_parts = len(parts)
|
||||||
'thumbnail': thumbnail_url,
|
if len_parts > 1:
|
||||||
}]
|
self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
|
||||||
|
for part in parts:
|
||||||
|
part_id = part['k']
|
||||||
|
final_url = self._url_for_id(part_id, quality)
|
||||||
|
ext = (final_url.split('?')[0]).split('.')[-1]
|
||||||
|
part_info = {'id': part_id,
|
||||||
|
'url': final_url,
|
||||||
|
'ext': ext,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail_url,
|
||||||
|
}
|
||||||
|
result.append(part_info)
|
||||||
|
|
||||||
|
return result
|
||||||
|
@@ -17,6 +17,7 @@ class VimeoIE(InfoExtractor):
|
|||||||
|
|
||||||
# _VALID_URL matches Vimeo URLs
|
# _VALID_URL matches Vimeo URLs
|
||||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
|
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
|
||||||
|
_NETRC_MACHINE = 'vimeo'
|
||||||
IE_NAME = u'vimeo'
|
IE_NAME = u'vimeo'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://vimeo.com/56015672',
|
u'url': u'http://vimeo.com/56015672',
|
||||||
@@ -31,6 +32,25 @@ class VimeoIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _login(self):
    """Log in to vimeo.com with the credentials from _get_login_info().

    Does nothing when no username is available.
    """
    (username, password) = self._get_login_info()
    if username is None:
        return
    self.report_login()
    login_url = 'https://vimeo.com/log_in'
    webpage = self._download_webpage(login_url, None, False)
    # Use the extractor helper instead of re.search(...).group(1), which
    # dies with an AttributeError if the page doesn't contain the token
    # (presumably _search_regex raises ExtractorError then - confirm in common.py)
    token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, u'login token')
    # urlencode() only produces ASCII characters; the request body has to
    # be bytes in python 3
    data = compat_urllib_parse.urlencode({'email': username,
                                          'password': password,
                                          'action': 'login',
                                          'service': 'vimeo',
                                          'token': token,
                                          }).encode('ascii')
    login_request = compat_urllib_request.Request(login_url, data)
    login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
    # The token is sent both in the form and as a cookie
    login_request.add_header('Cookie', 'xsrft=%s' % token)
    self._download_webpage(login_request, None, False, u'Wrong login info')
|
||||||
|
|
||||||
def _verify_video_password(self, url, video_id, webpage):
|
def _verify_video_password(self, url, video_id, webpage):
|
||||||
password = self._downloader.params.get('videopassword', None)
|
password = self._downloader.params.get('videopassword', None)
|
||||||
if password is None:
|
if password is None:
|
||||||
@@ -50,6 +70,9 @@ class VimeoIE(InfoExtractor):
|
|||||||
u'Verifying the password',
|
u'Verifying the password',
|
||||||
u'Wrong password')
|
u'Wrong password')
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
def _real_extract(self, url, new_video=True):
|
def _real_extract(self, url, new_video=True):
|
||||||
# Extract ID from URL
|
# Extract ID from URL
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@@ -4,6 +4,7 @@ import json
|
|||||||
import netrc
|
import netrc
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -19,6 +20,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
orderedSet,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -122,7 +124,7 @@ class YoutubeIE(InfoExtractor):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
"""Receives a URL and returns True if suitable for this IE."""
|
||||||
if YoutubePlaylistIE.suitable(url): return False
|
if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||||
|
|
||||||
def report_lang(self):
|
def report_lang(self):
|
||||||
@@ -471,7 +473,12 @@ class YoutubeIE(InfoExtractor):
|
|||||||
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
|
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
|
||||||
|
|
||||||
# thumbnail image
|
# thumbnail image
|
||||||
if 'thumbnail_url' not in video_info:
|
# We try first to get a high quality image:
|
||||||
|
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
|
||||||
|
video_webpage, re.DOTALL)
|
||||||
|
if m_thumb is not None:
|
||||||
|
video_thumbnail = m_thumb.group(1)
|
||||||
|
elif 'thumbnail_url' not in video_info:
|
||||||
self._downloader.report_warning(u'unable to extract video thumbnail')
|
self._downloader.report_warning(u'unable to extract video thumbnail')
|
||||||
video_thumbnail = ''
|
video_thumbnail = ''
|
||||||
else: # don't panic if we can't find it
|
else: # don't panic if we can't find it
|
||||||
@@ -864,3 +871,34 @@ class YoutubeShowIE(InfoExtractor):
|
|||||||
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
||||||
self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
|
self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
|
||||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeSubscriptionsIE(YoutubeIE):
    """It's a subclass of YoutubeIE because we need to login"""
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_NAME = u'youtube:subscriptions'
    _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
    _PAGING_STEP = 30

    # Overwrite YoutubeIE properties we don't want
    _TESTS = []

    @classmethod
    def suitable(cls, url):
        """Return True if *url* is the subscriptions feed or its keyword."""
        return bool(re.match(cls._VALID_URL, url))

    def _real_extract(self, url):
        """Walk the feed page by page and return all its videos as a playlist."""
        entries = []
        page_number = 0
        while True:
            # The feed is requested with an offset that grows by
            # _PAGING_STEP on every page
            offset = page_number * self._PAGING_STEP
            raw_page = self._download_webpage(self._FEED_TEMPLATE % offset, 'feed',
                                              u'Downloading page %s' % page_number)
            page = json.loads(raw_page)
            id_matches = re.finditer(r'"/watch\?v=(.*?)"', page['feed_html'])
            video_ids = orderedSet(match.group(1) for match in id_matches)
            for video_id in video_ids:
                entries.append(self.url_result(video_id, 'Youtube'))
            # A null 'paging' value marks the last page
            if page['paging'] is None:
                break
            page_number += 1
        return self.playlist_result(entries, playlist_title='Youtube Subscriptions')
|
||||||
|
@@ -623,7 +623,7 @@ def unified_strdate(date_str):
|
|||||||
date_str = date_str.replace(',',' ')
|
date_str = date_str.replace(',',' ')
|
||||||
# %z (UTC offset) is only supported in python>=3.2
|
# %z (UTC offset) is only supported in python>=3.2
|
||||||
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
|
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
|
||||||
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S']
|
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
|
||||||
for expression in format_expressions:
|
for expression in format_expressions:
|
||||||
try:
|
try:
|
||||||
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
||||||
@@ -631,6 +631,13 @@ def unified_strdate(date_str):
|
|||||||
pass
|
pass
|
||||||
return upload_date
|
return upload_date
|
||||||
|
|
||||||
|
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from a url.

    The query string is ignored and the text after the last dot is
    returned if it only consists of ASCII letters and digits. In any
    other case (no url, no dot, unexpected characters) default_ext is
    returned.
    """
    if url is None:
        return default_ext
    path = url.partition(u'?')[0]
    _, dot, guess = path.rpartition(u'.')
    # Without a dot rpartition() returns the whole string as last element,
    # which must not be mistaken for an extension
    if dot and re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    return default_ext
|
||||||
|
|
||||||
def date_from_str(date_str):
|
def date_from_str(date_str):
|
||||||
"""
|
"""
|
||||||
Return a datetime object from a string in the format YYYYMMDD or
|
Return a datetime object from a string in the format YYYYMMDD or
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.07.02'
|
__version__ = '2013.07.08'
|
||||||
|
Reference in New Issue
Block a user