Compare commits

..

13 Commits

Author SHA1 Message Date
Philipp Hagemeister
156d5ad6da release 2013.07.24.2 2013-07-24 21:18:41 +02:00
Jaime Marquínez Ferrándiz
c626a3d9fa Add an extractor for downloading the Youtube favorite videos(closes #127) 2013-07-24 20:45:19 +02:00
Jaime Marquínez Ferrándiz
b2e8bc1b20 YoutubeIE: Move the code from _real_initialize to a base class
This allows to reuse the code in other IEs without having to overwrite some parts.
2013-07-24 20:40:12 +02:00
Jaime Marquínez Ferrándiz
771822ebb8 YoutubePlaylistIE: break only if there's no entry field in the response
Otherwise the Favorite videos playlist cannot be downloaded complete.
Also break if it reach the maximum value of the start-index.
2013-07-24 20:14:55 +02:00
Jaime Marquínez Ferrándiz
eb6a41ba0f ExfmIE: extract Soundcloud songs using SoundcloudIE
Now SouncloudIE accepts api urls.
2013-07-24 14:39:21 +02:00
Jaime Marquínez Ferrándiz
7d2392691c [soundcloud]: Some improvements
Extract thumbnails.
Make SoundcloudSetIE a subclass of SoundcloudIE to reuse some code.
Directly extract the file url without downloading an extra page.
2013-07-24 14:15:12 +02:00
Philipp Hagemeister
c216c1894d release 2013.07.24.1 2013-07-24 13:52:55 +02:00
Jaime Marquínez Ferrándiz
3e1ad508eb Add Youtube player info for length 87 2013-07-24 12:48:25 +02:00
Jaime Marquínez Ferrándiz
a052c1d785 Merge pull request #1114 from alexvh/traileraddict_hd
[traileraddict] Obtain hd quality stream if available

Updated md5 checksum of the test video.
2013-07-24 10:52:24 +02:00
Jaime Marquínez Ferrándiz
16484d4923 [traileraddict]: Support clips urls and more trailer urls 2013-07-24 10:43:44 +02:00
Jaime Marquínez Ferrándiz
32a09b4382 Merge pull request #1113 from alexvh/master
[traileraddict] Allow all types of trailer URLs
2013-07-24 10:37:52 +02:00
Alex Van't Hof
b1ca5e3ffa [traileraddict] Obtain hd quality stream if available
No clear method for determining if hd is available so opted to just
check for presence of hd toggle function.
2013-07-24 02:42:32 -04:00
Alex Van't Hof
b9a1252c96 [traileraddict] Allow all types of trailer URLs
Valid url regex for traileraddict.com is too strict. Need to allow,
e.g. theatrical-trailer, teaser-trailer, feature-read-band-trailer, etc.
2013-07-24 00:48:11 -04:00
7 changed files with 230 additions and 202 deletions

View File

@@ -14,7 +14,7 @@ tests = [
# 88 # 88
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
# 87 # 87 - vflART1Nf 2013/07/24
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"), "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
# 86 - vfl_ymO4Z 2013/06/27 # 86 - vfl_ymO4Z 2013/06/27

View File

@@ -93,6 +93,7 @@ from .youtube import (
YoutubeShowIE, YoutubeShowIE,
YoutubeSubscriptionsIE, YoutubeSubscriptionsIE,
YoutubeRecommendedIE, YoutubeRecommendedIE,
YoutubeFavouritesIE,
) )
from .zdf import ZDFIE from .zdf import ZDFIE

View File

@@ -8,17 +8,30 @@ class ExfmIE(InfoExtractor):
IE_NAME = u'exfm' IE_NAME = u'exfm'
IE_DESC = u'ex.fm' IE_DESC = u'ex.fm'
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)' _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
_SOUNDCLOUD_URL_ = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream' _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
_TEST = { _TESTS = [
u'url': u'http://ex.fm/song/1bgtzg', {
u'file': u'1bgtzg.mp3', u'url': u'http://ex.fm/song/1bgtzg',
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf', u'file': u'95223130.mp3',
u'info_dict': { u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
u"title": u"We Can't Stop", u'info_dict': {
u"uploader": u"Miley Cyrus", u"title": u"We Can't Stop - Miley Cyrus",
u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37' u"uploader": u"Miley Cyrus",
} u'upload_date': u'20130603',
} u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
},
u'note': u'Soundcloud song',
},
{
u'url': u'http://ex.fm/song/wddt8',
u'file': u'wddt8.mp3',
u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
u'info_dict': {
u'title': u'Safe and Sound',
u'uploader': u'Capital Cities',
},
},
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@@ -26,11 +39,10 @@ class ExfmIE(InfoExtractor):
info_url = "http://ex.fm/api/v3/song/%s" %(song_id) info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
webpage = self._download_webpage(info_url, song_id) webpage = self._download_webpage(info_url, song_id)
info = json.loads(webpage) info = json.loads(webpage)
song_url = re.match(self._SOUNDCLOUD_URL_,info['song']['url']) song_url = info['song']['url']
if song_url is not None: if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
song_url = song_url.group() + "?client_id=b45b1aa10f1ac2941910a7f0d10f8e28" self.to_screen('Soundcloud song detected')
else: return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
song_url = info['song']['url']
return [{ return [{
'id': song_id, 'id': song_id,
'url': song_url, 'url': song_url,

View File

@@ -19,7 +19,11 @@ class SoundcloudIE(InfoExtractor):
of the stream token and uid of the stream token and uid
""" """
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$' _VALID_URL = r'''^(?:https?://)?
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
)
'''
IE_NAME = u'soundcloud' IE_NAME = u'soundcloud'
_TEST = { _TEST = {
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
@@ -33,59 +37,65 @@ class SoundcloudIE(InfoExtractor):
} }
} }
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
@classmethod
def suitable(cls, url):
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
def report_resolve(self, video_id): def report_resolve(self, video_id):
"""Report information extraction.""" """Report information extraction."""
self.to_screen(u'%s: Resolving id' % video_id) self.to_screen(u'%s: Resolving id' % video_id)
def _real_extract(self, url): @classmethod
mobj = re.match(self._VALID_URL, url) def _resolv_url(cls, url):
if mobj is None: return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
raise ExtractorError(u'Invalid URL: %s' % url)
# extract uploader (which is in the url) def _extract_info_dict(self, info, full_title=None):
uploader = mobj.group(1)
# extract simple title (uploader + slug of song title)
slug_title = mobj.group(2)
full_title = '%s/%s' % (uploader, slug_title)
self.report_resolve(full_title)
url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28'
info_json = self._download_webpage(resolv_url, full_title, u'Downloading info JSON')
info = json.loads(info_json)
video_id = info['id'] video_id = info['id']
self.report_extraction(full_title) name = full_title or video_id
self.report_extraction(name)
streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28' thumbnail = info['artwork_url']
stream_json = self._download_webpage(streams_url, full_title, if thumbnail is not None:
u'Downloading stream definitions', thumbnail = thumbnail.replace('-large', '-t500x500')
u'unable to download stream definitions') return {
streams = json.loads(stream_json)
mediaURL = streams['http_mp3_128_url']
upload_date = unified_strdate(info['created_at'])
return [{
'id': info['id'], 'id': info['id'],
'url': mediaURL, 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'uploader': info['user']['username'], 'uploader': info['user']['username'],
'upload_date': upload_date, 'upload_date': unified_strdate(info['created_at']),
'title': info['title'], 'title': info['title'],
'ext': u'mp3', 'ext': u'mp3',
'description': info['description'], 'description': info['description'],
}] 'thumbnail': thumbnail,
}
class SoundcloudSetIE(InfoExtractor): def _real_extract(self, url):
"""Information extractor for soundcloud.com sets mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
To access the media, the uid of the song and a stream token if mobj is None:
must be extracted from the page source and the script must make raise ExtractorError(u'Invalid URL: %s' % url)
a request to media.soundcloud.com/crossdomain.xml. Then
the media can be grabbed by requesting from an url composed
of the stream token and uid
"""
track_id = mobj.group('track_id')
if track_id is not None:
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
full_title = track_id
else:
# extract uploader (which is in the url)
uploader = mobj.group(1)
# extract simple title (uploader + slug of song title)
slug_title = mobj.group(2)
full_title = '%s/%s' % (uploader, slug_title)
self.report_resolve(full_title)
url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
info_json_url = self._resolv_url(url)
info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
info = json.loads(info_json)
return self._extract_info_dict(info, full_title)
class SoundcloudSetIE(SoundcloudIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
IE_NAME = u'soundcloud:set' IE_NAME = u'soundcloud:set'
_TEST = { _TEST = {
@@ -153,10 +163,6 @@ class SoundcloudSetIE(InfoExtractor):
] ]
} }
def report_resolve(self, video_id):
"""Report information extraction."""
self.to_screen(u'%s: Resolving id' % video_id)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None: if mobj is None:
@@ -171,7 +177,7 @@ class SoundcloudSetIE(InfoExtractor):
self.report_resolve(full_title) self.report_resolve(full_title)
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title) url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=b45b1aa10f1ac2941910a7f0d10f8e28' resolv_url = self._resolv_url(url)
info_json = self._download_webpage(resolv_url, full_title) info_json = self._download_webpage(resolv_url, full_title)
videos = [] videos = []
@@ -182,23 +188,8 @@ class SoundcloudSetIE(InfoExtractor):
return return
self.report_extraction(full_title) self.report_extraction(full_title)
for track in info['tracks']: return {'_type': 'playlist',
video_id = track['id'] 'entries': [self._extract_info_dict(track) for track in info['tracks']],
'id': info['id'],
streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=b45b1aa10f1ac2941910a7f0d10f8e28' 'title': info['title'],
stream_json = self._download_webpage(streams_url, video_id, u'Downloading track info JSON') }
self.report_extraction(video_id)
streams = json.loads(stream_json)
mediaURL = streams['http_mp3_128_url']
videos.append({
'id': video_id,
'url': mediaURL,
'uploader': track['user']['username'],
'upload_date': unified_strdate(track['created_at']),
'title': track['title'],
'ext': u'mp3',
'description': track['description'],
})
return videos

View File

@@ -4,11 +4,11 @@ from .common import InfoExtractor
class TrailerAddictIE(InfoExtractor): class TrailerAddictIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/trailer/([^/]+)/(?:trailer|feature-trailer)' _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
_TEST = { _TEST = {
u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer', u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
u'file': u'76184.mp4', u'file': u'76184.mp4',
u'md5': u'41365557f3c8c397d091da510e73ceb4', u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
u'info_dict': { u'info_dict': {
u"title": u"Prince Avalanche Trailer", u"title": u"Prince Avalanche Trailer",
u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind." u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
@@ -17,24 +17,30 @@ class TrailerAddictIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1) name = mobj.group('movie') + '/' + mobj.group('trailer_name')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, name)
title = self._search_regex(r'<title>(.+?)</title>', title = self._search_regex(r'<title>(.+?)</title>',
webpage, 'video title').replace(' - Trailer Addict','') webpage, 'video title').replace(' - Trailer Addict','')
view_count = self._search_regex(r'Views: (.+?)<br />', view_count = self._search_regex(r'Views: (.+?)<br />',
webpage, 'Views Count') webpage, 'Views Count')
video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1] video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
info_url = "http://www.traileraddict.com/fvar.php?tid=%s" %(str(video_id)) # Presence of (no)watchplus function indicates HD quality is available
if re.search(r'function (no)?watchplus()', webpage):
fvar = "fvarhd"
else:
fvar = "fvar"
info_url = "http://www.traileraddict.com/%s.php?tid=%s" % (fvar, str(video_id))
info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage") info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")
final_url = self._search_regex(r'&fileurl=(.+)', final_url = self._search_regex(r'&fileurl=(.+)',
info_webpage, 'Download url').replace('%3F','?') info_webpage, 'Download url').replace('%3F','?')
thumbnail_url = self._search_regex(r'&image=(.+?)&', thumbnail_url = self._search_regex(r'&image=(.+?)&',
info_webpage, 'thumbnail url') info_webpage, 'thumbnail url')
ext = final_url.split('.')[-1].split('?')[0] ext = final_url.split('.')[-1].split('?')[0]
return [{ return [{
'id' : video_id, 'id' : video_id,
'url' : final_url, 'url' : final_url,

View File

@@ -23,8 +23,114 @@ from ..utils import (
orderedSet, orderedSet,
) )
class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
class YoutubeIE(InfoExtractor): def report_lang(self):
"""Report attempt to set language."""
self.to_screen(u'Setting language')
def _set_language(self):
request = compat_urllib_request.Request(self._LANG_URL)
try:
self.report_lang()
compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
return False
return True
def _login(self):
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return False
request = compat_urllib_request.Request(self._LOGIN_URL)
try:
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
return False
galx = None
dsh = None
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
if match:
galx = match.group(1)
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
if match:
dsh = match.group(1)
# Log in
login_form_strs = {
u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
u'Email': username,
u'GALX': galx,
u'Passwd': password,
u'PersistentCookie': u'yes',
u'_utf8': u'',
u'bgresponse': u'js_disabled',
u'checkConnection': u'',
u'checkedDomains': u'youtube',
u'dnConn': u'',
u'dsh': dsh,
u'pstMsg': u'0',
u'rmShown': u'1',
u'secTok': u'',
u'signIn': u'Sign in',
u'timeStmp': u'',
u'service': u'youtube',
u'uilel': u'3',
u'hl': u'en_US',
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
try:
self.report_login()
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username or password')
return False
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
return False
return True
def _confirm_age(self):
age_form = {
'next_url': '/',
'action_confirm': 'Confirm',
}
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
try:
self.report_age_confirmation()
compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
return True
def _real_initialize(self):
if self._downloader is None:
return
if not self._set_language():
return
if not self._login():
return
self._confirm_age()
class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com' IE_DESC = u'YouTube.com'
_VALID_URL = r"""^ _VALID_URL = r"""^
( (
@@ -45,11 +151,7 @@ class YoutubeIE(InfoExtractor):
([0-9A-Za-z_-]+) # here is it! the YouTube video ID ([0-9A-Za-z_-]+) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow (?(1).+)? # if we found the ID, everything can follow
$""" $"""
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_NETRC_MACHINE = 'youtube'
# Listed in order of quality # Listed in order of quality
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13'] _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
@@ -139,10 +241,6 @@ class YoutubeIE(InfoExtractor):
if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def report_lang(self):
"""Report attempt to set language."""
self.to_screen(u'Setting language')
def report_video_webpage_download(self, video_id): def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage.""" """Report attempt to download video webpage."""
self.to_screen(u'%s: Downloading video webpage' % video_id) self.to_screen(u'%s: Downloading video webpage' % video_id)
@@ -306,91 +404,6 @@ class YoutubeIE(InfoExtractor):
for x in formats: for x in formats:
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
def _real_initialize(self):
if self._downloader is None:
return
# Set language
request = compat_urllib_request.Request(self._LANG_URL)
try:
self.report_lang()
compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
return
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
return
request = compat_urllib_request.Request(self._LOGIN_URL)
try:
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
return
galx = None
dsh = None
match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
if match:
galx = match.group(1)
match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
if match:
dsh = match.group(1)
# Log in
login_form_strs = {
u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
u'Email': username,
u'GALX': galx,
u'Passwd': password,
u'PersistentCookie': u'yes',
u'_utf8': u'',
u'bgresponse': u'js_disabled',
u'checkConnection': u'',
u'checkedDomains': u'youtube',
u'dnConn': u'',
u'dsh': dsh,
u'pstMsg': u'0',
u'rmShown': u'1',
u'secTok': u'',
u'signIn': u'Sign in',
u'timeStmp': u'',
u'service': u'youtube',
u'uilel': u'3',
u'hl': u'en_US',
}
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
# chokes on unicode
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
try:
self.report_login()
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username or password')
return
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
return
# Confirm age
age_form = {
'next_url': '/',
'action_confirm': 'Confirm',
}
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
try:
self.report_age_confirmation()
compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
def _extract_id(self, url): def _extract_id(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE) mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None: if mobj is None:
@@ -670,10 +683,10 @@ class YoutubePlaylistIE(InfoExtractor):
\? (?:.*?&)*? (?:p|a|list)= \? (?:.*?&)*? (?:p|a|list)=
| p/ | p/
) )
((?:PL|EC|UU)?[0-9A-Za-z-_]{10,}) ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
.* .*
| |
((?:PL|EC|UU)[0-9A-Za-z-_]{10,}) ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
)""" )"""
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none' _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
_MAX_RESULTS = 50 _MAX_RESULTS = 50
@@ -696,7 +709,11 @@ class YoutubePlaylistIE(InfoExtractor):
videos = [] videos = []
while True: while True:
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1) start_index = self._MAX_RESULTS * (page_num - 1) + 1
if start_index >= 1000:
self._downloader.report_warning(u'Max number of results reached')
break
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
try: try:
@@ -715,9 +732,6 @@ class YoutubePlaylistIE(InfoExtractor):
index = entry['yt$position']['$t'] index = entry['yt$position']['$t']
if 'media$group' in entry and 'media$player' in entry['media$group']: if 'media$group' in entry and 'media$player' in entry['media$group']:
videos.append((index, entry['media$group']['media$player']['url'])) videos.append((index, entry['media$group']['media$player']['url']))
if len(response['feed']['entry']) < self._MAX_RESULTS:
break
page_num += 1 page_num += 1
videos = [v[1] for v in sorted(videos)] videos = [v[1] for v in sorted(videos)]
@@ -898,20 +912,15 @@ class YoutubeShowIE(InfoExtractor):
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
class YoutubeFeedsInfoExtractor(YoutubeIE): class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
""" """
Base class for extractors that fetch info from Base class for extractors that fetch info from
http://www.youtube.com/feed_ajax http://www.youtube.com/feed_ajax
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
""" """
_LOGIN_REQUIRED = True
_PAGING_STEP = 30 _PAGING_STEP = 30
# Overwrite YoutubeIE properties we don't want
_TESTS = []
@classmethod
def suitable(cls, url):
return re.match(cls._VALID_URL, url) is not None
@property @property
def _FEED_TEMPLATE(self): def _FEED_TEMPLATE(self):
return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME
@@ -921,10 +930,7 @@ class YoutubeFeedsInfoExtractor(YoutubeIE):
return u'youtube:%s' % self._FEED_NAME return u'youtube:%s' % self._FEED_NAME
def _real_initialize(self): def _real_initialize(self):
(username, password) = self._get_login_info() self._login()
if username is None:
raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
super(YoutubeFeedsInfoExtractor, self)._real_initialize()
def _real_extract(self, url): def _real_extract(self, url):
feed_entries = [] feed_entries = []
@@ -954,3 +960,15 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended' _FEED_NAME = 'recommended'
_PLAYLIST_TITLE = u'Youtube Recommended videos' _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = u'youtube:favorites'
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
_LOGIN_REQUIRED = True
def _real_extract(self, url):
webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, u'favourites playlist id')
return self.url_result(playlist_id, 'YoutubePlaylist')

View File

@@ -1,2 +1,2 @@
__version__ = '2013.07.24' __version__ = '2013.07.24.2'