Compare commits

..

5 Commits

Author SHA1 Message Date
Ricardo Garcia
5745bfdcdc Bump version number 2010-10-31 11:24:32 +01:00
Ricardo Garcia
320becd692 Remove trails from the "append_const" change (fixes issue #23) 2010-10-31 11:24:32 +01:00
Ricardo Garcia
968aa88438 Only catch UnavailableFormatError in call to process_info 2010-10-31 11:24:32 +01:00
Ricardo Garcia
cbfff4db63 Verify URLs in simulate mode (fixes issue #22) 2010-10-31 11:24:32 +01:00
Ricardo Garcia
781daeabdb Restore "INTERNAL" version number 2010-10-31 11:24:32 +01:00
2 changed files with 65 additions and 59 deletions

View File

@@ -1 +1 @@
2009.05.23
2009.05.25

View File

@@ -200,6 +200,14 @@ class FileDownloader(object):
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return long(round(number * multiplier))
@staticmethod
def verify_url(url):
    """Verify a URL is valid and data could be downloaded.

    Builds a request for `url` using `std_headers`, opens it and reads a
    single byte from the response.

    Returns nothing. Any exception raised while opening or reading the
    URL is propagated to the caller unchanged.
    """
    request = urllib2.Request(url, None, std_headers)
    data = urllib2.urlopen(request)
    try:
        # Reading one byte confirms data can be fetched without
        # downloading the whole resource.
        data.read(1)
    finally:
        # Close the response even when the read fails, so the
        # connection is not leaked on the error path.
        data.close()
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
@@ -265,16 +273,21 @@ class FileDownloader(object):
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
# Forced printings
if self.params.get('forcetitle', False):
print info_dict['title'].encode(locale.getpreferredencoding())
if self.params.get('forceurl', False):
print info_dict['url'].encode(locale.getpreferredencoding())
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
return
try:
self.verify_url(info_dict['url'])
except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
raise UnavailableFormatError
# Forced printings
if self.params.get('forcetitle', False):
print info_dict['title'].encode(locale.getpreferredencoding())
if self.params.get('forceurl', False):
print info_dict['url'].encode(locale.getpreferredencoding())
return
try:
template_dict = dict(info_dict)
template_dict['epoch'] = unicode(long(time.time()))
@@ -607,53 +620,53 @@ class YoutubeIE(InfoExtractor):
best_quality = True
while True:
# Extension
video_extension = self._video_extensions.get(format_param, 'flv')
# Normalize URL, including format
normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
if format_param is not None:
normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
request = urllib2.Request(normalized_url, None, std_headers)
try:
# Extension
video_extension = self._video_extensions.get(format_param, 'flv')
self.report_webpage_download(video_id)
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
return
self.report_information_extraction(video_id)
# "t" param
mobj = re.search(r', "t": "([^"]+)"', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
return
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
if format_param is not None:
video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
self.report_video_url(video_id, video_real_url)
# Normalize URL, including format
normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
if format_param is not None:
normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
request = urllib2.Request(normalized_url, None, std_headers)
try:
self.report_webpage_download(video_id)
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
return
self.report_information_extraction(video_id)
# "t" param
mobj = re.search(r', "t": "([^"]+)"', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
return
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
if format_param is not None:
video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
self.report_video_url(video_id, video_real_url)
# uploader
mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = mobj.group(1)
# uploader
mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = mobj.group(1)
# title
mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = mobj.group(1).decode('utf-8')
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
video_title = video_title.replace(os.sep, u'%')
# title
mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = mobj.group(1).decode('utf-8')
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
video_title = video_title.replace(os.sep, u'%')
# simplified title
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
simple_title = simple_title.strip(ur'_')
# simplified title
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
simple_title = simple_title.strip(ur'_')
try:
# Process video information
self._downloader.process_info({
'id': video_id.decode('utf-8'),
@@ -1016,7 +1029,7 @@ if __name__ == '__main__':
# Parse command line
parser = optparse.OptionParser(
usage='Usage: %prog [options] url...',
version='2009.05.23',
version='2009.05.25',
conflict_handler='resolve',
)
@@ -1104,13 +1117,6 @@ if __name__ == '__main__':
if numeric_limit is None:
parser.error(u'invalid rate limit specified')
opts.ratelimit = numeric_limit
if opts.format is not None and len(opts.format) > 1:
parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)')
if opts.format is None:
real_format = None
else:
real_format = opts.format[0]
# Information extractors
youtube_ie = YoutubeIE()
@@ -1127,7 +1133,7 @@ if __name__ == '__main__':
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
'simulate': (opts.simulate or opts.geturl or opts.gettitle),
'format': real_format,
'format': opts.format,
'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')