2012-03-25 03:07:37 +02:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
2012-11-28 03:34:40 +01:00
from __future__ import absolute_import
2012-03-25 03:07:37 +02:00
import math
2012-12-17 19:48:10 +01:00
import io
2012-03-25 03:07:37 +02:00
import os
import re
import socket
import subprocess
import sys
import time
2012-12-11 11:02:21 +01:00
import traceback
2012-03-25 03:07:37 +02:00
if os . name == ' nt ' :
2012-11-28 02:04:46 +01:00
import ctypes
2012-10-03 14:20:04 +02:00
2012-11-28 03:34:40 +01:00
from . utils import *
2012-03-25 03:07:37 +02:00
class FileDownloader ( object ) :
2012-11-28 02:04:46 +01:00
""" File Downloader class.
File downloader objects are the ones responsible of downloading the
actual video file and writing it to disk if the user has requested
it , among some other tasks . In most cases there should be one per
program . As , given a video URL , the downloader doesn ' t know how to
extract all the needed information , task that InfoExtractors do , it
has to pass the URL to one of them .
For this , file downloader objects have a method that allows
InfoExtractors to be registered in a given order . When it is passed
a URL , the file downloader handles it to the first InfoExtractor it
finds that reports being able to handle it . The InfoExtractor extracts
all the information about the video or videos the URL refers to , and
asks the FileDownloader to process the video information , possibly
downloading the video .
File downloaders accept a lot of parameters . In order not to saturate
the object constructor with arguments , it receives a dictionary of
options instead . These options are available through the params
attribute for the InfoExtractors to use . The FileDownloader also
registers itself as the downloader in charge for the InfoExtractors
that are added to it , so this is a " mutual registration " .
Available options :
username : Username for authentication purposes .
password : Password for authentication purposes .
usenetrc : Use netrc for authentication instead .
quiet : Do not print messages to stdout .
forceurl : Force printing final URL .
forcetitle : Force printing title .
forcethumbnail : Force printing thumbnail URL .
forcedescription : Force printing description .
forcefilename : Force printing final filename .
simulate : Do not download the video files .
format : Video format code .
format_limit : Highest quality format to try .
outtmpl : Template for output names .
restrictfilenames : Do not allow " & " and spaces in file names
ignoreerrors : Do not stop on download errors .
ratelimit : Download speed limit , in bytes / sec .
nooverwrites : Prevent overwriting files .
retries : Number of times to retry for HTTP error 5 xx
buffersize : Size of download buffer in bytes .
noresizebuffer : Do not automatically resize the download buffer .
continuedl : Try to continue downloads if possible .
noprogress : Do not print the progress bar .
playliststart : Playlist item to start at .
playlistend : Playlist item to end at .
matchtitle : Download only matching titles .
rejecttitle : Reject downloads for matching titles .
logtostderr : Log messages to stderr instead of stdout .
consoletitle : Display progress in console window ' s titlebar.
nopart : Do not use temporary . part files .
updatetime : Use the Last - modified header to set output file timestamps .
writedescription : Write the video description to a . description file
writeinfojson : Write the video description to a . info . json file
writesubtitles : Write the video subtitles to a . srt file
subtitleslang : Language of the subtitles to download
2012-12-11 09:49:27 +01:00
test : Download only first bytes to test the downloader .
2013-01-12 15:07:59 +01:00
keepvideo : Keep the video file after post - processing
2013-01-22 00:50:42 -05:00
min_filesize : Skip files smaller than this size
max_filesize : Skip files larger than this size
2012-11-28 02:04:46 +01:00
"""
params = None
_ies = [ ]
_pps = [ ]
_download_retcode = None
_num_downloads = None
_screen_file = None
def __init__ ( self , params ) :
""" Create a FileDownloader object with the given options. """
self . _ies = [ ]
self . _pps = [ ]
2013-01-12 20:34:50 +01:00
self . _progress_hooks = [ ]
2012-11-28 02:04:46 +01:00
self . _download_retcode = 0
self . _num_downloads = 0
self . _screen_file = [ sys . stdout , sys . stderr ] [ params . get ( ' logtostderr ' , False ) ]
self . params = params
if ' %(stitle)s ' in self . params [ ' outtmpl ' ] :
self . to_stderr ( u ' WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead. ' )
@staticmethod
def format_bytes ( bytes ) :
if bytes is None :
return ' N/A '
if type ( bytes ) is str :
bytes = float ( bytes )
if bytes == 0.0 :
exponent = 0
else :
exponent = int ( math . log ( bytes , 1024.0 ) )
suffix = ' bkMGTPEZY ' [ exponent ]
converted = float ( bytes ) / float ( 1024 * * exponent )
return ' %.2f %s ' % ( converted , suffix )
@staticmethod
def calc_percent ( byte_counter , data_len ) :
if data_len is None :
return ' ---.- % '
return ' %6s ' % ( ' %3.1f %% ' % ( float ( byte_counter ) / float ( data_len ) * 100.0 ) )
@staticmethod
def calc_eta ( start , now , total , current ) :
if total is None :
return ' --:-- '
dif = now - start
if current == 0 or dif < 0.001 : # One millisecond
return ' --:-- '
rate = float ( current ) / dif
eta = int ( ( float ( total ) - float ( current ) ) / rate )
( eta_mins , eta_secs ) = divmod ( eta , 60 )
if eta_mins > 99 :
return ' --:-- '
return ' %02d : %02d ' % ( eta_mins , eta_secs )
@staticmethod
def calc_speed ( start , now , bytes ) :
dif = now - start
if bytes == 0 or dif < 0.001 : # One millisecond
return ' %10s ' % ' ---b/s '
return ' %10s ' % ( ' %s /s ' % FileDownloader . format_bytes ( float ( bytes ) / dif ) )
@staticmethod
def best_block_size ( elapsed_time , bytes ) :
new_min = max ( bytes / 2.0 , 1.0 )
new_max = min ( max ( bytes * 2.0 , 1.0 ) , 4194304 ) # Do not surpass 4 MB
if elapsed_time < 0.001 :
return int ( new_max )
rate = bytes / elapsed_time
if rate > new_max :
return int ( new_max )
if rate < new_min :
return int ( new_min )
return int ( rate )
@staticmethod
def parse_bytes ( bytestr ) :
""" Parse a string indicating a byte quantity into an integer. """
matchobj = re . match ( r ' (?i)^( \ d+(?: \ . \ d+)?)([kMGTPEZY]?)$ ' , bytestr )
if matchobj is None :
return None
number = float ( matchobj . group ( 1 ) )
multiplier = 1024.0 * * ' bkmgtpezy ' . index ( matchobj . group ( 2 ) . lower ( ) )
return int ( round ( number * multiplier ) )
def add_info_extractor ( self , ie ) :
""" Add an InfoExtractor object to the end of the list. """
self . _ies . append ( ie )
ie . set_downloader ( self )
def add_post_processor ( self , pp ) :
""" Add a PostProcessor object to the end of the chain. """
self . _pps . append ( pp )
pp . set_downloader ( self )
def to_screen ( self , message , skip_eol = False ) :
""" Print message to stdout if not in quiet mode. """
assert type ( message ) == type ( u ' ' )
if not self . params . get ( ' quiet ' , False ) :
terminator = [ u ' \n ' , u ' ' ] [ skip_eol ]
output = message + terminator
if ' b ' in getattr ( self . _screen_file , ' mode ' , ' ' ) or sys . version_info [ 0 ] < 3 : # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output . encode ( preferredencoding ( ) , ' ignore ' )
self . _screen_file . write ( output )
self . _screen_file . flush ( )
def to_stderr ( self , message ) :
""" Print message to stderr. """
assert type ( message ) == type ( u ' ' )
output = message + u ' \n '
if ' b ' in getattr ( self . _screen_file , ' mode ' , ' ' ) or sys . version_info [ 0 ] < 3 : # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output . encode ( preferredencoding ( ) )
sys . stderr . write ( output )
def to_cons_title ( self , message ) :
""" Set console/terminal window title to message. """
if not self . params . get ( ' consoletitle ' , False ) :
return
if os . name == ' nt ' and ctypes . windll . kernel32 . GetConsoleWindow ( ) :
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes . windll . kernel32 . SetConsoleTitleW ( ctypes . c_wchar_p ( message ) )
elif ' TERM ' in os . environ :
2013-02-09 22:58:12 +02:00
self . to_screen ( ' \033 ]0; %s \007 ' % message , skip_eol = True )
2012-11-28 02:04:46 +01:00
def fixed_template ( self ) :
""" Checks if the output template is fixed. """
return ( re . search ( u ' (?u) % \\ (.+? \\ )s ' , self . params [ ' outtmpl ' ] ) is None )
2013-01-01 20:27:53 +01:00
def trouble ( self , message = None , tb = None ) :
2012-11-28 02:04:46 +01:00
""" Determine action to take when a download problem appears.
Depending on if the downloader has been configured to ignore
download errors or not , this method may throw an exception or
not when errors are found , after printing the message .
2013-01-03 15:39:55 +01:00
tb , if given , is additional traceback information .
2012-11-28 02:04:46 +01:00
"""
if message is not None :
self . to_stderr ( message )
2012-12-11 11:02:21 +01:00
if self . params . get ( ' verbose ' ) :
2013-01-01 20:27:53 +01:00
if tb is None :
2013-01-03 15:39:55 +01:00
tb_data = traceback . format_list ( traceback . extract_stack ( ) )
tb = u ' ' . join ( tb_data )
2013-01-01 20:27:53 +01:00
self . to_stderr ( tb )
2012-11-28 02:04:46 +01:00
if not self . params . get ( ' ignoreerrors ' , False ) :
raise DownloadError ( message )
self . _download_retcode = 1
def slow_down ( self , start_time , byte_counter ) :
""" Sleep if the download speed is over the rate limit. """
rate_limit = self . params . get ( ' ratelimit ' , None )
if rate_limit is None or byte_counter == 0 :
return
now = time . time ( )
elapsed = now - start_time
if elapsed < = 0.0 :
return
speed = float ( byte_counter ) / elapsed
if speed > rate_limit :
time . sleep ( ( byte_counter - rate_limit * ( now - start_time ) ) / rate_limit )
def temp_name ( self , filename ) :
""" Returns a temporary filename for the given filename. """
if self . params . get ( ' nopart ' , False ) or filename == u ' - ' or \
( os . path . exists ( encodeFilename ( filename ) ) and not os . path . isfile ( encodeFilename ( filename ) ) ) :
return filename
return filename + u ' .part '
def undo_temp_name ( self , filename ) :
if filename . endswith ( u ' .part ' ) :
return filename [ : - len ( u ' .part ' ) ]
return filename
def try_rename ( self , old_filename , new_filename ) :
try :
if old_filename == new_filename :
return
os . rename ( encodeFilename ( old_filename ) , encodeFilename ( new_filename ) )
except ( IOError , OSError ) as err :
self . trouble ( u ' ERROR: unable to rename file ' )
def try_utime ( self , filename , last_modified_hdr ) :
""" Try to set the last-modified time of the given file. """
if last_modified_hdr is None :
return
if not os . path . isfile ( encodeFilename ( filename ) ) :
return
timestr = last_modified_hdr
if timestr is None :
return
filetime = timeconvert ( timestr )
if filetime is None :
return filetime
try :
os . utime ( filename , ( time . time ( ) , filetime ) )
except :
pass
return filetime
def report_writedescription ( self , descfn ) :
""" Report that the description file is being written """
self . to_screen ( u ' [info] Writing video description to: ' + descfn )
def report_writesubtitles ( self , srtfn ) :
""" Report that the subtitles file is being written """
self . to_screen ( u ' [info] Writing video subtitles to: ' + srtfn )
def report_writeinfojson ( self , infofn ) :
""" Report that the metadata file has been written """
self . to_screen ( u ' [info] Video description metadata as JSON to: ' + infofn )
def report_destination ( self , filename ) :
""" Report destination filename. """
self . to_screen ( u ' [download] Destination: ' + filename )
def report_progress ( self , percent_str , data_len_str , speed_str , eta_str ) :
""" Report download progress. """
if self . params . get ( ' noprogress ' , False ) :
return
2013-02-18 18:52:06 +01:00
if self . params . get ( ' progress_with_newline ' , False ) :
2013-02-13 16:43:08 +13:00
self . to_screen ( u ' [download] %s of %s at %s ETA %s ' %
2013-02-13 14:02:31 +13:00
( percent_str , data_len_str , speed_str , eta_str ) )
2013-02-18 18:52:06 +01:00
else :
self . to_screen ( u ' \r [download] %s of %s at %s ETA %s ' %
2012-11-28 02:04:46 +01:00
( percent_str , data_len_str , speed_str , eta_str ) , skip_eol = True )
self . to_cons_title ( u ' youtube-dl - %s of %s at %s ETA %s ' %
( percent_str . strip ( ) , data_len_str . strip ( ) , speed_str . strip ( ) , eta_str . strip ( ) ) )
def report_resuming_byte ( self , resume_len ) :
""" Report attempt to resume at given byte. """
self . to_screen ( u ' [download] Resuming download at byte %s ' % resume_len )
def report_retry ( self , count , retries ) :
""" Report retry in case of HTTP error 5xx """
self . to_screen ( u ' [download] Got server HTTP error. Retrying (attempt %d of %d )... ' % ( count , retries ) )
def report_file_already_downloaded ( self , file_name ) :
""" Report file has already been fully downloaded. """
try :
self . to_screen ( u ' [download] %s has already been downloaded ' % file_name )
except ( UnicodeEncodeError ) as err :
self . to_screen ( u ' [download] The file has already been downloaded ' )
def report_unable_to_resume ( self ) :
""" Report it was impossible to resume download. """
self . to_screen ( u ' [download] Unable to resume ' )
def report_finish ( self ) :
""" Report download finished. """
if self . params . get ( ' noprogress ' , False ) :
self . to_screen ( u ' [download] Download completed ' )
else :
self . to_screen ( u ' ' )
def increment_downloads ( self ) :
""" Increment the ordinal that assigns a number to each file. """
self . _num_downloads + = 1
def prepare_filename ( self , info_dict ) :
""" Generate the output filename. """
try :
template_dict = dict ( info_dict )
template_dict [ ' epoch ' ] = int ( time . time ( ) )
template_dict [ ' autonumber ' ] = u ' %05d ' % self . _num_downloads
2012-12-03 15:36:24 +01:00
sanitize = lambda k , v : sanitize_filename (
u ' NA ' if v is None else compat_str ( v ) ,
restricted = self . params . get ( ' restrictfilenames ' ) ,
is_id = ( k == u ' id ' ) )
template_dict = dict ( ( k , sanitize ( k , v ) ) for k , v in template_dict . items ( ) )
2012-11-28 02:04:46 +01:00
filename = self . params [ ' outtmpl ' ] % template_dict
return filename
except ( ValueError , KeyError ) as err :
self . trouble ( u ' ERROR: invalid system charset or erroneous output template ' )
return None
def _match_entry ( self , info_dict ) :
""" Returns None iff the file should be downloaded """
title = info_dict [ ' title ' ]
matchtitle = self . params . get ( ' matchtitle ' , False )
if matchtitle :
matchtitle = matchtitle . decode ( ' utf8 ' )
if not re . search ( matchtitle , title , re . IGNORECASE ) :
return u ' [download] " ' + title + ' " title did not match pattern " ' + matchtitle + ' " '
rejecttitle = self . params . get ( ' rejecttitle ' , False )
if rejecttitle :
rejecttitle = rejecttitle . decode ( ' utf8 ' )
if re . search ( rejecttitle , title , re . IGNORECASE ) :
return u ' " ' + title + ' " title matched reject pattern " ' + rejecttitle + ' " '
return None
def process_info ( self , info_dict ) :
""" Process a single dictionary returned by an InfoExtractor. """
# Keep for backwards compatibility
info_dict [ ' stitle ' ] = info_dict [ ' title ' ]
if not ' format ' in info_dict :
info_dict [ ' format ' ] = info_dict [ ' ext ' ]
reason = self . _match_entry ( info_dict )
if reason is not None :
self . to_screen ( u ' [download] ' + reason )
return
max_downloads = self . params . get ( ' max_downloads ' )
if max_downloads is not None :
if self . _num_downloads > int ( max_downloads ) :
raise MaxDownloadsReached ( )
filename = self . prepare_filename ( info_dict )
# Forced printings
if self . params . get ( ' forcetitle ' , False ) :
compat_print ( info_dict [ ' title ' ] )
if self . params . get ( ' forceurl ' , False ) :
compat_print ( info_dict [ ' url ' ] )
if self . params . get ( ' forcethumbnail ' , False ) and ' thumbnail ' in info_dict :
compat_print ( info_dict [ ' thumbnail ' ] )
if self . params . get ( ' forcedescription ' , False ) and ' description ' in info_dict :
compat_print ( info_dict [ ' description ' ] )
if self . params . get ( ' forcefilename ' , False ) and filename is not None :
compat_print ( filename )
if self . params . get ( ' forceformat ' , False ) :
compat_print ( info_dict [ ' format ' ] )
# Do nothing else if in simulate mode
if self . params . get ( ' simulate ' , False ) :
return
if filename is None :
return
try :
dn = os . path . dirname ( encodeFilename ( filename ) )
if dn != ' ' and not os . path . exists ( dn ) : # dn is already encoded
os . makedirs ( dn )
except ( OSError , IOError ) as err :
self . trouble ( u ' ERROR: unable to create directory ' + compat_str ( err ) )
return
if self . params . get ( ' writedescription ' , False ) :
try :
descfn = filename + u ' .description '
self . report_writedescription ( descfn )
2012-12-20 13:25:54 +01:00
with io . open ( encodeFilename ( descfn ) , ' w ' , encoding = ' utf-8 ' ) as descfile :
descfile . write ( info_dict [ ' description ' ] )
2012-11-28 02:04:46 +01:00
except ( OSError , IOError ) :
self . trouble ( u ' ERROR: Cannot write description file ' + descfn )
return
if self . params . get ( ' writesubtitles ' , False ) and ' subtitles ' in info_dict and info_dict [ ' subtitles ' ] :
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
try :
srtfn = filename . rsplit ( ' . ' , 1 ) [ 0 ] + u ' .srt '
self . report_writesubtitles ( srtfn )
2012-12-20 14:18:23 +01:00
with io . open ( encodeFilename ( srtfn ) , ' w ' , encoding = ' utf-8 ' ) as srtfile :
srtfile . write ( info_dict [ ' subtitles ' ] )
2012-11-28 02:04:46 +01:00
except ( OSError , IOError ) :
self . trouble ( u ' ERROR: Cannot write subtitles file ' + descfn )
return
if self . params . get ( ' writeinfojson ' , False ) :
infofn = filename + u ' .info.json '
self . report_writeinfojson ( infofn )
try :
2012-12-20 13:13:24 +01:00
json_info_dict = dict ( ( k , v ) for k , v in info_dict . items ( ) if not k in [ ' urlhandle ' ] )
write_json_file ( json_info_dict , encodeFilename ( infofn ) )
2012-11-28 02:04:46 +01:00
except ( OSError , IOError ) :
self . trouble ( u ' ERROR: Cannot write metadata to JSON file ' + infofn )
return
if not self . params . get ( ' skip_download ' , False ) :
if self . params . get ( ' nooverwrites ' , False ) and os . path . exists ( encodeFilename ( filename ) ) :
success = True
else :
try :
success = self . _do_download ( filename , info_dict )
except ( OSError , IOError ) as err :
2012-11-28 18:21:06 +01:00
raise UnavailableVideoError ( )
2012-11-28 02:04:46 +01:00
except ( compat_urllib_error . URLError , compat_http_client . HTTPException , socket . error ) as err :
self . trouble ( u ' ERROR: unable to download video data: %s ' % str ( err ) )
return
except ( ContentTooShortError , ) as err :
self . trouble ( u ' ERROR: content too short (expected %s bytes and served %s ) ' % ( err . expected , err . downloaded ) )
return
if success :
try :
self . post_process ( filename , info_dict )
except ( PostProcessingError ) as err :
self . trouble ( u ' ERROR: postprocessing: %s ' % str ( err ) )
return
def download ( self , url_list ) :
""" Download a given list of URLs. """
if len ( url_list ) > 1 and self . fixed_template ( ) :
raise SameFileError ( self . params [ ' outtmpl ' ] )
for url in url_list :
suitable_found = False
for ie in self . _ies :
# Go to next InfoExtractor if not suitable
if not ie . suitable ( url ) :
continue
# Warn if the _WORKING attribute is False
if not ie . working ( ) :
2013-01-01 20:27:53 +01:00
self . to_stderr ( u ' WARNING: the program functionality for this site has been marked as broken, '
u ' and will probably not work. If you want to go on, use the -i option. ' )
2012-11-28 02:04:46 +01:00
# Suitable InfoExtractor found
suitable_found = True
# Extract information from URL and process it
2013-01-01 20:27:53 +01:00
try :
videos = ie . extract ( url )
except ExtractorError as de : # An error we somewhat expected
2013-01-03 15:39:55 +01:00
self . trouble ( u ' ERROR: ' + compat_str ( de ) , de . format_traceback ( ) )
2013-01-01 20:27:53 +01:00
break
except Exception as e :
if self . params . get ( ' ignoreerrors ' , False ) :
self . trouble ( u ' ERROR: ' + compat_str ( e ) , tb = compat_str ( traceback . format_exc ( ) ) )
break
else :
raise
2012-12-31 19:09:37 +01:00
2012-12-31 19:21:28 +01:00
if len ( videos or [ ] ) > 1 and self . fixed_template ( ) :
2012-12-31 19:09:37 +01:00
raise SameFileError ( self . params [ ' outtmpl ' ] )
2012-11-28 02:04:46 +01:00
for video in videos or [ ] :
video [ ' extractor ' ] = ie . IE_NAME
try :
self . increment_downloads ( )
self . process_info ( video )
except UnavailableVideoError :
self . trouble ( u ' \n ERROR: unable to download video ' )
# Suitable InfoExtractor had been found; go to next URL
break
if not suitable_found :
self . trouble ( u ' ERROR: no suitable InfoExtractor: %s ' % url )
return self . _download_retcode
def post_process ( self , filename , ie_info ) :
2013-01-12 15:07:59 +01:00
""" Run all the postprocessors on the given file. """
2012-11-28 02:04:46 +01:00
info = dict ( ie_info )
info [ ' filepath ' ] = filename
2013-01-12 15:07:59 +01:00
keep_video = None
2012-11-28 02:04:46 +01:00
for pp in self . _pps :
2013-01-12 15:07:59 +01:00
try :
keep_video_wish , new_info = pp . run ( info )
if keep_video_wish is not None :
if keep_video_wish :
keep_video = keep_video_wish
elif keep_video is None :
# No clear decision yet, let IE decide
keep_video = keep_video_wish
except PostProcessingError as e :
self . to_stderr ( u ' ERROR: ' + e . msg )
2013-01-12 15:12:28 +01:00
if keep_video is False and not self . params . get ( ' keepvideo ' , False ) :
2013-01-12 15:07:59 +01:00
try :
self . to_stderr ( u ' Deleting original file %s (pass -k to keep) ' % filename )
os . remove ( encodeFilename ( filename ) )
except ( IOError , OSError ) :
self . to_stderr ( u ' WARNING: Unable to remove downloaded video file ' )
2012-11-28 02:04:46 +01:00
2013-01-03 20:26:38 +01:00
def _download_with_rtmpdump ( self , filename , url , player_url , page_url ) :
2012-11-28 02:04:46 +01:00
self . report_destination ( filename )
tmpfilename = self . temp_name ( filename )
# Check for rtmpdump first
try :
subprocess . call ( [ ' rtmpdump ' , ' -h ' ] , stdout = ( file ( os . path . devnull , ' w ' ) ) , stderr = subprocess . STDOUT )
except ( OSError , IOError ) :
self . trouble ( u ' ERROR: RTMP download detected but " rtmpdump " could not be run ' )
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
2013-01-03 20:26:38 +01:00
basic_args = [ ' rtmpdump ' , ' -q ' , ' -r ' , url , ' -o ' , tmpfilename ]
if player_url is not None :
basic_args + = [ ' -W ' , player_url ]
if page_url is not None :
basic_args + = [ ' --pageUrl ' , page_url ]
2012-11-28 02:04:46 +01:00
args = basic_args + [ [ ] , [ ' -e ' , ' -k ' , ' 1 ' ] ] [ self . params . get ( ' continuedl ' , False ) ]
if self . params . get ( ' verbose ' , False ) :
try :
import pipes
shell_quote = lambda args : ' ' . join ( map ( pipes . quote , args ) )
except ImportError :
shell_quote = repr
self . to_screen ( u ' [debug] rtmpdump command line: ' + shell_quote ( args ) )
retval = subprocess . call ( args )
while retval == 2 or retval == 1 :
prevsize = os . path . getsize ( encodeFilename ( tmpfilename ) )
self . to_screen ( u ' \r [rtmpdump] %s bytes ' % prevsize , skip_eol = True )
time . sleep ( 5.0 ) # This seems to be needed
retval = subprocess . call ( basic_args + [ ' -e ' ] + [ [ ] , [ ' -k ' , ' 1 ' ] ] [ retval == 1 ] )
cursize = os . path . getsize ( encodeFilename ( tmpfilename ) )
if prevsize == cursize and retval == 1 :
break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == 2 and cursize > 1024 :
self . to_screen ( u ' \r [rtmpdump] Could not download the whole video. This can happen for some advertisements. ' )
retval = 0
break
if retval == 0 :
2013-01-12 20:34:50 +01:00
fsize = os . path . getsize ( encodeFilename ( tmpfilename ) )
self . to_screen ( u ' \r [rtmpdump] %s bytes ' % fsize )
2012-11-28 02:04:46 +01:00
self . try_rename ( tmpfilename , filename )
2013-01-12 20:34:50 +01:00
self . _hook_progress ( {
' downloaded_bytes ' : fsize ,
' total_bytes ' : fsize ,
' filename ' : filename ,
' status ' : ' finished ' ,
} )
2012-11-28 02:04:46 +01:00
return True
else :
self . trouble ( u ' \n ERROR: rtmpdump exited with code %d ' % retval )
return False
def _do_download ( self , filename , info_dict ) :
url = info_dict [ ' url ' ]
# Check file already present
if self . params . get ( ' continuedl ' , False ) and os . path . isfile ( encodeFilename ( filename ) ) and not self . params . get ( ' nopart ' , False ) :
self . report_file_already_downloaded ( filename )
2013-01-12 20:34:50 +01:00
self . _hook_progress ( {
' filename ' : filename ,
' status ' : ' finished ' ,
} )
2012-11-28 02:04:46 +01:00
return True
# Attempt to download using rtmpdump
if url . startswith ( ' rtmp ' ) :
2013-01-03 20:26:38 +01:00
return self . _download_with_rtmpdump ( filename , url ,
info_dict . get ( ' player_url ' , None ) ,
info_dict . get ( ' page_url ' , None ) )
2012-11-28 02:04:46 +01:00
tmpfilename = self . temp_name ( filename )
stream = None
# Do not include the Accept-Encoding header
headers = { ' Youtubedl-no-compression ' : ' True ' }
2013-01-12 16:49:13 +01:00
if ' user_agent ' in info_dict :
headers [ ' Youtubedl-user-agent ' ] = info_dict [ ' user_agent ' ]
2012-11-28 02:04:46 +01:00
basic_request = compat_urllib_request . Request ( url , None , headers )
request = compat_urllib_request . Request ( url , None , headers )
2012-12-11 09:49:27 +01:00
if self . params . get ( ' test ' , False ) :
request . add_header ( ' Range ' , ' bytes=0-10240 ' )
2012-11-28 02:04:46 +01:00
# Establish possible resume length
if os . path . isfile ( encodeFilename ( tmpfilename ) ) :
resume_len = os . path . getsize ( encodeFilename ( tmpfilename ) )
else :
resume_len = 0
open_mode = ' wb '
if resume_len != 0 :
if self . params . get ( ' continuedl ' , False ) :
self . report_resuming_byte ( resume_len )
request . add_header ( ' Range ' , ' bytes= %d - ' % resume_len )
open_mode = ' ab '
else :
resume_len = 0
count = 0
retries = self . params . get ( ' retries ' , 0 )
while count < = retries :
# Establish connection
try :
if count == 0 and ' urlhandle ' in info_dict :
data = info_dict [ ' urlhandle ' ]
data = compat_urllib_request . urlopen ( request )
break
except ( compat_urllib_error . HTTPError , ) as err :
if ( err . code < 500 or err . code > = 600 ) and err . code != 416 :
# Unexpected HTTP error
raise
elif err . code == 416 :
# Unable to resume (requested range not satisfiable)
try :
# Open the connection again without the range header
data = compat_urllib_request . urlopen ( basic_request )
content_length = data . info ( ) [ ' Content-Length ' ]
except ( compat_urllib_error . HTTPError , ) as err :
if err . code < 500 or err . code > = 600 :
raise
else :
# Examine the reported length
if ( content_length is not None and
( resume_len - 100 < int ( content_length ) < resume_len + 100 ) ) :
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
# changing the file size slightly and causing problems for some users. So
# I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive.
self . report_file_already_downloaded ( filename )
self . try_rename ( tmpfilename , filename )
2013-01-12 20:34:50 +01:00
self . _hook_progress ( {
' filename ' : filename ,
' status ' : ' finished ' ,
} )
2012-11-28 02:04:46 +01:00
return True
else :
# The length does not match, we start the download over
self . report_unable_to_resume ( )
open_mode = ' wb '
break
# Retry
count + = 1
if count < = retries :
self . report_retry ( count , retries )
if count > retries :
self . trouble ( u ' ERROR: giving up after %s retries ' % retries )
return False
data_len = data . info ( ) . get ( ' Content-length ' , None )
if data_len is not None :
data_len = int ( data_len ) + resume_len
2013-01-22 00:50:42 -05:00
min_data_len = self . params . get ( " min_filesize " , None )
max_data_len = self . params . get ( " max_filesize " , None )
if min_data_len is not None and data_len < min_data_len :
self . to_screen ( u ' \r [download] File is smaller than min-filesize ( %s bytes < %s bytes). Aborting. ' % ( data_len , min_data_len ) )
return False
if max_data_len is not None and data_len > max_data_len :
self . to_screen ( u ' \r [download] File is larger than max-filesize ( %s bytes > %s bytes). Aborting. ' % ( data_len , max_data_len ) )
return False
2012-11-28 02:04:46 +01:00
data_len_str = self . format_bytes ( data_len )
byte_counter = 0 + resume_len
block_size = self . params . get ( ' buffersize ' , 1024 )
start = time . time ( )
while True :
# Download and write
before = time . time ( )
data_block = data . read ( block_size )
after = time . time ( )
if len ( data_block ) == 0 :
break
byte_counter + = len ( data_block )
# Open file just in time
if stream is None :
try :
( stream , tmpfilename ) = sanitize_open ( tmpfilename , open_mode )
assert stream is not None
filename = self . undo_temp_name ( tmpfilename )
self . report_destination ( filename )
except ( OSError , IOError ) as err :
self . trouble ( u ' ERROR: unable to open for writing: %s ' % str ( err ) )
return False
try :
stream . write ( data_block )
except ( IOError , OSError ) as err :
self . trouble ( u ' \n ERROR: unable to write data: %s ' % str ( err ) )
return False
if not self . params . get ( ' noresizebuffer ' , False ) :
block_size = self . best_block_size ( after - before , len ( data_block ) )
# Progress message
speed_str = self . calc_speed ( start , time . time ( ) , byte_counter - resume_len )
if data_len is None :
self . report_progress ( ' Unknown % ' , data_len_str , speed_str , ' Unknown ETA ' )
else :
percent_str = self . calc_percent ( byte_counter , data_len )
eta_str = self . calc_eta ( start , time . time ( ) , data_len - resume_len , byte_counter - resume_len )
self . report_progress ( percent_str , data_len_str , speed_str , eta_str )
2013-01-12 20:34:50 +01:00
self . _hook_progress ( {
' downloaded_bytes ' : byte_counter ,
' total_bytes ' : data_len ,
' tmpfilename ' : tmpfilename ,
' filename ' : filename ,
' status ' : ' downloading ' ,
} )
2012-11-28 02:04:46 +01:00
# Apply rate limit
self . slow_down ( start , byte_counter - resume_len )
if stream is None :
self . trouble ( u ' \n ERROR: Did not get any data blocks ' )
return False
stream . close ( )
self . report_finish ( )
if data_len is not None and byte_counter != data_len :
raise ContentTooShortError ( byte_counter , int ( data_len ) )
self . try_rename ( tmpfilename , filename )
# Update file modification time
if self . params . get ( ' updatetime ' , True ) :
info_dict [ ' filetime ' ] = self . try_utime ( filename , data . info ( ) . get ( ' last-modified ' , None ) )
2013-01-12 20:34:50 +01:00
self . _hook_progress ( {
' downloaded_bytes ' : byte_counter ,
' total_bytes ' : byte_counter ,
' filename ' : filename ,
' status ' : ' finished ' ,
} )
2012-11-28 02:04:46 +01:00
return True
2013-01-12 20:34:50 +01:00
def _hook_progress ( self , status ) :
for ph in self . _progress_hooks :
ph ( status )
def add_progress_hook ( self , ph ) :
""" ph gets called on download progress, with a dictionary with the entries
* filename : The final filename
* status : One of " downloading " and " finished "
It can also have some of the following entries :
* downloaded_bytes : Bytes on disks
* total_bytes : Total bytes , None if unknown
* tmpfilename : The filename we ' re currently writing to
Hooks are guaranteed to be called at least once ( with status " finished " )
if the download is successful .
"""
self . _progress_hooks . append ( ph )