Compare commits

..

38 Commits

Author SHA1 Message Date
Philipp Hagemeister
8958b6916c release 2013.12.23.4 2013-12-23 05:08:35 +01:00
Philipp Hagemeister
9fc3bef87a Merge remote-tracking branch 'jaimeMF/split-downloaders' 2013-12-23 05:03:32 +01:00
Philipp Hagemeister
d80044c235 [youtube] Prefer videos with sound 2013-12-23 04:51:42 +01:00
Philipp Hagemeister
bc2103f3bf release 2013.12.23.3 2013-12-23 04:39:55 +01:00
Philipp Hagemeister
f82b18efc1 Merge remote-tracking branch 'rzhxeo/youtube' 2013-12-23 04:37:40 +01:00
Philipp Hagemeister
504c668d3b release 2013.12.23.2 2013-12-23 04:31:45 +01:00
Philipp Hagemeister
466617f539 [bliptv] Simplify (From #2000) 2013-12-23 04:31:38 +01:00
Philipp Hagemeister
196938835a Remove debugging code
Introduced by accident in 5d681e960d
2013-12-23 04:30:57 +01:00
Philipp Hagemeister
a94e129a65 release 2013.12.23.1 2013-12-23 04:20:25 +01:00
Philipp Hagemeister
5d681e960d Use bidiv instead of fribidi if available (Fixes #1912) 2013-12-23 04:19:50 +01:00
Philipp Hagemeister
c7b487d96b release 2013.12.23 2013-12-23 03:45:02 +01:00
Philipp Hagemeister
7dbf5ae587 [smotri] Add support for moderated (?) videos (Fixes #2030) 2013-12-23 03:44:47 +01:00
Philipp Hagemeister
8d0bdeba18 [smotri] Make optional attributes optional 2013-12-23 03:38:29 +01:00
Philipp Hagemeister
1b969041d7 [blinkx] Support mobile URLs (Closes #2022) 2013-12-22 07:43:54 +01:00
Philipp Hagemeister
e302f9ce32 [youtube:user] Speed up --match-title 2013-12-22 03:57:42 +01:00
Philipp Hagemeister
5a94982abe Remove unused import 2013-12-22 03:52:12 +01:00
Philipp Hagemeister
7115ca84aa [vimeo/generic] Add support for embedded SWF vimeo videos 2013-12-22 03:34:13 +01:00
Philipp Hagemeister
04ff34ab89 Show all matching URLs 2013-12-22 03:25:55 +01:00
Philipp Hagemeister
bbafbe20c2 [vimeo] Better formatting for regexp 2013-12-22 03:21:28 +01:00
Philipp Hagemeister
c4d55a33fc [brightcove] Test checksum changed 2013-12-20 17:28:50 +01:00
Philipp Hagemeister
147e4aece0 [vbox7] New video checksum 2013-12-20 17:27:43 +01:00
Philipp Hagemeister
bd1488ae64 [mdr] Remove test
For context, refer to the http://de.wikipedia.org/wiki/Depublizieren
2013-12-20 17:24:48 +01:00
Philipp Hagemeister
79fed2a4df [crunchyroll] Fix test (#1721) 2013-12-20 17:20:39 +01:00
Philipp Hagemeister
304cbe981e Merge remote-tracking branch 'rzhxeo/crunchyroll' 2013-12-20 17:13:26 +01:00
Philipp Hagemeister
3fefbf50e3 Merge pull request #2005 from dstftw/ivi.ru
Add support for ivi.ru
2013-12-20 08:12:38 -08:00
dst
a51e37af62 [ivi] Simplify 2013-12-19 10:53:38 +07:00
dst
6c6db72ed4 [ivi] Skip tests for travis build 2013-12-19 06:19:41 +07:00
dst
5ce54a8205 [ivi] Neat import 2013-12-19 05:53:34 +07:00
dst
8c21b7c647 [ivi] Add playlist tests 2013-12-19 05:39:22 +07:00
dst
77aa6b329d [ivi] Add support for ivi.ru 2013-12-19 05:28:16 +07:00
rzhxeo
62d68c43ed Make prefer_free_formats sorting more robust 2013-12-18 21:25:13 +01:00
rzhxeo
bfaae0a768 Filter and sort videos before calling list_formats 2013-12-18 21:24:39 +01:00
rzhxeo
e56f22ae20 [YoutubeIE] Sort formats by resolution 2013-12-18 21:22:37 +01:00
rzhxeo
dbd1988ed9 [YoutubeIE] Add width and height to format dict 2013-12-18 21:21:25 +01:00
rzhxeo
4ea3be0a5c [YoutubeIE] Externalize format selection 2013-12-18 03:30:55 +01:00
Jaime Marquínez Ferrándiz
3bc2ddccc8 Move FileDownloader to its own module and create a new class for each download process
A suitable downloader can be found using the 'get_suitable_downloader' function.

Each subclass implements 'real_download', for downloading an info dict you call the 'download' method, which first checks if the video has already been downloaded
2013-12-11 16:18:48 +01:00
Jaime Marquínez Ferrándiz
8ab470f1b2 Now a new FileDownloader is created when downloading a video
The progress hooks can be added using the method "add_downloader_progress_hook"
2013-12-11 16:04:42 +01:00
rzhxeo
c8434e8316 Add support for crunchyroll.com 2013-11-09 11:25:12 +01:00
27 changed files with 1527 additions and 996 deletions

View File

@@ -39,7 +39,8 @@ which means you can modify it, redistribute it or use it however you like.
/youtube-dl .
--no-cache-dir Disable filesystem caching
--bidi-workaround Work around terminals that lack bidirectional
text support. Requires fribidi executable in PATH
text support. Requires bidiv or fribidi
executable in PATH
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)

View File

@@ -71,7 +71,7 @@ setup(
author_email='ytdl@yt-dl.org',
maintainer='Philipp Hagemeister',
maintainer_email='phihag@phihag.de',
packages=['youtube_dl', 'youtube_dl.extractor'],
packages=['youtube_dl', 'youtube_dl.extractor', 'youtube_dl.downloader'],
# Provokes warning on most systems (why?!)
# test_suite = 'nose.collector',

View File

@@ -90,7 +90,7 @@ def generator(test_case):
def _hook(status):
if status['status'] == 'finished':
finished_hook_called.add(status['filename'])
ydl.fd.add_progress_hook(_hook)
ydl.add_downloader_progress_hook(_hook)
def get_tc_filename(tc):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))

View File

@@ -27,7 +27,8 @@ from youtube_dl.extractor import (
BambuserChannelIE,
BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE
SmotriUserIE,
IviCompilationIE
)
@@ -168,6 +169,24 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Building Dynamic Websites')
self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
self.assertEqual(len(result['entries']), 10)
def test_ivi_compilation(self):
dl = FakeYDL()
ie = IviCompilationIE(dl)
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'dezhurnyi_angel')
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012)')
self.assertTrue(len(result['entries']) >= 36)
def test_ivi_compilation_season(self):
dl = FakeYDL()
ie = IviCompilationIE(dl)
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'dezhurnyi_angel/season2')
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20)
if __name__ == '__main__':

View File

@@ -1,724 +1,12 @@
import os
import re
import subprocess
import sys
import time
from .utils import (
compat_urllib_error,
compat_urllib_request,
ContentTooShortError,
determine_ext,
encodeFilename,
format_bytes,
sanitize_open,
timeconvert,
)
class FileDownloader(object):
"""File Downloader class.
File downloader objects are the ones responsible of downloading the
actual video file and writing it to disk.
File downloaders accept a lot of parameters. In order not to saturate
the object constructor with arguments, it receives a dictionary of
options instead.
Available options:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
"""
params = None
def __init__(self, ydl, params):
"""Create a FileDownloader object with the given options."""
self.ydl = ydl
self._progress_hooks = []
self.params = params
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
(hours, mins) = divmod(mins, 60)
if hours > 99:
return '--:--:--'
if hours == 0:
return '%02d:%02d' % (mins, secs)
else:
return '%02d:%02d:%02d' % (hours, mins, secs)
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
return None
return float(byte_counter) / float(data_len) * 100.0
@staticmethod
def format_percent(percent):
if percent is None:
return '---.-%'
return '%6s' % ('%3.1f%%' % percent)
@staticmethod
def calc_eta(start, now, total, current):
if total is None:
return None
dif = now - start
if current == 0 or dif < 0.001: # One millisecond
return None
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
@staticmethod
def format_eta(eta):
if eta is None:
return '--:--'
return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
return None
return float(bytes) / dif
@staticmethod
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
return int(new_max)
rate = bytes / elapsed_time
if rate > new_max:
return int(new_max)
if rate < new_min:
return int(new_min)
return int(rate)
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return int(round(number * multiplier))
def to_screen(self, *args, **kargs):
self.ydl.to_screen(*args, **kargs)
def to_stderr(self, message):
self.ydl.to_screen(message)
def to_console_title(self, message):
self.ydl.to_console_title(message)
def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs)
def report_warning(self, *args, **kargs):
self.ydl.report_warning(*args, **kargs)
def report_error(self, *args, **kargs):
self.ydl.report_error(*args, **kargs)
def slow_down(self, start_time, byte_counter):
"""Sleep if the download speed is over the rate limit."""
rate_limit = self.params.get('ratelimit', None)
if rate_limit is None or byte_counter == 0:
return
now = time.time()
elapsed = now - start_time
if elapsed <= 0.0:
return
speed = float(byte_counter) / elapsed
if speed > rate_limit:
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == u'-' or \
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
return filename
return filename + u'.part'
def undo_temp_name(self, filename):
if filename.endswith(u'.part'):
return filename[:-len(u'.part')]
return filename
def try_rename(self, old_filename, new_filename):
try:
if old_filename == new_filename:
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError):
self.report_error(u'unable to rename file')
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
if last_modified_hdr is None:
return
if not os.path.isfile(encodeFilename(filename)):
return
timestr = last_modified_hdr
if timestr is None:
return
filetime = timeconvert(timestr)
if filetime is None:
return filetime
# Ignore obviously invalid dates
if filetime == 0:
return
try:
os.utime(filename, (time.time(), filetime))
except:
pass
return filetime
def report_destination(self, filename):
"""Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename)
def _report_progress_status(self, msg, is_last_line=False):
fullmsg = u'[download] ' + msg
if self.params.get('progress_with_newline', False):
self.to_screen(fullmsg)
else:
if os.name == 'nt':
prev_len = getattr(self, '_report_progress_prev_line_length',
0)
if prev_len > len(fullmsg):
fullmsg += u' ' * (prev_len - len(fullmsg))
self._report_progress_prev_line_length = len(fullmsg)
clear_line = u'\r'
else:
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title(u'youtube-dl ' + msg)
def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress."""
if self.params.get('noprogress', False):
return
if eta is not None:
eta_str = self.format_eta(eta)
else:
eta_str = 'Unknown ETA'
if percent is not None:
percent_str = self.format_percent(percent)
else:
percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
msg = (u'%s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
self._report_progress_status(msg)
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
if self.params.get('noprogress', False):
return
downloaded_str = format_bytes(downloaded_data_len)
speed_str = self.format_speed(speed)
elapsed_str = FileDownloader.format_seconds(elapsed)
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
self._report_progress_status(msg)
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
self._report_progress_status(
(u'100%% of %s in %s' %
(data_len_str, self.format_seconds(tot_time))),
is_last_line=True)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
def report_retry(self, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume')
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = u''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
data_len_str = u'~' + format_bytes(data_len)
self.report_progress(percent, data_len_str, speed, eta)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'speed': speed,
})
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen(u'')
cursor_in_new_line = True
self.to_screen(u'[rtmpdump] '+line)
proc.wait()
if not cursor_in_new_line:
self.to_screen(u'')
return proc.returncode
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
# Check for rtmpdump first
try:
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
if play_path is not None:
basic_args += ['--playpath', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', url]
if test:
basic_args += ['--stop', '1']
if live:
basic_args += ['--live']
if conn:
basic_args += ['--conn', conn]
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if sys.platform == 'win32' and sys.version_info < (3, 0):
# Windows subprocess module does not actually support Unicode
# on Python 2.x
# See http://stackoverflow.com/a/9951851/35070
subprocess_encoding = sys.getfilesystemencoding()
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
else:
subprocess_encoding = None
if self.params.get('verbose', False):
if subprocess_encoding:
str_args = [
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
for a in args]
else:
str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError:
shell_quote = repr
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
retval = run_rtmpdump(args)
while (retval == 2 or retval == 1) and not test:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == 1:
break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == 2 and cursize > 1024:
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0
break
if retval == 0 or (test and retval == 2):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'rtmpdump exited with code %d' % retval)
return False
def _download_with_mplayer(self, filename, url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
# Check for mplayer first
try:
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
return False
# Download using mplayer.
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'mplayer exited with code %d' % retval)
return False
def _download_m3u8_with_ffmpeg(self, filename, url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
'-bsf:a', 'aac_adtstoasc', tmpfilename]
for program in ['avconv', 'ffmpeg']:
try:
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
break
except (OSError, IOError):
pass
else:
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
cmd = [program] + args
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'ffmpeg exited with code %d' % retval)
return False
# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
from .downloader import FileDownloader as RealFileDownloader
from .downloader import get_suitable_downloader
# This class reproduces the old behaviour of FileDownloader
class FileDownloader(RealFileDownloader):
def _do_download(self, filename, info_dict):
url = info_dict['url']
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
})
return True
# Attempt to download using rtmpdump
if url.startswith('rtmp'):
return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None),
info_dict.get('page_url', None),
info_dict.get('play_path', None),
info_dict.get('tc_url', None),
info_dict.get('rtmp_live', False),
info_dict.get('rtmp_conn', None))
# Attempt to download using mplayer
if url.startswith('mms') or url.startswith('rtsp'):
return self._download_with_mplayer(filename, url)
# m3u8 manifest are downloaded with ffmpeg
if determine_ext(url) == u'm3u8':
return self._download_m3u8_with_ffmpeg(filename, url)
tmpfilename = self.temp_name(filename)
stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
if 'user_agent' in info_dict:
headers['Youtubedl-user-agent'] = info_dict['user_agent']
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
if self.params.get('test', False):
request.add_header('Range','bytes=0-10240')
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = os.path.getsize(encodeFilename(tmpfilename))
else:
resume_len = 0
open_mode = 'wb'
if resume_len != 0:
if self.params.get('continuedl', False):
self.report_resuming_byte(resume_len)
request.add_header('Range','bytes=%d-' % resume_len)
open_mode = 'ab'
else:
resume_len = 0
count = 0
retries = self.params.get('retries', 0)
while count <= retries:
# Establish connection
try:
if count == 0 and 'urlhandle' in info_dict:
data = info_dict['urlhandle']
data = compat_urllib_request.urlopen(request)
break
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error
raise
elif err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = compat_urllib_request.urlopen(basic_request)
content_length = data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
raise
else:
# Examine the reported length
if (content_length is not None and
(resume_len - 100 < int(content_length) < resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
# changing the file size slightly and causing problems for some users. So
# I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
open_mode = 'wb'
break
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
if count > retries:
self.report_error(u'giving up after %s retries' % retries)
return False
data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None)
max_data_len = self.params.get("max_filesize", None)
if min_data_len is not None and data_len < min_data_len:
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
while True:
# Download and write
before = time.time()
data_block = data.read(block_size)
after = time.time()
if len(data_block) == 0:
break
byte_counter += len(data_block)
# Open file just in time
if stream is None:
try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
assert stream is not None
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError) as err:
self.report_error(u'unable to open for writing: %s' % str(err))
return False
try:
stream.write(data_block)
except (IOError, OSError) as err:
self.to_stderr(u"\n")
self.report_error(u'unable to write data: %s' % str(err))
return False
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
# Progress message
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
eta = percent = None
else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
if stream is None:
self.to_stderr(u"\n")
self.report_error(u'Did not get any data blocks')
return False
stream.close()
self.report_finish(data_len_str, (time.time() - start))
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
})
return True
def _hook_progress(self, status):
real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
for ph in self._progress_hooks:
ph(status)
def add_progress_hook(self, ph):
""" ph gets called on download progress, with a dictionary with the entries
* filename: The final filename
* status: One of "downloading" and "finished"
It can also have some of the following entries:
* downloaded_bytes: Bytes on disks
* total_bytes: Total bytes, None if unknown
* tmpfilename: The filename we're currently writing to
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if unknown
Hooks are guaranteed to be called at least once (with status "finished")
if the download is successful.
"""
self._progress_hooks.append(ph)
real_fd.add_progress_hook(ph)
return real_fd.download(filename, info_dict)

View File

@@ -53,7 +53,7 @@ from .utils import (
YoutubeDLHandler,
)
from .extractor import get_info_extractor, gen_extractors
from .FileDownloader import FileDownloader
from .downloader import get_suitable_downloader
from .version import __version__
@@ -167,7 +167,7 @@ class YoutubeDL(object):
self._ies = []
self._ies_instances = {}
self._pps = []
self._progress_hooks = []
self._fd_progress_hooks = []
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
@@ -183,12 +183,18 @@ class YoutubeDL(object):
width_args = []
else:
width_args = ['-w', str(width)]
self._fribidi = subprocess.Popen(
['fribidi', '-c', 'UTF-8'] + width_args,
sp_kwargs = dict(
stdin=subprocess.PIPE,
stdout=slave,
stderr=self._err_file)
self._fribidi_channel = os.fdopen(master, 'rb')
try:
self._output_process = subprocess.Popen(
['bidiv'] + width_args, **sp_kwargs
)
except OSError:
self._output_process = subprocess.Popen(
['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
self._output_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == 2:
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
@@ -205,8 +211,6 @@ class YoutubeDL(object):
u'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
self.fd = FileDownloader(self, self.params)
if '%(stitle)s' in self.params.get('outtmpl', ''):
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
@@ -242,15 +246,20 @@ class YoutubeDL(object):
self._pps.append(pp)
pp.set_downloader(self)
def add_downloader_progress_hook(self, ph):
"""Add the progress hook to the file downloader"""
self._fd_progress_hooks.append(ph)
def _bidi_workaround(self, message):
if not hasattr(self, '_fribidi_channel'):
if not hasattr(self, '_output_channel'):
return message
assert hasattr(self, '_output_process')
assert type(message) == type(u'')
line_count = message.count(u'\n') + 1
self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
self._fribidi.stdin.flush()
res = u''.join(self._fribidi_channel.readline().decode('utf-8')
self._output_process.stdin.write((message + u'\n').encode('utf-8'))
self._output_process.stdin.flush()
res = u''.join(self._output_channel.readline().decode('utf-8')
for _ in range(line_count))
return res[:-len(u'\n')]
@@ -636,7 +645,7 @@ class YoutubeDL(object):
info_dict['playlist_index'] = None
# This extractors handle format selection themselves
if info_dict['extractor'] in [u'youtube', u'Youku']:
if info_dict['extractor'] in [u'Youku']:
if download:
self.process_info(info_dict)
return info_dict
@@ -662,10 +671,6 @@ class YoutubeDL(object):
if 'ext' not in format:
format['ext'] = determine_ext(format['url'])
if self.params.get('listformats', None):
self.list_formats(info_dict)
return
format_limit = self.params.get('format_limit', None)
if format_limit:
formats = list(takewhile_inclusive(
@@ -678,9 +683,16 @@ class YoutubeDL(object):
except ValueError:
ext_ord = -1
# We only compare the extension if they have the same height and width
return (f.get('height'), f.get('width'), ext_ord)
return (f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
ext_ord)
formats = sorted(formats, key=_free_formats_key)
info_dict['formats'] = formats
if self.params.get('listformats', None):
self.list_formats(info_dict)
return
req_format = self.params.get('format', 'best')
if req_format is None:
req_format = 'best'
@@ -870,7 +882,10 @@ class YoutubeDL(object):
success = True
else:
try:
success = self.fd._do_download(filename, info_dict)
fd = get_suitable_downloader(info_dict)(self, self.params)
for ph in self._fd_progress_hooks:
fd.add_progress_hook(ph)
success = fd.download(filename, info_dict)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error(u'unable to download video data: %s' % str(err))
return

View File

@@ -56,7 +56,6 @@ from .utils import (
compat_print,
DateRange,
decodeOption,
determine_ext,
get_term_width,
DownloadError,
get_cachedir,
@@ -195,7 +194,7 @@ def parseOpts(overrideArguments=None):
type=float, default=None, help=optparse.SUPPRESS_HELP)
general.add_option(
'--bidi-workaround', dest='bidi_workaround', action='store_true',
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
selection.add_option(
@@ -525,7 +524,6 @@ def _real_main(argv=None):
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
matchedUrls = [url for url in all_urls if ie.suitable(url)]
all_urls = [url for url in all_urls if url not in matchedUrls]
for mu in matchedUrls:
compat_print(u' ' + mu)
sys.exit(0)

View File

@@ -1,4 +1,4 @@
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
@@ -32,6 +32,31 @@ def aes_ctr_decrypt(data, key, counter):
return decrypted_data
def aes_cbc_decrypt(data, key, iv):
"""
Decrypt with aes in CBC mode
@param {int[]} data cipher
@param {int[]} key 16/24/32-Byte cipher key
@param {int[]} iv 16-Byte IV
@returns {int[]} decrypted data
"""
expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
decrypted_data=[]
previous_cipher_block = iv
for i in range(block_count):
block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
block += [0]*(BLOCK_SIZE_BYTES - len(block))
decrypted_block = aes_decrypt(block, expanded_key)
decrypted_data += xor(decrypted_block, previous_cipher_block)
previous_cipher_block = block
decrypted_data = decrypted_data[:len(data)]
return decrypted_data
def key_expansion(data):
"""
Generate key schedule
@@ -75,7 +100,7 @@ def aes_encrypt(data, expanded_key):
@returns {int[]} 16-Byte cipher
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
for i in range(1, rounds+1):
data = sub_bytes(data)
@@ -83,6 +108,26 @@ def aes_encrypt(data, expanded_key):
if i != rounds:
data = mix_columns(data)
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
return data
def aes_decrypt(data, expanded_key):
"""
Decrypt one block with aes
@param {int[]} data 16-Byte cipher
@param {int[]} expanded_key 176/208/240-Byte expanded key
@returns {int[]} 16-Byte state
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
for i in range(rounds, 0, -1):
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
if i != rounds:
data = mix_columns_inv(data)
data = shift_rows_inv(data)
data = sub_bytes_inv(data)
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
return data
@@ -139,14 +184,69 @@ SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
MIX_COLUMN_MATRIX = ((2,3,1,1),
(1,2,3,1),
(1,1,2,3),
(3,1,1,2))
SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1),
(0x1,0x2,0x3,0x1),
(0x1,0x1,0x2,0x3),
(0x3,0x1,0x1,0x2))
MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9),
(0x9,0xE,0xB,0xD),
(0xD,0x9,0xE,0xB),
(0xB,0xD,0x9,0xE))
RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
def sub_bytes(data):
return [SBOX[x] for x in data]
def sub_bytes_inv(data):
return [SBOX_INV[x] for x in data]
def rotate(data):
return data[1:] + [data[0]]
@@ -160,30 +260,31 @@ def key_schedule_core(data, rcon_iteration):
def xor(data1, data2):
return [x^y for x, y in zip(data1, data2)]
def mix_column(data):
def rijndael_mul(a, b):
if(a==0 or b==0):
return 0
return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
def mix_column(data, matrix):
data_mixed = []
for row in range(4):
mixed = 0
for column in range(4):
addend = data[column]
if MIX_COLUMN_MATRIX[row][column] in (2,3):
addend <<= 1
if addend > 0xff:
addend &= 0xff
addend ^= 0x1b
if MIX_COLUMN_MATRIX[row][column] == 3:
addend ^= data[column]
mixed ^= addend & 0xff
# xor is (+) and (-)
mixed ^= rijndael_mul(data[column], matrix[row][column])
data_mixed.append(mixed)
return data_mixed
def mix_columns(data):
def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
data_mixed = []
for i in range(4):
column = data[i*4 : (i+1)*4]
data_mixed += mix_column(column)
data_mixed += mix_column(column, matrix)
return data_mixed
def mix_columns_inv(data):
return mix_columns(data, MIX_COLUMN_MATRIX_INV)
def shift_rows(data):
data_shifted = []
for column in range(4):
@@ -191,6 +292,13 @@ def shift_rows(data):
data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
return data_shifted
def shift_rows_inv(data):
data_shifted = []
for column in range(4):
for row in range(4):
data_shifted.append( data[((column - row) & 0b11) * 4 + row] )
return data_shifted
def inc(data):
data = data[:] # copy
for i in range(len(data)-1,-1,-1):

View File

@@ -0,0 +1,23 @@
from .common import FileDownloader
from .hls import HlsFD
from .http import HttpFD
from .mplayer import MplayerFD
from .rtmp import RtmpFD
from ..utils import (
determine_ext,
)
def get_suitable_downloader(info_dict):
"""Get the downloader class that can handle the info dict."""
url = info_dict['url']
if url.startswith('rtmp'):
return RtmpFD
if determine_ext(url) == u'm3u8':
return HlsFD
if url.startswith('mms') or url.startswith('rtsp'):
return MplayerFD
else:
return HttpFD

View File

@@ -0,0 +1,321 @@
import math
import os
import re
import subprocess
import sys
import time
from ..utils import (
encodeFilename,
timeconvert,
format_bytes,
)
class FileDownloader(object):
"""File Downloader class.
File downloader objects are the ones responsible of downloading the
actual video file and writing it to disk.
File downloaders accept a lot of parameters. In order not to saturate
the object constructor with arguments, it receives a dictionary of
options instead.
Available options:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
Subclasses of this one must re-define the real_download method.
"""
params = None
def __init__(self, ydl, params):
"""Create a FileDownloader object with the given options."""
self.ydl = ydl
self._progress_hooks = []
self.params = params
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
(hours, mins) = divmod(mins, 60)
if hours > 99:
return '--:--:--'
if hours == 0:
return '%02d:%02d' % (mins, secs)
else:
return '%02d:%02d:%02d' % (hours, mins, secs)
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
return None
return float(byte_counter) / float(data_len) * 100.0
@staticmethod
def format_percent(percent):
if percent is None:
return '---.-%'
return '%6s' % ('%3.1f%%' % percent)
@staticmethod
def calc_eta(start, now, total, current):
if total is None:
return None
dif = now - start
if current == 0 or dif < 0.001: # One millisecond
return None
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
@staticmethod
def format_eta(eta):
if eta is None:
return '--:--'
return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
return None
return float(bytes) / dif
@staticmethod
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
return int(new_max)
rate = bytes / elapsed_time
if rate > new_max:
return int(new_max)
if rate < new_min:
return int(new_min)
return int(rate)
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return int(round(number * multiplier))
def to_screen(self, *args, **kargs):
self.ydl.to_screen(*args, **kargs)
def to_stderr(self, message):
self.ydl.to_screen(message)
def to_console_title(self, message):
self.ydl.to_console_title(message)
def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs)
def report_warning(self, *args, **kargs):
self.ydl.report_warning(*args, **kargs)
def report_error(self, *args, **kargs):
self.ydl.report_error(*args, **kargs)
def slow_down(self, start_time, byte_counter):
"""Sleep if the download speed is over the rate limit."""
rate_limit = self.params.get('ratelimit', None)
if rate_limit is None or byte_counter == 0:
return
now = time.time()
elapsed = now - start_time
if elapsed <= 0.0:
return
speed = float(byte_counter) / elapsed
if speed > rate_limit:
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == u'-' or \
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
return filename
return filename + u'.part'
def undo_temp_name(self, filename):
if filename.endswith(u'.part'):
return filename[:-len(u'.part')]
return filename
def try_rename(self, old_filename, new_filename):
try:
if old_filename == new_filename:
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
except (IOError, OSError) as err:
self.report_error(u'unable to rename file')
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
if last_modified_hdr is None:
return
if not os.path.isfile(encodeFilename(filename)):
return
timestr = last_modified_hdr
if timestr is None:
return
filetime = timeconvert(timestr)
if filetime is None:
return filetime
# Ignore obviously invalid dates
if filetime == 0:
return
try:
os.utime(filename, (time.time(), filetime))
except:
pass
return filetime
def report_destination(self, filename):
"""Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename)
def _report_progress_status(self, msg, is_last_line=False):
fullmsg = u'[download] ' + msg
if self.params.get('progress_with_newline', False):
self.to_screen(fullmsg)
else:
if os.name == 'nt':
prev_len = getattr(self, '_report_progress_prev_line_length',
0)
if prev_len > len(fullmsg):
fullmsg += u' ' * (prev_len - len(fullmsg))
self._report_progress_prev_line_length = len(fullmsg)
clear_line = u'\r'
else:
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title(u'youtube-dl ' + msg)
def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress."""
if self.params.get('noprogress', False):
return
if eta is not None:
eta_str = self.format_eta(eta)
else:
eta_str = 'Unknown ETA'
if percent is not None:
percent_str = self.format_percent(percent)
else:
percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
msg = (u'%s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
self._report_progress_status(msg)
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
if self.params.get('noprogress', False):
return
downloaded_str = format_bytes(downloaded_data_len)
speed_str = self.format_speed(speed)
elapsed_str = FileDownloader.format_seconds(elapsed)
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
self._report_progress_status(msg)
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
self._report_progress_status(
(u'100%% of %s in %s' %
(data_len_str, self.format_seconds(tot_time))),
is_last_line=True)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
def report_retry(self, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume')
def download(self, filename, info_dict):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
url = info_dict['url']
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
})
return True
else:
return self.real_download(filename, info_dict)
def real_download(self, filename, info_dict):
"""Real download process. Redefine in subclasses."""
raise NotImplementedError(u'This method must be implemented by sublcasses')
def _hook_progress(self, status):
for ph in self._progress_hooks:
ph(status)
def add_progress_hook(self, ph):
""" ph gets called on download progress, with a dictionary with the entries
* filename: The final filename
* status: One of "downloading" and "finished"
It can also have some of the following entries:
* downloaded_bytes: Bytes on disks
* total_bytes: Total bytes, None if unknown
* tmpfilename: The filename we're currently writing to
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if unknown
Hooks are guaranteed to be called at least once (with status "finished")
if the download is successful.
"""
self._progress_hooks.append(ph)

View File

@@ -0,0 +1,44 @@
import os
import subprocess
from .common import FileDownloader
from ..utils import (
encodeFilename,
)
class HlsFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
'-bsf:a', 'aac_adtstoasc', tmpfilename]
for program in ['avconv', 'ffmpeg']:
try:
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
break
except (OSError, IOError):
pass
else:
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
cmd = [program] + args
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'ffmpeg exited with code %d' % retval)
return False

View File

@@ -0,0 +1,191 @@
import os
import re
import subprocess
import sys
import time
from .common import FileDownloader
from ..utils import (
compat_urllib_request,
compat_urllib_error,
ContentTooShortError,
encodeFilename,
sanitize_open,
format_bytes,
)
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
tmpfilename = self.temp_name(filename)
stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
if 'user_agent' in info_dict:
headers['Youtubedl-user-agent'] = info_dict['user_agent']
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
if self.params.get('test', False):
request.add_header('Range','bytes=0-10240')
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = os.path.getsize(encodeFilename(tmpfilename))
else:
resume_len = 0
open_mode = 'wb'
if resume_len != 0:
if self.params.get('continuedl', False):
self.report_resuming_byte(resume_len)
request.add_header('Range','bytes=%d-' % resume_len)
open_mode = 'ab'
else:
resume_len = 0
count = 0
retries = self.params.get('retries', 0)
while count <= retries:
# Establish connection
try:
if count == 0 and 'urlhandle' in info_dict:
data = info_dict['urlhandle']
data = compat_urllib_request.urlopen(request)
break
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error
raise
elif err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = compat_urllib_request.urlopen(basic_request)
content_length = data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
raise
else:
# Examine the reported length
if (content_length is not None and
(resume_len - 100 < int(content_length) < resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
# changing the file size slightly and causing problems for some users. So
# I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
open_mode = 'wb'
break
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
if count > retries:
self.report_error(u'giving up after %s retries' % retries)
return False
data_len = data.info().get('Content-length', None)
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None)
max_data_len = self.params.get("max_filesize", None)
if min_data_len is not None and data_len < min_data_len:
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
while True:
# Download and write
before = time.time()
data_block = data.read(block_size)
after = time.time()
if len(data_block) == 0:
break
byte_counter += len(data_block)
# Open file just in time
if stream is None:
try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
assert stream is not None
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError) as err:
self.report_error(u'unable to open for writing: %s' % str(err))
return False
try:
stream.write(data_block)
except (IOError, OSError):
self.to_stderr(u"\n")
self.report_error(u'unable to write data: %s' % str(err))
return False
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
# Progress message
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
eta = percent = None
else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
if stream is None:
self.to_stderr(u"\n")
self.report_error(u'Did not get any data blocks')
return False
stream.close()
self.report_finish(data_len_str, (time.time() - start))
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
})
return True

View File

@@ -0,0 +1,39 @@
import os
import subprocess
from .common import FileDownloader
from ..utils import (
encodeFilename,
)
class MplayerFD(FileDownloader):
def real_download(self, filename, info_dict):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
# Check for mplayer first
try:
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
return False
# Download using mplayer.
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'mplayer exited with code %d' % retval)
return False

View File

@@ -0,0 +1,178 @@
import os
import re
import subprocess
import sys
import time
from .common import FileDownloader
from ..utils import (
encodeFilename,
format_bytes,
)
class RtmpFD(FileDownloader):
def real_download(self, filename, info_dict):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = u''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
data_len_str = u'~' + format_bytes(data_len)
self.report_progress(percent, data_len_str, speed, eta)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1))*1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'speed': speed,
})
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen(u'')
cursor_in_new_line = True
self.to_screen(u'[rtmpdump] '+line)
proc.wait()
if not cursor_in_new_line:
self.to_screen(u'')
return proc.returncode
url = info_dict['url']
player_url = info_dict.get('player_url', None)
page_url = info_dict.get('page_url', None)
play_path = info_dict.get('play_path', None)
tc_url = info_dict.get('tc_url', None)
live = info_dict.get('rtmp_live', False)
conn = info_dict.get('rtmp_conn', None)
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
test = self.params.get('test', False)
# Check for rtmpdump first
try:
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
if play_path is not None:
basic_args += ['--playpath', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', url]
if test:
basic_args += ['--stop', '1']
if live:
basic_args += ['--live']
if conn:
basic_args += ['--conn', conn]
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
if sys.platform == 'win32' and sys.version_info < (3, 0):
# Windows subprocess module does not actually support Unicode
# on Python 2.x
# See http://stackoverflow.com/a/9951851/35070
subprocess_encoding = sys.getfilesystemencoding()
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
else:
subprocess_encoding = None
if self.params.get('verbose', False):
if subprocess_encoding:
str_args = [
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
for a in args]
else:
str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
except ImportError:
shell_quote = repr
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
retval = run_rtmpdump(args)
while (retval == 2 or retval == 1) and not test:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == 1:
break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
if prevsize == cursize and retval == 2 and cursize > 1024:
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
retval = 0
break
if retval == 0 or (test and retval == 2):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'rtmpdump exited with code %d' % retval)
return False

View File

@@ -33,6 +33,7 @@ from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .crunchyroll import CrunchyrollIE
from .cspan import CSpanIE
from .d8 import D8IE
from .dailymotion import (
@@ -83,6 +84,10 @@ from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
from .internetvideoarchive import InternetVideoArchiveIE
from .ivi import (
IviIE,
IviCompilationIE
)
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE

View File

@@ -9,7 +9,7 @@ from ..utils import (
class BlinkxIE(InfoExtractor):
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)'
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
_IE_NAME = u'blinkx'
_TEST = {

View File

@@ -70,13 +70,14 @@ class BlipTVIE(InfoExtractor):
info = None
urlh = self._request_webpage(request, None, False,
u'unable to download video info webpage')
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1]
title,ext = os.path.splitext(basename)
title = title.decode('UTF-8')
ext = ext.replace('.', '')
self.report_direct_download(title)
info = {
return {
'id': title,
'url': url,
'uploader': None,
@@ -85,49 +86,47 @@ class BlipTVIE(InfoExtractor):
'ext': ext,
'urlhandle': urlh
}
if info is None: # Regular URL
try:
json_code_bytes = urlh.read()
json_code = json_code_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
try:
json_data = json.loads(json_code)
if 'Post' in json_data:
data = json_data['Post']
else:
data = json_data
try:
json_code_bytes = urlh.read()
json_code = json_code_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
if 'additionalMedia' in data:
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
best_format = formats[-1]
video_url = best_format['url']
else:
video_url = data['media']['url']
umobj = re.match(self._URL_EXT, video_url)
if umobj is None:
raise ValueError('Can not determine filename extension')
ext = umobj.group(1)
try:
json_data = json.loads(json_code)
if 'Post' in json_data:
data = json_data['Post']
else:
data = json_data
info = {
'id': compat_str(data['item_id']),
'url': video_url,
'uploader': data['display_name'],
'upload_date': upload_date,
'title': data['title'],
'ext': ext,
'format': data['media']['mimeType'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'player_url': data['embedUrl'],
'user_agent': 'iTunes/10.6.1',
}
except (ValueError,KeyError) as err:
raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
if 'additionalMedia' in data:
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
best_format = formats[-1]
video_url = best_format['url']
else:
video_url = data['media']['url']
umobj = re.match(self._URL_EXT, video_url)
if umobj is None:
raise ValueError('Can not determine filename extension')
ext = umobj.group(1)
return [info]
return {
'id': compat_str(data['item_id']),
'url': video_url,
'uploader': data['display_name'],
'upload_date': upload_date,
'title': data['title'],
'ext': ext,
'format': data['media']['mimeType'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'player_url': data['embedUrl'],
'user_agent': 'iTunes/10.6.1',
}
except (ValueError, KeyError) as err:
raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
class BlipTVUserIE(InfoExtractor):

View File

@@ -26,7 +26,7 @@ class BrightcoveIE(InfoExtractor):
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4',
u'md5': u'8eccab865181d29ec2958f32a6a754f5',
u'md5': u'5423e113865d26e40624dce2e4b45d95',
u'note': u'Test Brightcove downloads and detection in GenericIE',
u'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',

View File

@@ -0,0 +1,171 @@
# encoding: utf-8
import re, base64, zlib
from hashlib import sha1
from math import pow, sqrt, floor
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
bytes_to_intlist,
intlist_to_bytes,
unified_strdate,
clean_html,
)
from ..aes import (
aes_cbc_decrypt,
inc,
)
class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
u'file': u'645513.flv',
#u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
u'info_dict': {
u'title': u'Wanna be the Strongest in the World Episode 1 An Idol-Wrestler is Born!',
u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
u'upload_date': u'20131013',
},
u'params': {
# rtmp
u'skip_download': True,
},
}]
_FORMAT_IDS = {
u'360': (u'60', u'106'),
u'480': (u'61', u'106'),
u'720': (u'62', u'106'),
u'1080': (u'80', u'108'),
}
def _decrypt_subtitles(self, data, iv, id):
data = bytes_to_intlist(data)
iv = bytes_to_intlist(iv)
id = int(id)
def obfuscate_key_aux(count, modulo, start):
output = list(start)
for _ in range(count):
output.append(output[-1] + output[-2])
# cut off start values
output = output[2:]
output = list(map(lambda x: x % modulo + 33, output))
return output
def obfuscate_key(key):
num1 = int(floor(pow(2, 25) * sqrt(6.9)))
num2 = (num1 ^ key) << 5
num3 = key ^ num1
num4 = num3 ^ (num3 >> 3) ^ num2
prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
# Extend 160 Bit hash to 256 Bit
return shaHash + [0] * 12
key = obfuscate_key(id)
class Counter:
__value = iv
def next_value(self):
temp = self.__value
self.__value = inc(self.__value)
return temp
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
return zlib.decompress(decrypted_data)
def _convert_subtitles_to_srt(self, subtitles):
i=1
output = u''
for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
start = start.replace(u'.', u',')
end = end.replace(u'.', u',')
text = clean_html(text)
text = text.replace(u'\\N', u'\n')
if not text:
continue
output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
i+=1
return output
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
webpage_url = u'http://www.' + mobj.group('url')
video_id = mobj.group(u'video_id')
webpage = self._download_webpage(webpage_url, video_id)
note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'')
if note_m:
raise ExtractorError(note_m)
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL)
video_title = re.sub(r' {2,}', u' ', video_title)
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'')
if not video_description:
video_description = None
video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL)
if video_upload_date:
video_upload_date = unified_strdate(video_upload_date)
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL)
playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url'))
playerdata_req = compat_urllib_request.Request(playerdata_url)
playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url})
playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info')
stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id')
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False)
formats = []
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt+u'p'
streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/')
# urlencode doesn't work!
streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format
streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data)))
streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format)
video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url')
video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path')
formats.append({
u'url': video_url,
u'play_path': video_play_path,
u'ext': 'flv',
u'format': video_format,
u'format_id': video_format,
})
subtitles = {}
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
video_id, note=u'Downloading subtitles for '+sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8')
lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False)
if not lang_code:
continue
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
return {
u'id': video_id,
u'title': video_title,
u'description': video_description,
u'thumbnail': video_thumbnail,
u'uploader': video_uploader,
u'upload_date': video_upload_date,
u'subtitles': subtitles,
u'formats': formats,
}

View File

@@ -222,7 +222,7 @@ class GenericIE(InfoExtractor):
self.to_screen(u'Brightcove video detected.')
return self.url_result(bc_url, 'Brightcove')
# Look for embedded Vimeo player
# Look for embedded (iframe) Vimeo player
mobj = re.search(
r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)
if mobj:
@@ -230,6 +230,12 @@ class GenericIE(InfoExtractor):
surl = smuggle_url(player_url, {'Referer': url})
return self.url_result(surl, 'Vimeo')
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
if mobj:
return self.url_result(mobj.group(1), 'Vimeo')
# Look for embedded YouTube player
matches = re.findall(r'''(?x)
(?:<iframe[^>]+?src=|embedSWF\(\s*)

154
youtube_dl/extractor/ivi.py Normal file
View File

@@ -0,0 +1,154 @@
# encoding: utf-8
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
ExtractorError,
)
class IviIE(InfoExtractor):
IE_DESC = u'ivi.ru'
IE_NAME = u'ivi'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
_TESTS = [
# Single movie
{
u'url': u'http://www.ivi.ru/watch/53141',
u'file': u'53141.mp4',
u'md5': u'6ff5be2254e796ed346251d117196cf4',
u'info_dict': {
u'title': u'Иван Васильевич меняет профессию',
u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
u'duration': 5498,
u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
},
u'skip': u'Only works from Russia',
},
# Serial's serie
{
u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
u'file': u'74791.mp4',
u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
u'info_dict': {
u'title': u'Дежурный ангел - 1 серия',
u'duration': 2490,
u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
},
u'skip': u'Only works from Russia',
}
]
# Sorted by quality
_known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
# Sorted by size
_known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']
def _extract_description(self, html):
m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
return m.group('description') if m is not None else None
def _extract_comment_count(self, html):
m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
return int(m.group('commentcount')) if m is not None else 0
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
api_url = 'http://api.digitalaccess.ru/api/json/'
data = {u'method': u'da.content.get',
u'params': [video_id, {u'site': u's183',
u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
u'contentid': video_id
}
]
}
request = compat_urllib_request.Request(api_url, json.dumps(data))
video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
video_json = json.loads(video_json_page)
if u'error' in video_json:
error = video_json[u'error']
if error[u'origin'] == u'NoRedisValidData':
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True)
result = video_json[u'result']
formats = [{'url': x[u'url'],
'format_id': x[u'content_format']
} for x in result[u'files'] if x[u'content_format'] in self._known_formats]
formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
if len(formats) == 0:
self._downloader.report_warning(u'No media links available for %s' % video_id)
return
duration = result[u'duration']
compilation = result[u'compilation']
title = result[u'title']
title = '%s - %s' % (compilation, title) if compilation is not None else title
previews = result[u'preview']
previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
thumbnail = previews[-1][u'url'] if len(previews) > 0 else None
video_page = self._download_webpage(url, video_id, u'Downloading video page')
description = self._extract_description(video_page)
comment_count = self._extract_comment_count(video_page)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'description': description,
'duration': duration,
'comment_count': comment_count,
'formats': formats,
}
class IviCompilationIE(InfoExtractor):
IE_DESC = u'ivi.ru compilations'
IE_NAME = u'ivi:compilation'
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
def _extract_entries(self, html, compilation_id):
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
compilation_id = mobj.group('compilationid')
season_id = mobj.group('seasonid')
if season_id is not None: # Season link
season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id)
playlist_id = '%s/season%s' % (compilation_id, season_id)
playlist_title = self._html_search_meta(u'title', season_page, u'title')
entries = self._extract_entries(season_page, compilation_id)
else: # Compilation link
compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page')
playlist_id = compilation_id
playlist_title = self._html_search_meta(u'title', compilation_page, u'title')
seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
if len(seasons) == 0: # No seasons in this compilation
entries = self._extract_entries(compilation_page, compilation_id)
else:
entries = []
for season_id in seasons:
season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
compilation_id, u'Downloading season %s web page' % season_id)
entries.extend(self._extract_entries(season_page, compilation_id))
return self.playlist_result(entries, playlist_id, playlist_title)

View File

@@ -8,23 +8,8 @@ from ..utils import (
class MDRIE(InfoExtractor):
_VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
_TESTS = [{
u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
u'file': u'165624.mp4',
u'md5': u'ae785f36ecbf2f19b42edf1bc9c85815',
u'info_dict': {
u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
},
},
{
u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
u'file': u'718370.mp3',
u'md5': u'a9d21345a234c7b45dee612f290fd8d7',
u'info_dict': {
u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
},
}]
# No tests, MDR regularily deletes its videos
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)

View File

@@ -1,5 +1,6 @@
# encoding: utf-8
import os.path
import re
import json
import hashlib
@@ -10,6 +11,7 @@ from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
url_basename,
)
@@ -132,7 +134,16 @@ class SmotriIE(InfoExtractor):
# We will extract some from the video web page instead
video_page_url = 'http://' + mobj.group('url')
video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
# Warning if video is unavailable
warning = self._html_search_regex(
r'<div class="videoUnModer">(.*?)</div>', video_page,
u'warning messagef', default=None)
if warning is not None:
self._downloader.report_warning(
u'Video %s may not be available; smotri said: %s ' %
(video_id, warning))
# Adult content
if re.search(u'EroConfirmText">', video_page) is not None:
self.report_age_confirmation()
@@ -148,38 +159,44 @@ class SmotriIE(InfoExtractor):
# Extract the rest of meta data
video_title = self._search_meta(u'name', video_page, u'title')
if not video_title:
video_title = video_url.rsplit('/', 1)[-1]
video_title = os.path.splitext(url_basename(video_url))[0]
video_description = self._search_meta(u'description', video_page)
END_TEXT = u' на сайте Smotri.com'
if video_description.endswith(END_TEXT):
if video_description and video_description.endswith(END_TEXT):
video_description = video_description[:-len(END_TEXT)]
START_TEXT = u'Смотреть онлайн ролик '
if video_description.startswith(START_TEXT):
if video_description and video_description.startswith(START_TEXT):
video_description = video_description[len(START_TEXT):]
video_thumbnail = self._search_meta(u'thumbnail', video_page)
upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
video_upload_date = (
(
upload_date_m.group('year') +
upload_date_m.group('month') +
upload_date_m.group('day')
if upload_date_str:
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
video_upload_date = (
(
upload_date_m.group('year') +
upload_date_m.group('month') +
upload_date_m.group('day')
)
if upload_date_m else None
)
if upload_date_m else None
)
else:
video_upload_date = None
duration_str = self._search_meta(u'duration', video_page)
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
video_duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
if duration_str:
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
video_duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
)
if duration_m else None
)
if duration_m else None
)
else:
video_duration = None
video_uploader = self._html_search_regex(
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',

View File

@@ -15,7 +15,7 @@ class Vbox7IE(InfoExtractor):
_TEST = {
u'url': u'http://vbox7.com/play:249bb972c2',
u'file': u'249bb972c2.flv',
u'md5': u'9c70d6d956f888bdc08c124acc120cfe',
u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
u'info_dict': {
u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
}

View File

@@ -16,11 +16,20 @@ from ..utils import (
unsmuggle_url,
)
class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_VALID_URL = r'''(?x)
(?P<proto>https?://)?
(?:(?:www|(?P<player>player))\.)?
vimeo(?P<pro>pro)?\.com/
(?:.*?/)?
(?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
(?:videos?/)?
(?P<id>[0-9]+)
/?(?:[?&].*)?(?:[#].*)?$'''
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [

View File

@@ -162,23 +162,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Dash audio
'141', '172', '140', '171', '139',
]
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '102', '84', '101', '83', '100', '82',
# Dash video
'138', '248', '137', '247', '136', '246', '245',
'244', '135', '243', '134', '242', '133', '160',
# Dash audio
'172', '141', '171', '140', '139',
]
_video_formats_map = {
'flv': ['35', '34', '6', '5'],
'3gp': ['36', '17', '13'],
'mp4': ['38', '37', '22', '18'],
'webm': ['46', '45', '44', '43'],
}
_video_extensions = {
'13': '3gp',
'17': '3gp',
@@ -236,54 +219,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'248': 'webm',
}
_video_dimensions = {
'5': '400x240',
'6': '???',
'13': '???',
'17': '176x144',
'18': '640x360',
'22': '1280x720',
'34': '640x360',
'35': '854x480',
'36': '320x240',
'37': '1920x1080',
'38': '4096x3072',
'43': '640x360',
'44': '854x480',
'45': '1280x720',
'46': '1920x1080',
'82': '360p',
'83': '480p',
'84': '720p',
'85': '1080p',
'92': '240p',
'93': '360p',
'94': '480p',
'95': '720p',
'96': '1080p',
'100': '360p',
'101': '480p',
'102': '720p',
'132': '240p',
'151': '72p',
'133': '240p',
'134': '360p',
'135': '480p',
'136': '720p',
'137': '1080p',
'138': '>1080p',
'139': '48k',
'140': '128k',
'141': '256k',
'160': '192p',
'171': '128k',
'172': '256k',
'242': '240p',
'243': '360p',
'244': '480p',
'245': '480p',
'246': '480p',
'247': '720p',
'248': '1080p',
'5': {'width': 400, 'height': 240},
'6': {},
'13': {},
'17': {'width': 176, 'height': 144},
'18': {'width': 640, 'height': 360},
'22': {'width': 1280, 'height': 720},
'34': {'width': 640, 'height': 360},
'35': {'width': 854, 'height': 480},
'36': {'width': 320, 'height': 240},
'37': {'width': 1920, 'height': 1080},
'38': {'width': 4096, 'height': 3072},
'43': {'width': 640, 'height': 360},
'44': {'width': 854, 'height': 480},
'45': {'width': 1280, 'height': 720},
'46': {'width': 1920, 'height': 1080},
'82': {'height': 360, 'display': '360p'},
'83': {'height': 480, 'display': '480p'},
'84': {'height': 720, 'display': '720p'},
'85': {'height': 1080, 'display': '1080p'},
'92': {'height': 240, 'display': '240p'},
'93': {'height': 360, 'display': '360p'},
'94': {'height': 480, 'display': '480p'},
'95': {'height': 720, 'display': '720p'},
'96': {'height': 1080, 'display': '1080p'},
'100': {'height': 360, 'display': '360p'},
'101': {'height': 480, 'display': '480p'},
'102': {'height': 720, 'display': '720p'},
'132': {'height': 240, 'display': '240p'},
'151': {'height': 72, 'display': '72p'},
'133': {'height': 240, 'display': '240p'},
'134': {'height': 360, 'display': '360p'},
'135': {'height': 480, 'display': '480p'},
'136': {'height': 720, 'display': '720p'},
'137': {'height': 1080, 'display': '1080p'},
'138': {'height': 1081, 'display': '>1080p'},
'139': {'display': '48k'},
'140': {'display': '128k'},
'141': {'display': '256k'},
'160': {'height': 192, 'display': '192p'},
'171': {'display': '128k'},
'172': {'display': '256k'},
'242': {'height': 240, 'display': '240p'},
'243': {'height': 360, 'display': '360p'},
'244': {'height': 480, 'display': '480p'},
'245': {'height': 480, 'display': '480p'},
'246': {'height': 480, 'display': '480p'},
'247': {'height': 720, 'display': '720p'},
'248': {'height': 1080, 'display': '1080p'},
}
_special_itags = {
'82': '3D',
@@ -1153,13 +1136,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self._downloader.report_warning(err_msg)
return {}
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
self._video_dimensions.get(x, '???'),
' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
def _extract_id(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None:
@@ -1172,48 +1148,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
Transform a dictionary in the format {itag:url} to a list of (itag, url)
with the requested formats.
"""
req_format = self._downloader.params.get('format', None)
format_limit = self._downloader.params.get('format_limit', None)
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
if format_limit is not None and format_limit in available_formats:
format_list = available_formats[available_formats.index(format_limit):]
else:
format_list = available_formats
existing_formats = [x for x in format_list if x in url_map]
existing_formats = [x for x in self._available_formats if x in url_map]
if len(existing_formats) == 0:
raise ExtractorError(u'no known formats available for video')
if self._downloader.params.get('listformats', None):
self._print_formats(existing_formats)
return
if req_format is None or req_format == 'best':
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
elif req_format in ('-1', 'all'):
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific formats. We pick the first in a slash-delimeted sequence.
# Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
# available in the specified format. For example,
# if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
# if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
# if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
req_formats = req_format.split('/')
video_url_list = None
for rf in req_formats:
if rf in url_map:
video_url_list = [(rf, url_map[rf])]
break
if rf in self._video_formats_map:
for srf in self._video_formats_map[rf]:
if srf in url_map:
video_url_list = [(srf, url_map[srf])]
break
else:
continue
break
if video_url_list is None:
raise ExtractorError(u'requested format not available')
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
video_url_list.reverse() # order worst to best
return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id):
@@ -1462,50 +1401,67 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
url += '&ratebypass=yes'
url_map[url_data['itag'][0]] = url
video_url_list = self._get_video_url_list(url_map)
if not video_url_list:
return
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
url_map = self._extract_from_m3u8(manifest_url, video_id)
video_url_list = self._get_video_url_list(url_map)
if not video_url_list:
return
else:
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
results = []
formats = []
for itag, video_real_url in video_url_list:
# Extension
video_extension = self._video_extensions.get(itag, 'flv')
resolution = self._video_dimensions.get(itag, {}).get('display')
width = self._video_dimensions.get(itag, {}).get('width')
height = self._video_dimensions.get(itag, {}).get('height')
note = self._special_itags.get(itag)
video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
self._video_dimensions.get(itag, '???'),
'%dx%d' % (width, height) if width is not None and height is not None else (resolution if resolution is not None else '???'),
' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
results.append({
'id': video_id,
'url': video_real_url,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': upload_date,
'title': video_title,
'ext': video_extension,
'format': video_format,
'format_id': itag,
'thumbnail': video_thumbnail,
'description': video_description,
'player_url': player_url,
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
formats.append({
'url': video_real_url,
'ext': video_extension,
'format': video_format,
'format_id': itag,
'player_url': player_url,
'_resolution': resolution,
'width': width,
'height': height,
'format_note': note,
})
return results
def _formats_key(f):
note = f.get('format_note')
if note is None:
note = u''
is_dash = u'DASH' in note
return (
0 if is_dash else 1,
f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1)
formats.sort(key=_formats_key)
return {
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'upload_date': upload_date,
'title': video_title,
'thumbnail': video_thumbnail,
'description': video_description,
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'formats': formats,
}
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com playlists'
@@ -1717,7 +1673,7 @@ class YoutubeUserIE(InfoExtractor):
# page by page until there are no video ids - it means we got
# all of them.
video_ids = []
url_results = []
for pagenum in itertools.count(0):
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
@@ -1735,10 +1691,17 @@ class YoutubeUserIE(InfoExtractor):
break
# Extract video identifiers
ids_in_page = []
for entry in response['feed']['entry']:
ids_in_page.append(entry['id']['$t'].split('/')[-1])
video_ids.extend(ids_in_page)
entries = response['feed']['entry']
for entry in entries:
title = entry['title']['$t']
video_id = entry['id']['$t'].split('/')[-1]
url_results.append({
'_type': 'url',
'url': video_id,
'ie_key': 'Youtube',
'id': 'video_id',
'title': title,
})
# A little optimization - if current page is not
# "full", ie. does not contain PAGE_SIZE video ids then
@@ -1746,12 +1709,9 @@ class YoutubeUserIE(InfoExtractor):
# are no more ids on further pages - no need to query
# again.
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
if len(entries) < self._GDATA_PAGE_SIZE:
break
url_results = [
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_results, playlist_title=username)

View File

@@ -1,2 +1,2 @@
__version__ = '2013.12.20'
__version__ = '2013.12.23.4'