Compare commits

161 commits · 2013.08.29 ... 2013.09.20

SHA1s: 58f289d013, 3d60bb96e1, 38d025b3f0, c40c6aaaaa, 1a810f0d4e, 63037593c0, 7a878d47fa, bc4b900898, c5e743f66f, 6c36d8d6fb, 71c82637e7, 2dad310e2c, d0ae9e3a8d, a19413c311, 1ef80b55dd, eb03f4dad3, 830dd1944a, 1237c9a3a5, 5d13df79a5, 6523223a4c, 4a67aafb7e, f3f34c5b0f, 6ae8ee3f54, e8f8e80097, 4dc0ff3ecf, 4b6462fc1e, c4ece78564, 0761d02b0b, 71c107fc57, 7459e3a290, f9e66fb993, 6c603ccce3, ef66b0c6ef, 22b50ecb2f, 5a6fecc3de, cdbccafed9, e69ae5b9e7, 92790f4e54, 471a5ee908, 19e1d35989, 0b7f31184d, fad84d50fe, 9a1c32dc54, a921f40799, 74ac9bdd82, 94518f2087, 535f59bbcf, 71cedb3c0c, dd01d6558a, ce85f022d2, ad94a6fe44, 353ba14060, 83de794223, bfd5c93af9, c247d87ef3, 07ac9e2cc2, 6bc520c207, f1d20fa39f, e3dc22ca3a, d665f8d3cb, 055e6f3657, ac4f319ba1, 542cca0e8c, 6a2449df3b, 7fad1c6328, d82134c339, 54d39d8b2f, de7f3446e0, f8e52269c1, cf1dd0c59e, 22c8b52545, 1f7dc42cd0, aa8f2641da, 648d25d43d, df3e61003a, 6b361ad5ee, 5d8afe69f7, a1ab553858, 07463ea162, 6d2d21f713, 061b2889a9, 8963d9c266, 890f62e868, 8f362589a5, a27a2470cd, 72836fcee4, a7130543fa, a490fda746, 7e77275293, d6e203b3dc, e3ea479087, faab1d3836, 8851a574a3, 59282080c8, 98f3da4040, 1d213233cd, fd9cf73836, 0638ad9999, 1eb527692a, 09bb17e108, 1cf911bc82, f4b052321b, a636203ea5, c215217e39, 08e291b54d, 6b95b065be, 9363169b67, 085bea4513, 150f20828b, 08523ee20a, 5d5171d26a, 96fb5605b2, 7011de0bc2, c3dd69eab4, 025171c476, c8dbccde30, 4ff7a0f1f6, 9c2ade40de, aa32314d09, 52afe99665, b0446d6a33, 8e4e89f1c2, 6c758d79de, 691008087b, 85f03346eb, bdc6b3fc64, 847f582290, 10f5c016ec, 2e756879f1, c7a7750d3b, 9193c1eede, b3f0e53048, 483e0ddd4d, 591078babf, cd9c100963, b7f89fe692, 1301a0dd42, 06a401c845, c5b921b597, 99859d436c, 7a20e2e1f8, 5c6658d4dd, 39c6f507df, 614d9c19c1, bd2dee6c67, 18b4e04f1c, b4e60dac23, adeb9c73d6, 5af7e056a7, a3f62b8255, d80a064eff, d55de6eec2, 69df680b97, 447591e1ae, 33eb0ce4c4, 505c28aac9, 8377574c9c, 372297e713, 953e32b2c1, 5898e28272, 67dfbc0cb9
.gitignore (vendored, 8 changed lines)

```diff
@@ -17,4 +17,10 @@ youtube-dl.tar.gz
 .coverage
 cover/
 updates_key.pem
-*.egg-info
+*.egg-info
+*.srt
+*.sbv
+*.vtt
+*.flv
+*.mp4
+*.part
```
README.md (12 changed lines)

```diff
@@ -19,7 +19,8 @@ which means you can modify it, redistribute it or use it however you like.
     -U, --update               update this program to latest version. Make sure
                                that you have sufficient permissions (run with
                                sudo if needed)
-    -i, --ignore-errors        continue on download errors
+    -i, --ignore-errors        continue on download errors, for example to
+                               skip unavailable videos in a playlist
     --dump-user-agent          display the current browser identification
     --user-agent UA            specify a custom user agent
     --referer REF              specify a custom referer, use if the video access
@@ -113,7 +114,8 @@ which means you can modify it, redistribute it or use it however you like.
 
 ## Video Format Options:
     -f, --format FORMAT        video format code, specify the order of
-                               preference using slashes: "-f 22/17/18"
+                               preference using slashes: "-f 22/17/18". "-f mp4"
+                               and "-f flv" are also supported
     --all-formats              download all available video formats
     --prefer-free-formats      prefer free video formats unless a specific one
                                is requested
@@ -122,10 +124,8 @@ which means you can modify it, redistribute it or use it however you like.
                                only)
 
 ## Subtitle Options:
-    --write-sub                write subtitle file (currently youtube only)
-    --write-auto-sub           write automatic subtitle file (currently youtube
-                               only)
-    --only-sub                 [deprecated] alias of --skip-download
+    --write-sub                write subtitle file
+    --write-auto-sub           write automatic subtitle file (youtube only)
     --all-subs                 downloads all the available subtitles of the
                                video
     --list-subs                lists all available subtitles for the video
```
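The new `-f mp4` and `-f flv` aliases select by container without knowing numeric format codes. A quick invocation sketch (requires a youtube-dl binary on PATH; the video ID is the one used in youtube-dl's own tests):

```python
# Invocation sketch for the new "-f mp4" alias.
import subprocess

subprocess.check_call([
    'youtube-dl', '-f', 'mp4',
    'http://www.youtube.com/watch?v=BaW_jenozKc',
])
```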
devscripts/bash-completion.in

```diff
@@ -4,8 +4,12 @@ __youtube-dl()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     opts="{{flags}}"
+    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
 
-    if [[ ${cur} == * ]] ; then
+    if [[ ${cur} =~ : ]]; then
+        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+        return 0
+    elif [[ ${cur} == * ]] ; then
         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
         return 0
     fi
```
devscripts/buildserver.py (new file, 405 lines)

```python
#!/usr/bin/python3

from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
import argparse
import ctypes
import functools
import sys
import threading
import traceback
import os.path


class BuildHTTPServer(ThreadingMixIn, HTTPServer):
    allow_reuse_address = True


advapi32 = ctypes.windll.advapi32

SC_MANAGER_ALL_ACCESS = 0xf003f
SC_MANAGER_CREATE_SERVICE = 0x02
SERVICE_WIN32_OWN_PROCESS = 0x10
SERVICE_AUTO_START = 0x2
SERVICE_ERROR_NORMAL = 0x1
DELETE = 0x00010000
SERVICE_STATUS_START_PENDING = 0x00000002
SERVICE_STATUS_RUNNING = 0x00000004
SERVICE_ACCEPT_STOP = 0x1

SVCNAME = 'youtubedl_builder'

LPTSTR = ctypes.c_wchar_p
START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))


class SERVICE_TABLE_ENTRY(ctypes.Structure):
    _fields_ = [
        ('lpServiceName', LPTSTR),
        ('lpServiceProc', START_CALLBACK)
    ]


HandlerEx = ctypes.WINFUNCTYPE(
    ctypes.c_int,     # return
    ctypes.c_int,     # dwControl
    ctypes.c_int,     # dwEventType
    ctypes.c_void_p,  # lpEventData,
    ctypes.c_void_p,  # lpContext,
)


def _ctypes_array(c_type, py_array):
    ar = (c_type * len(py_array))()
    ar[:] = py_array
    return ar


def win_OpenSCManager():
    res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
    if not res:
        raise Exception('Opening service manager failed - '
                        'are you running this as administrator?')
    return res


def win_install_service(service_name, cmdline):
    manager = win_OpenSCManager()
    try:
        h = advapi32.CreateServiceW(
            manager, service_name, None,
            SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
            SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
            cmdline, None, None, None, None, None)
        if not h:
            raise OSError('Service creation failed: %s' % ctypes.FormatError())

        advapi32.CloseServiceHandle(h)
    finally:
        advapi32.CloseServiceHandle(manager)


def win_uninstall_service(service_name):
    manager = win_OpenSCManager()
    try:
        h = advapi32.OpenServiceW(manager, service_name, DELETE)
        if not h:
            raise OSError('Could not find service %s: %s' % (
                service_name, ctypes.FormatError()))

        try:
            if not advapi32.DeleteService(h):
                raise OSError('Deletion failed: %s' % ctypes.FormatError())
        finally:
            advapi32.CloseServiceHandle(h)
    finally:
        advapi32.CloseServiceHandle(manager)


def win_service_report_event(service_name, msg, is_error=True):
    with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
        f.write(msg + '\n')

    event_log = advapi32.RegisterEventSourceW(None, service_name)
    if not event_log:
        raise OSError('Could not report event: %s' % ctypes.FormatError())

    try:
        type_id = 0x0001 if is_error else 0x0004
        event_id = 0xc0000000 if is_error else 0x40000000
        lines = _ctypes_array(LPTSTR, [msg])

        if not advapi32.ReportEventW(
                event_log, type_id, 0, event_id, None, len(lines), 0,
                lines, None):
            raise OSError('Event reporting failed: %s' % ctypes.FormatError())
    finally:
        advapi32.DeregisterEventSource(event_log)


def win_service_handler(stop_event, *args):
    try:
        raise ValueError('Handler called with args ' + repr(args))
        TODO
    except Exception as e:
        tb = traceback.format_exc()
        msg = str(e) + '\n' + tb
        win_service_report_event(service_name, msg, is_error=True)
        raise


def win_service_set_status(handle, status_code):
    svcStatus = SERVICE_STATUS()
    svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
    svcStatus.dwCurrentState = status_code
    svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP

    svcStatus.dwServiceSpecificExitCode = 0

    if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
        raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())


def win_service_main(service_name, real_main, argc, argv_raw):
    try:
        # args = [argv_raw[i].value for i in range(argc)]
        stop_event = threading.Event()
        handler = HandlerEx(functools.partial(stop_event, win_service_handler))
        h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
        if not h:
            raise OSError('Handler registration failed: %s' %
                          ctypes.FormatError())

        TODO
    except Exception as e:
        tb = traceback.format_exc()
        msg = str(e) + '\n' + tb
        win_service_report_event(service_name, msg, is_error=True)
        raise


def win_service_start(service_name, real_main):
    try:
        cb = START_CALLBACK(
            functools.partial(win_service_main, service_name, real_main))
        dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
            SERVICE_TABLE_ENTRY(
                service_name,
                cb
            ),
            SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
        ])

        if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
            raise OSError('ctypes start failed: %s' % ctypes.FormatError())
    except Exception as e:
        tb = traceback.format_exc()
        msg = str(e) + '\n' + tb
        win_service_report_event(service_name, msg, is_error=True)
        raise


def main(args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--install',
                        action='store_const', dest='action', const='install',
                        help='Launch at Windows startup')
    parser.add_argument('-u', '--uninstall',
                        action='store_const', dest='action', const='uninstall',
                        help='Remove Windows service')
    parser.add_argument('-s', '--service',
                        action='store_const', dest='action', const='service',
                        help='Run as a Windows service')
    parser.add_argument('-b', '--bind', metavar='<host:port>',
                        action='store', default='localhost:8142',
                        help='Bind to host:port (default %default)')
    options = parser.parse_args(args=args)

    if options.action == 'install':
        fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
        cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
        win_install_service(SVCNAME, cmdline)
        return

    if options.action == 'uninstall':
        win_uninstall_service(SVCNAME)
        return

    if options.action == 'service':
        win_service_start(SVCNAME, main)
        return

    host, port_str = options.bind.split(':')
    port = int(port_str)

    print('Listening on %s:%d' % (host, port))
    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
    thr = threading.Thread(target=srv.serve_forever)
    thr.start()
    input('Press ENTER to shut down')
    srv.shutdown()
    thr.join()


def rmtree(path):
    for name in os.listdir(path):
        fname = os.path.join(path, name)
        if os.path.isdir(fname):
            rmtree(fname)
        else:
            os.chmod(fname, 0o666)
            os.remove(fname)
    os.rmdir(path)

#==============================================================================


class BuildError(Exception):
    def __init__(self, output, code=500):
        self.output = output
        self.code = code

    def __str__(self):
        return self.output


class HTTPError(BuildError):
    pass


class PythonBuilder(object):
    def __init__(self, **kwargs):
        pythonVersion = kwargs.pop('python', '2.7')
        try:
            key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
            try:
                self.pythonPath, _ = _winreg.QueryValueEx(key, '')
            finally:
                _winreg.CloseKey(key)
        except Exception:
            raise BuildError('No such Python version: %s' % pythonVersion)

        super(PythonBuilder, self).__init__(**kwargs)


class GITInfoBuilder(object):
    def __init__(self, **kwargs):
        try:
            self.user, self.repoName = kwargs['path'][:2]
            self.rev = kwargs.pop('rev')
        except ValueError:
            raise BuildError('Invalid path')
        except KeyError as e:
            raise BuildError('Missing mandatory parameter "%s"' % e.args[0])

        path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
        if not os.path.exists(path):
            os.makedirs(path)
        self.basePath = tempfile.mkdtemp(dir=path)
        self.buildPath = os.path.join(self.basePath, 'build')

        super(GITInfoBuilder, self).__init__(**kwargs)


class GITBuilder(GITInfoBuilder):
    def build(self):
        try:
            subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
            subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
        except subprocess.CalledProcessError as e:
            raise BuildError(e.output)

        super(GITBuilder, self).build()


class YoutubeDLBuilder(object):
    authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']

    def __init__(self, **kwargs):
        if self.repoName != 'youtube-dl':
            raise BuildError('Invalid repository "%s"' % self.repoName)
        if self.user not in self.authorizedUsers:
            raise HTTPError('Unauthorized user "%s"' % self.user, 401)

        super(YoutubeDLBuilder, self).__init__(**kwargs)

    def build(self):
        try:
            subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
                                    cwd=self.buildPath)
        except subprocess.CalledProcessError as e:
            raise BuildError(e.output)

        super(YoutubeDLBuilder, self).build()


class DownloadBuilder(object):
    def __init__(self, **kwargs):
        self.handler = kwargs.pop('handler')
        self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
        self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
        if not self.srcPath.startswith(self.buildPath):
            raise HTTPError(self.srcPath, 401)

        super(DownloadBuilder, self).__init__(**kwargs)

    def build(self):
        if not os.path.exists(self.srcPath):
            raise HTTPError('No such file', 404)
        if os.path.isdir(self.srcPath):
            raise HTTPError('Is a directory: %s' % self.srcPath, 401)

        self.handler.send_response(200)
        self.handler.send_header('Content-Type', 'application/octet-stream')
        self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
        self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
        self.handler.end_headers()

        with open(self.srcPath, 'rb') as src:
            shutil.copyfileobj(src, self.handler.wfile)

        super(DownloadBuilder, self).build()


class CleanupTempDir(object):
    def build(self):
        try:
            rmtree(self.basePath)
        except Exception as e:
            print('WARNING deleting "%s": %s' % (self.basePath, e))

        super(CleanupTempDir, self).build()


class Null(object):
    def __init__(self, **kwargs):
        pass

    def start(self):
        pass

    def close(self):
        pass

    def build(self):
        pass


class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
    pass


class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
    actionDict = {'build': Builder, 'download': Builder}  # They're the same, no more caching.

    def do_GET(self):
        path = urlparse.urlparse(self.path)
        paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
        action, _, path = path.path.strip('/').partition('/')
        if path:
            path = path.split('/')
            if action in self.actionDict:
                try:
                    builder = self.actionDict[action](path=path, handler=self, **paramDict)
                    builder.start()
                    try:
                        builder.build()
                    finally:
                        builder.close()
                except BuildError as e:
                    self.send_response(e.code)
                    msg = unicode(e).encode('UTF-8')
                    self.send_header('Content-Type', 'text/plain; charset=UTF-8')
                    self.send_header('Content-Length', len(msg))
                    self.end_headers()
                    self.wfile.write(msg)
                except HTTPError as e:
                    self.send_response(e.code, str(e))
            else:
                self.send_response(500, 'Unknown build method "%s"' % action)
        else:
            self.send_response(500, 'Malformed URL')

#==============================================================================

if __name__ == '__main__':
    main()
```
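Note that, as committed, the script still references `subprocess`, `tempfile`, `shutil`, `urlparse`, `_winreg`, `SERVICE_STATUS` and `unicode` without importing or defining them, and the Windows-service handlers contain bare `TODO` placeholders; only the plain HTTP build-and-download path is complete. A client fetches a freshly built `youtube-dl.exe` with a single GET, which is exactly what `release.sh` below does with wget. A minimal Python sketch (host, port and rev are illustrative):

```python
# Fetch a build from the server started by "buildserver.py -b localhost:8142".
import urllib.request

rev = '67dfbc0cb9'  # any commit SHA1 from the range above
url = 'http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=%s' % rev
with urllib.request.urlopen(url) as resp:
    with open('youtube-dl.exe', 'wb') as f:
        f.write(resp.read())
```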
devscripts/gh-pages/add-version.py

```diff
@@ -3,7 +3,8 @@
 import json
 import sys
 import hashlib
 import urllib.request
+import os.path
 
 if len(sys.argv) <= 1:
     print('Specify the version number as parameter')
@@ -23,10 +24,14 @@ filenames = {
     'bin': 'youtube-dl',
     'exe': 'youtube-dl.exe',
     'tar': 'youtube-dl-%s.tar.gz' % version}
+build_dir = os.path.join('..', '..', 'build', version)
 for key, filename in filenames.items():
-    print('Downloading and checksumming %s...' % filename)
     url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
-    data = urllib.request.urlopen(url).read()
+    fn = os.path.join(build_dir, filename)
+    with open(fn, 'rb') as f:
+        data = f.read()
+    if not data:
+        raise ValueError('File %s is empty!' % fn)
     sha256sum = hashlib.sha256(data).hexdigest()
    new_version[key] = (url, sha256sum)
```
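The script now checksums the locally built artifacts instead of re-downloading them, but it still records the public download URL next to each SHA-256, so a consumer can verify a release after the fact. A sketch, assuming the `update/versions.json` path and its `{'versions': {version: {key: [url, sha256]}}}` layout:

```python
# Verification sketch; the versions.json path and layout are assumptions
# based on the (url, sha256sum) pairs this script stores.
import hashlib
import json
import urllib.request

with open('update/versions.json') as f:
    versions = json.load(f)
url, expected_sha256 = versions['versions']['2013.09.20']['bin']
data = urllib.request.urlopen(url).read()
assert hashlib.sha256(data).hexdigest() == expected_sha256
```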
devscripts/gh-pages/update-sites.py (new executable file, 33 lines)

```python
#!/usr/bin/env python3

import sys
import os
import textwrap

# We must be able to import youtube_dl
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

import youtube_dl

def main():
    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
        template = tmplf.read()

    ie_htmls = []
    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
        try:
            ie_html += ': {}'.format(ie.IE_DESC)
        except AttributeError:
            pass
        if ie.working() == False:
            ie_html += ' (Currently broken)'
        ie_htmls.append('<li>{}</li>'.format(ie_html))

    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))

    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
        sitesf.write(template)

if __name__ == '__main__':
    main()
```
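The generator leans entirely on `youtube_dl.gen_extractors()`, each extractor's `IE_NAME`, the optional `IE_DESC`, and `working()`. The same enumeration works standalone, which is handy for eyeballing the list before regenerating the page:

```python
# Standalone version of the enumeration update-sites.py performs.
import youtube_dl

for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
    desc = getattr(ie, 'IE_DESC', None)
    line = ie.IE_NAME if not desc else '%s: %s' % (ie.IE_NAME, desc)
    if ie.working() is False:
        line += ' (Currently broken)'
    print(line)
```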
devscripts/release.sh

```diff
@@ -55,8 +55,8 @@ git push origin "$version"
 /bin/echo -e "\n### OK, now it is time to build the binaries..."
 REV=$(git rev-parse HEAD)
 make youtube-dl youtube-dl.tar.gz
-wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
-    wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+read -p "VM running? (y/n) " -n 1
+wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
 mkdir -p "build/$version"
 mv youtube-dl youtube-dl.exe "build/$version"
 mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
@@ -85,6 +85,7 @@ ROOT=$(pwd)
     "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
     "$ROOT/devscripts/gh-pages/generate-download.py"
     "$ROOT/devscripts/gh-pages/update-copyright.py"
+    "$ROOT/devscripts/gh-pages/update-sites.py"
     git add *.html *.html.in update
     git commit -m "release $version"
     git show HEAD
```
devscripts/youtube_genalgo.py

```diff
@@ -1,13 +1,20 @@
 #!/usr/bin/env python
+# encoding: utf-8
 
 # Generate youtube signature algorithm from test cases
 
 import sys
 
 tests = [
+    # 93 - vfl79wBKW 2013/07/20
+    (u"qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"€",
+     u".>/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ098765'321mnbvcxzasdfghjklpoiu"),
     # 92 - vflQw-fB4 2013/07/17
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
      "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
+    # 91 - vfl79wBKW 2013/07/20 (sporadic)
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~",
+     "/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543.1mnbvcxzasdfghjklpoiu"),
     # 90
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
      "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
@@ -20,21 +27,21 @@ tests = [
     # 87
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
      "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
-    # 86 - vflg0g8PQ 2013/08/29
+    # 86 - vfluy6kdb 2013/09/06
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
-     "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
+     ">/?;}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
-    # 85
-    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
-     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
+    # 85 - vflkuzxcs 2013/09/11
+    ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
+     '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
     # 84 - vflg0g8PQ 2013/08/29 (sporadic)
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
      ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
     # 83
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
-    # 82 - vflZK4ZYR 2013/08/23
+    # 82 - vflGNjMhJ 2013/09/12
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
-     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
+     ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"),
     # 81 - vflLC8JvQ 2013/07/25
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
      "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
```
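Each test pair maps a reference string of a given length to the output YouTube's player produces for it. Because every character in the reference string is unique, the player's reordering can be recovered positionally from a single pair, which is the idea the generator script builds on:

```python
# Recovering the reordering from one test pair (the #83 case above).
# Works because every character in the question string is unique.
q = ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM"
     "!#$%^&*()_+={[};?/>.<")
a = (".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321"
     "mnbvcxzasdfghjklpoiuytreq")
perm = [q.index(c) for c in a]           # source index for each output position
assert ''.join(q[i] for i in perm) == a  # applying the permutation reproduces it
```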
test/parameters.json

```diff
@@ -38,7 +38,6 @@
     "writedescription": false,
     "writeinfojson": true,
     "writesubtitles": false,
-    "onlysubtitles": false,
     "allsubtitles": false,
     "listssubtitles": false
 }
```
test/test_all_urls.py

```diff
@@ -11,24 +11,50 @@ from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE,
+from helper import get_testcases
 
 class TestAllURLsMatching(unittest.TestCase):
+    def setUp(self):
+        self.ies = gen_extractors()
+
+    def matching_ies(self, url):
+        return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
+
+    def assertMatch(self, url, ie_list):
+        self.assertEqual(self.matching_ies(url), ie_list)
+
     def test_youtube_playlist_matching(self):
-        self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
-        self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
-        self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
+        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
+        assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
+        assertPlaylist(u'PL63F0C78739B09958')
+        assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+        assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+        assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
+        self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
 
     def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
 
     def test_youtube_channel_matching(self):
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
+        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
+
+    def test_youtube_user_matching(self):
+        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+
+    def test_youtube_feeds(self):
+        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+        self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
+        self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
+        self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
+
+    def test_youtube_show_matching(self):
+        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
 
     def test_justin_tv_channelid_matching(self):
         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
@@ -47,10 +73,13 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
 
     def test_youtube_extract(self):
-        self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
-        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
-        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
-        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
+        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
+        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
+        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
 
     def test_no_duplicates(self):
         ies = gen_extractors()
@@ -63,15 +92,12 @@ class TestAllURLsMatching(unittest.TestCase):
             self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
 
     def test_keywords(self):
-        ies = gen_extractors()
-        matching_ies = lambda url: [ie.IE_NAME for ie in ies
-                                    if ie.suitable(url) and ie.IE_NAME != 'generic']
-        self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
+        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+        self.assertMatch(':thedailyshow', ['ComedyCentral'])
+        self.assertMatch(':tds', ['ComedyCentral'])
+        self.assertMatch(':colbertreport', ['ComedyCentral'])
+        self.assertMatch(':cr', ['ComedyCentral'])
 
 
 if __name__ == '__main__':
```
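The refactor funnels every assertion through a single `matching_ies`/`assertMatch` helper instead of per-extractor `suitable()` calls. The same check is usable standalone, outside the test class:

```python
# The matching logic the new assertMatch helper wraps.
from youtube_dl.extractor import gen_extractors

ies = gen_extractors()

def matching_ies(url):
    return [ie.IE_NAME for ie in ies
            if ie.suitable(url) and ie.IE_NAME != 'generic']

print(matching_ies('https://www.youtube.com/feed/subscriptions'))
# -> ['youtube:subscriptions']
```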
test/test_dailymotion_subtitles.py (new file, 71 lines)

```python
#!/usr/bin/env python

import sys
import unittest
import json
import io
import hashlib

# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from youtube_dl.extractor import DailymotionIE
from youtube_dl.utils import *
from helper import FakeYDL

md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()

class TestDailymotionSubtitles(unittest.TestCase):
    def setUp(self):
        self.DL = FakeYDL()
        self.url = 'http://www.dailymotion.com/video/xczg00'
    def getInfoDict(self):
        IE = DailymotionIE(self.DL)
        info_dict = IE.extract(self.url)
        return info_dict
    def getSubtitles(self):
        info_dict = self.getInfoDict()
        return info_dict[0]['subtitles']
    def test_no_writesubtitles(self):
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)
    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 5)
    def test_list_subtitles(self):
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)
    def test_automatic_captions(self):
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslang'] = ['en']
        subtitles = self.getSubtitles()
        self.assertTrue(len(subtitles.keys()) == 0)
    def test_nosubtitles(self):
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)
    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)

if __name__ == '__main__':
    unittest.main()
```
test/test_playlists.py

```diff
@@ -8,7 +8,7 @@ import json
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE
 from youtube_dl.utils import *
 
 from helper import FakeYDL
@@ -34,5 +34,21 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'Vimeo Tributes')
         self.assertTrue(len(result['entries']) > 24)
 
+    def test_ustream_channel(self):
+        dl = FakeYDL()
+        ie = UstreamChannelIE(dl)
+        result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'5124905')
+        self.assertTrue(len(result['entries']) >= 11)
+
+    def test_soundcloud_user(self):
+        dl = FakeYDL()
+        ie = SoundcloudUserIE(dl)
+        result = ie.extract('https://soundcloud.com/the-concept-band')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'9615865')
+        self.assertTrue(len(result['entries']) >= 12)
+
 if __name__ == '__main__':
     unittest.main()
```
test/test_utils.py

```diff
@@ -11,13 +11,16 @@ import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 #from youtube_dl.utils import htmlentity_transform
-from youtube_dl.utils import timeconvert
-from youtube_dl.utils import sanitize_filename
-from youtube_dl.utils import unescapeHTML
-from youtube_dl.utils import orderedSet
-from youtube_dl.utils import DateRange
-from youtube_dl.utils import unified_strdate
-from youtube_dl.utils import find_xpath_attr
+from youtube_dl.utils import (
+    timeconvert,
+    sanitize_filename,
+    unescapeHTML,
+    orderedSet,
+    DateRange,
+    unified_strdate,
+    find_xpath_attr,
+    get_meta_content,
+)
 
 if sys.version_info < (3, 0):
     _compat_str = lambda b: b.decode('unicode-escape')
@@ -127,5 +130,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
 
+    def test_meta_parser(self):
+        testhtml = u'''
+        <head>
+            <meta name="description" content="foo &amp; bar">
+            <meta content='Plato' name='author'/>
+        </head>
+        '''
+        get_meta = lambda name: get_meta_content(name, testhtml)
+        self.assertEqual(get_meta('description'), u'foo & bar')
+        self.assertEqual(get_meta('author'), 'Plato')
+
 if __name__ == '__main__':
     unittest.main()
```
test/test_youtube_subtitles.py

```diff
@@ -18,85 +18,65 @@ md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
 
 class TestYoutubeSubtitles(unittest.TestCase):
     def setUp(self):
-        DL = FakeYDL()
-        DL.params['allsubtitles'] = False
-        DL.params['writesubtitles'] = False
-        DL.params['subtitlesformat'] = 'srt'
-        DL.params['listsubtitles'] = False
-    def test_youtube_no_subtitles(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = False
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        subtitles = info_dict[0]['subtitles']
+        self.DL = FakeYDL()
+        self.url = 'QRS8MkLhQmM'
+    def getInfoDict(self):
+        IE = YoutubeIE(self.DL)
+        info_dict = IE.extract(self.url)
+        return info_dict
+    def getSubtitles(self):
+        info_dict = self.getInfoDict()
+        return info_dict[0]['subtitles']
+    def test_youtube_no_writesubtitles(self):
+        self.DL.params['writesubtitles'] = False
+        subtitles = self.getSubtitles()
         self.assertEqual(subtitles, None)
     def test_youtube_subtitles(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
-    def test_youtube_subtitles_it(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['subtitleslangs'] = ['it']
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['it']
-        self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
-    def test_youtube_onlysubtitles(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['onlysubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
+        self.DL.params['writesubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+    def test_youtube_subtitles_lang(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
     def test_youtube_allsubtitles(self):
-        DL = FakeYDL()
-        DL.params['allsubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        subtitles = info_dict[0]['subtitles']
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 13)
     def test_youtube_subtitles_sbv_format(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['subtitlesformat'] = 'sbv'
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'sbv'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
     def test_youtube_subtitles_vtt_format(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
-        DL.params['subtitlesformat'] = 'vtt'
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles']['en']
-        self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['subtitlesformat'] = 'vtt'
+        subtitles = self.getSubtitles()
+        self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
     def test_youtube_list_subtitles(self):
-        DL = FakeYDL()
-        DL.params['listsubtitles'] = True
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('QRS8MkLhQmM')
+        self.DL.params['listsubtitles'] = True
+        info_dict = self.getInfoDict()
         self.assertEqual(info_dict, None)
     def test_youtube_automatic_captions(self):
-        DL = FakeYDL()
-        DL.params['writeautomaticsub'] = True
-        DL.params['subtitleslangs'] = ['it']
-        IE = YoutubeIE(DL)
-        info_dict = IE.extract('8YoUxe5ncPo')
-        sub = info_dict[0]['subtitles']['it']
-        self.assertTrue(sub is not None)
+        self.url = '8YoUxe5ncPo'
+        self.DL.params['writeautomaticsub'] = True
+        self.DL.params['subtitleslangs'] = ['it']
+        subtitles = self.getSubtitles()
+        self.assertTrue(subtitles['it'] is not None)
+    def test_youtube_nosubtitles(self):
+        self.url = 'sAjKT8FhjI8'
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(len(subtitles), 0)
     def test_youtube_multiple_langs(self):
-        DL = FakeYDL()
-        DL.params['writesubtitles'] = True
+        self.url = 'QRS8MkLhQmM'
+        self.DL.params['writesubtitles'] = True
         langs = ['it', 'fr', 'de']
-        DL.params['subtitleslangs'] = langs
-        IE = YoutubeIE(DL)
-        subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
+        self.DL.params['subtitleslangs'] = langs
+        subtitles = self.getSubtitles()
         for lang in langs:
             self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
```
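All cases now share `setUp`/`getInfoDict`/`getSubtitles` instead of building a fresh `FakeYDL` per test. Reproducing one case by hand looks like this (requires network access; run from the test directory so `helper` is importable):

```python
# Manual equivalent of test_youtube_subtitles_lang above.
from helper import FakeYDL
from youtube_dl.extractor import YoutubeIE

DL = FakeYDL()
DL.params['writesubtitles'] = True
DL.params['subtitleslangs'] = ['it']
info_dict = YoutubeIE(DL).extract('QRS8MkLhQmM')
subtitles = info_dict[0]['subtitles']
print(sorted(subtitles.keys()))  # expect ['it']
```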
youtube_dl/FileDownloader.py

```diff
@@ -66,7 +66,7 @@ class FileDownloader(object):
     @staticmethod
     def format_seconds(seconds):
         (mins, secs) = divmod(seconds, 60)
-        (hours, eta_mins) = divmod(mins, 60)
+        (hours, mins) = divmod(mins, 60)
         if hours > 99:
             return '--:--:--'
         if hours == 0:
```
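The old unpacking stored the quotient in `hours` but threw the remainder away in `eta_mins`, so `mins` stayed un-divided and 3661 seconds rendered with "61" in the minutes field. With the fix (the return branches below are assumed for illustration; only the lines above appear in the diff):

```python
def format_seconds(seconds):
    (mins, secs) = divmod(seconds, 60)
    (hours, mins) = divmod(mins, 60)
    if hours > 99:
        return '--:--:--'
    if hours == 0:
        return '%02d:%02d' % (mins, secs)              # assumed branch
    return '%02d:%02d:%02d' % (hours, mins, secs)      # assumed branch

assert format_seconds(3661) == '01:01:01'
```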
youtube_dl/YoutubeDL.py

```diff
@@ -74,6 +74,7 @@ class YoutubeDL(object):
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatic subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
+                       (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
     subtitleslangs:    List of languages of the subtitles to download
@@ -141,14 +142,10 @@ class YoutubeDL(object):
 
     def to_screen(self, message, skip_eol=False):
         """Print message to stdout if not in quiet mode."""
         assert type(message) == type(u'')
         if not self.params.get('quiet', False):
             terminator = [u'\n', u''][skip_eol]
             output = message + terminator
-            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
-                output = output.encode(preferredencoding(), 'ignore')
-            self._screen_file.write(output)
-            self._screen_file.flush()
+            write_string(output, self._screen_file)
 
     def to_stderr(self, message):
         """Print message to stderr."""
@@ -492,13 +489,14 @@ class YoutubeDL(object):
                 self.report_writedescription(descfn)
                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                     descfile.write(info_dict['description'])
+            except (KeyError, TypeError):
+                self.report_warning(u'There\'s no description to write.')
             except (OSError, IOError):
                 self.report_error(u'Cannot write description file ' + descfn)
                 return
 
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
-                                       self.params.get('writeautomaticsub'),
-                                       self.params.get('allsubtitles', False)])
+                                       self.params.get('writeautomaticsub')])
 
         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
             # subtitles download errors are already managed as troubles in relevant IE
@@ -546,11 +544,11 @@ class YoutubeDL(object):
             else:
                 try:
                     success = self.fd._do_download(filename, info_dict)
-                except (OSError, IOError) as err:
-                    raise UnavailableVideoError(err)
                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                     self.report_error(u'unable to download video data: %s' % str(err))
                     return
+                except (OSError, IOError) as err:
+                    raise UnavailableVideoError(err)
                 except (ContentTooShortError, ) as err:
                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                     return
```
youtube_dl/__init__.py

```diff
@@ -28,6 +28,9 @@ __authors__ = (
     'Axel Noack',
     'Albert Kim',
     'Pierre Rudloff',
+    'Huarong Huo',
+    'Ismael Mejía',
+    'Steffan \'Ruirize\' James',
 )
 
 __license__ = 'Public Domain'
@@ -147,7 +150,7 @@ def parseOpts(overrideArguments=None):
     general.add_option('-U', '--update',
         action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
     general.add_option('-i', '--ignore-errors',
-        action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+        action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
     general.add_option('--dump-user-agent',
         action='store_true', dest='dump_user_agent',
         help='display the current browser identification', default=False)
@@ -192,7 +195,7 @@ def parseOpts(overrideArguments=None):
 
     video_format.add_option('-f', '--format',
         action='store', dest='format', metavar='FORMAT',
-        help='video format code, specify the order of preference using slashes: "-f 22/17/18"')
+        help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
     video_format.add_option('--all-formats',
         action='store_const', dest='format', help='download all available video formats', const='all')
     video_format.add_option('--prefer-free-formats',
@@ -204,13 +207,10 @@ def parseOpts(overrideArguments=None):
 
     subtitles.add_option('--write-sub', '--write-srt',
         action='store_true', dest='writesubtitles',
-        help='write subtitle file (currently youtube only)', default=False)
+        help='write subtitle file', default=False)
     subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
         action='store_true', dest='writeautomaticsub',
-        help='write automatic subtitle file (currently youtube only)', default=False)
-    subtitles.add_option('--only-sub',
-        action='store_true', dest='skip_download',
-        help='[deprecated] alias of --skip-download', default=False)
+        help='write automatic subtitle file (youtube only)', default=False)
     subtitles.add_option('--all-subs',
         action='store_true', dest='allsubtitles',
         help='downloads all the available subtitles of the video', default=False)
@@ -221,7 +221,7 @@ def parseOpts(overrideArguments=None):
         action='store', dest='subtitlesformat', metavar='FORMAT',
         help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
     subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
-        action='callback', dest='subtitleslang', metavar='LANGS', type='str',
+        action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
         default=[], callback=_comma_separated_values_options_callback,
         help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
 
@@ -355,7 +355,7 @@ def parseOpts(overrideArguments=None):
     if overrideArguments is not None:
         opts, args = parser.parse_args(overrideArguments)
         if opts.verbose:
-            sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
+            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
     else:
         xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
         if xdg_config_home:
@@ -368,9 +368,9 @@
         argv = systemConf + userConf + commandLineConf
         opts, args = parser.parse_args(argv)
         if opts.verbose:
-            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
-            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
-            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
 
     return parser, opts, args
 
@@ -393,7 +393,7 @@ def _real_main(argv=None):
         except (IOError, OSError) as err:
             if opts.verbose:
                 traceback.print_exc()
-            sys.stderr.write(u'ERROR: unable to open cookie file\n')
+            write_string(u'ERROR: unable to open cookie file\n')
             sys.exit(101)
     # Set user agent
     if opts.user_agent is not None:
@@ -420,7 +420,7 @@
             batchurls = [x.strip() for x in batchurls]
             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
             if opts.verbose:
-                sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
         except IOError:
             sys.exit(u'ERROR: batch file could not be read')
     all_urls = batchurls + args
@@ -534,6 +534,11 @@
     else:
         date = DateRange(opts.dateafter, opts.datebefore)
 
+    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+    # this was the old behaviour if only --all-sub was given.
+    if opts.allsubtitles and (opts.writeautomaticsub == False):
+        opts.writesubtitles = True
+
     if sys.version_info < (3,):
         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
         if opts.outtmpl is not None:
@@ -592,7 +597,7 @@
         'allsubtitles': opts.allsubtitles,
         'listsubtitles': opts.listsubtitles,
         'subtitlesformat': opts.subtitlesformat,
-        'subtitleslangs': opts.subtitleslang,
+        'subtitleslangs': opts.subtitleslangs,
         'matchtitle': decodeOption(opts.matchtitle),
         'rejecttitle': decodeOption(opts.rejecttitle),
         'max_downloads': opts.max_downloads,
@@ -607,7 +612,7 @@
         })
 
     if opts.verbose:
-        sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n')
+        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
         try:
             sp = subprocess.Popen(
                 ['git', 'rev-parse', '--short', 'HEAD'],
@@ -616,14 +621,14 @@
             out, err = sp.communicate()
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
-                sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n')
+                write_string(u'[debug] Git HEAD: ' + out + u'\n')
         except:
             try:
                 sys.exc_clear()
             except:
                 pass
-        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
-        sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+        write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
+        write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
 
         ydl.add_default_info_extractors()
```
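Together with the `to_screen` change in YoutubeDL.py, every direct `sys.stderr.write` of verbose output is now funneled through `write_string` from `youtube_dl.utils`. Its body is not part of this diff, but the encoding logic it centralizes is exactly what was deleted from `to_screen`, roughly:

```python
# Approximation of the write_string helper, reconstructed from the encoding
# logic removed from YoutubeDL.to_screen; the real one lives in youtube_dl.utils.
import sys
from youtube_dl.utils import preferredencoding

def write_string(s, out=None):
    if out is None:
        out = sys.stderr
    assert type(s) == type(u'')
    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about stdout/stderr mode
        s = s.encode(preferredencoding(), 'ignore')
    out.write(s)
    out.flush()
```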
youtube_dl/extractor/__init__.py

```diff
@@ -6,6 +6,7 @@ from .arte import ArteTvIE
 from .auengine import AUEngineIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
+from .bloomberg import BloombergIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .c56 import C56IE
@@ -18,15 +19,26 @@ from .condenast import CondeNastIE
 from .criterion import CriterionIE
 from .cspan import CSpanIE
 from .dailymotion import DailymotionIE, DailymotionPlaylistIE
+from .daum import DaumIE
 from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
+from .defense import DefenseGouvFrIE
+from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
 from .exfm import ExfmIE
 from .facebook import FacebookIE
+from .fktv import (
+    FKTVIE,
+    FKTVPosteckeIE,
+)
 from .flickr import FlickrIE
+from .francetv import (
+    PluzzIE,
+    FranceTvInfoIE,
+)
 from .freesound import FreesoundIE
 from .funnyordie import FunnyOrDieIE
 from .gamespot import GameSpotIE
@@ -46,18 +58,22 @@ from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .kankan import KankanIE
+from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
 from .muzu import MuzuTVIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
+from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
+from .newgrounds import NewgroundsIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
@@ -71,8 +87,10 @@ from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .sina import SinaIE
+from .slashdot import SlashdotIE
+from .slideshare import SlideshareIE
 from .sohu import SohuIE
-from .soundcloud import SoundcloudIE, SoundcloudSetIE
+from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
 from .southparkstudios import SouthParkStudiosIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
@@ -87,10 +105,12 @@ from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
+from .unistra import UnistraIE
-from .ustream import UstreamIE
+from .ustream import UstreamIE, UstreamChannelIE
 from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vice import ViceIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
```
@@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor):
                   for fn,fdata in data['files'].items()
                   if 'Video' in fdata['format']]
        formats.sort(key=lambda fdata: fdata['file_size'])
        for f in formats:
            f['ext'] = determine_ext(f['url'])

        info = {
            '_type': 'video',
@@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor):
            info['thumbnail'] = thumbnail

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])
        info.update(formats[-1])

        return info
youtube_dl/extractor/bloomberg.py (new file, 27 lines)
@@ -0,0 +1,27 @@
import re

from .common import InfoExtractor


class BloombergIE(InfoExtractor):
    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'

    _TEST = {
        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
        u'info_dict': {
            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
        },
        u'params': {
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        ooyala_url = self._og_search_video_url(webpage)
        return self.url_result(ooyala_url, ie='Ooyala')
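BloombergIE does not resolve the media itself: it pulls the Ooyala player URL out of the Open Graph tags and hands it back to the core via `url_result`, which (roughly, per common.py of this era) just wraps the URL so it gets dispatched to the named extractor:

    def url_result(self, url, ie=None):
        """Return a result dict pointing at another page to be processed."""
        return {'_type': 'url', 'url': url, 'ie_key': ie}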
@@ -5,7 +5,7 @@ from .common import InfoExtractor


class Canalc2IE(InfoExtractor):
    _IE_NAME = 'canalc2.tv'
    IE_NAME = 'canalc2.tv'
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'

    _TEST = {
@@ -1,3 +1,4 @@
# encoding: utf-8
import re
import xml.etree.ElementTree

@@ -5,24 +6,29 @@ from .common import InfoExtractor
from ..utils import unified_strdate

class CanalplusIE(InfoExtractor):
    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
    IE_NAME = u'canalplus.fr'

    _TEST = {
        u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
        u'file': u'889861.flv',
        u'md5': u'590a888158b5f0d6832f84001fbf3e99',
        u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
        u'file': u'922470.flv',
        u'info_dict': {
            u'title': u'Le Petit Journal 20/06/13 - La guerre des drone',
            u'upload_date': u'20130620',
            u'title': u'Zapping - 26/08/13',
            u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
            u'upload_date': u'20130826',
        },
        u'params': {
            u'skip_download': True,
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        if video_id is None:
            webpage = self._download_webpage(url, mobj.group('path'))
            video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
        info_url = self._VIDEO_INFO_TEMPLATE % video_id
        info_page = self._download_webpage(info_url,video_id,
                                           u'Downloading video info')
@@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor):
            'ext': 'flv',
            'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
            'thumbnail': media.find('IMAGES/GRAND').text,
            'description': infos.find('DESCRIPTION').text,
            'view_count': int(infos.find('NB_VUES').text),
        }
@@ -3,18 +3,22 @@ import json
import itertools

from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor

from ..utils import (
    compat_urllib_request,
    compat_str,
    get_element_by_attribute,
    get_element_by_id,

    ExtractorError,
)

class DailymotionIE(InfoExtractor):

class DailymotionIE(SubtitlesInfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
    IE_NAME = u'dailymotion'
    _TEST = {
        u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
@@ -33,6 +37,7 @@ class DailymotionIE(InfoExtractor):
        video_id = mobj.group(1).split('_')[0].split('?')[0]

        video_extension = 'mp4'
        url = 'http://www.dailymotion.com/video/%s' % video_id

        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url)
@@ -55,7 +60,8 @@ class DailymotionIE(InfoExtractor):
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            u'Downloading embed page')
        info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
            'video info', flags=re.MULTILINE)
        info = json.loads(info)

        # TODO: support choosing qualities
@@ -71,6 +77,12 @@ class DailymotionIE(InfoExtractor):
            raise ExtractorError(u'Unable to extract video URL')
        video_url = info[max_quality]

        # subtitles
        video_subtitles = self.extract_subtitles(video_id)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)
            return

        return [{
            'id': video_id,
            'url': video_url,
@@ -78,9 +90,25 @@ class DailymotionIE(InfoExtractor):
            'upload_date': video_upload_date,
            'title': self._og_search_title(webpage),
            'ext': video_extension,
            'subtitles': video_subtitles,
            'thumbnail': info['thumbnail_url']
        }]

    def _get_available_subtitles(self, video_id):
        try:
            sub_list = self._download_webpage(
                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
        info = json.loads(sub_list)
        if (info['total'] > 0):
            sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
            return sub_lang_list
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}


class DailymotionPlaylistIE(InfoExtractor):
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
youtube_dl/extractor/daum.py (new file, 74 lines)
@@ -0,0 +1,74 @@
# encoding: utf-8
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    determine_ext,
)


class DaumIE(InfoExtractor):
    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'

    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
        webpage = self._download_webpage(canonical_url, video_id)
        full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
            webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info_xml = self._download_webpage(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
        for format_el in urls.findall('result/output_list/output_list'):
            profile = format_el.attrib['profile']
            format_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            url_xml = self._download_webpage(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
                video_id, note=False)
            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
                'ext': determine_ext(format_url),
                'format_id': profile,
            })

        info = {
            'id': video_id,
            'title': info.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': info.find('CONTENTS').text,
            'duration': int(info.find('DURATION').text),
            'upload_date': info.find('REGDTTM').text[:8],
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
youtube_dl/extractor/defense.py (new file, 39 lines)
@@ -0,0 +1,39 @@
import re
import json

from .common import InfoExtractor


class DefenseGouvFrIE(InfoExtractor):
    _IE_NAME = 'defense.gouv.fr'
    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
                  r'ligthboxvideo/base-de-medias/webtv/(.*)')

    _TEST = {
        u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
                 u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
        u'file': u'11213.mp4',
        u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
        "info_dict": {
            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
        }
    }

    def _real_extract(self, url):
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        video_id = self._search_regex(
            r"flashvars.pvg_id=\"(\d+)\";",
            webpage, 'ID')

        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
                    + video_id)
        info = self._download_webpage(json_url, title,
                                      'Downloading JSON config')
        video_url = json.loads(info)['renditions'][0]['url']

        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }
@@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor):
            'width': int(fe.find('./width').text),
            'height': int(fe.find('./height').text),
            'url': fe.find('./url').text,
            'ext': determine_ext(fe.find('./url').text),
            'filesize': int(fe.find('./filesize').text),
            'video_bitrate': int(fe.find('./videoBitrate').text),
            '3sat_qualityname': fe.find('./quality').text,
@@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor):
        }

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])
        info.update(formats[-1])

        return info
youtube_dl/extractor/ebaumsworld.py (new file, 37 lines)
@@ -0,0 +1,37 @@
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import determine_ext


class EbaumsWorldIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
        u'file': u'83367677.mp4',
        u'info_dict': {
            u'title': u'A Giant Python Opens The Door',
            u'description': u'This is how nightmares start...',
            u'uploader': u'jihadpizza',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        config_xml = self._download_webpage(
            'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video_url = config.find('file').text

        return {
            'id': video_id,
            'title': config.find('title').text,
            'url': video_url,
            'ext': determine_ext(video_url),
            'description': config.find('description').text,
            'thumbnail': config.find('image').text,
            'uploader': config.find('username').text,
        }
youtube_dl/extractor/fktv.py (new file, 79 lines)
@@ -0,0 +1,79 @@
import re
import random
import json

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    get_element_by_id,
    clean_html,
)


class FKTVIE(InfoExtractor):
    IE_NAME = u'fernsehkritik.tv'
    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'

    _TEST = {
        u'url': u'http://fernsehkritik.tv/folge-1',
        u'file': u'00011.flv',
        u'info_dict': {
            u'title': u'Folge 1 vom 10. April 2007',
            u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        episode = int(mobj.group('ep'))

        server = random.randint(2, 4)
        video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
        start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
            episode)
        playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
            u'playlist', flags=re.DOTALL)
        files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
        # TODO: return a single multipart video
        videos = []
        for i, _ in enumerate(files, 1):
            video_id = '%04d%d' % (episode, i)
            video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
            video_title = 'Fernsehkritik %d.%d' % (episode, i)
            videos.append({
                'id': video_id,
                'url': video_url,
                'ext': determine_ext(video_url),
                'title': clean_html(get_element_by_id('eptitle', start_webpage)),
                'description': clean_html(get_element_by_id('contentlist', start_webpage)),
                'thumbnail': video_thumbnail
            })
        return videos


class FKTVPosteckeIE(InfoExtractor):
    IE_NAME = u'fernsehkritik.tv:postecke'
    _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
    _TEST = {
        u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
        u'file': u'0120.flv',
        u'md5': u'262f0adbac80317412f7e57b4808e5c4',
        u'info_dict': {
            u"title": u"Postecke 120"
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        episode = int(mobj.group('ep'))

        server = random.randint(2, 4)
        video_id = '%04d' % episode
        video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
        video_title = 'Postecke %d' % episode
        return {
            'id': video_id,
            'url': video_url,
            'ext': determine_ext(video_url),
            'title': video_title,
        }
youtube_dl/extractor/francetv.py (new file, 67 lines)
@@ -0,0 +1,67 @@
# encoding: utf-8
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
)


class FranceTVBaseInfoExtractor(InfoExtractor):
    def _extract_video(self, video_id):
        xml_desc = self._download_webpage(
            'http://www.francetvinfo.fr/appftv/webservices/video/'
            'getInfosOeuvre.php?id-diffusion='
            + video_id, video_id, 'Downloading XML config')
        info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))

        manifest_url = info.find('videos/video/url').text
        video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
        video_url = video_url.replace('/z/', '/i/')
        thumbnail_path = info.find('image').text

        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': info.find('titre').text,
                'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
                'description': info.find('synopsis').text,
                }


class PluzzIE(FranceTVBaseInfoExtractor):
    IE_NAME = u'pluzz.francetv.fr'
    _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'

    # Can't use tests, videos expire in 7 days

    def _real_extract(self, url):
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        video_id = self._search_regex(
            r'data-diffusion="(\d+)"', webpage, 'ID')
        return self._extract_video(video_id)


class FranceTvInfoIE(FranceTVBaseInfoExtractor):
    IE_NAME = u'francetvinfo.fr'
    _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'

    _TEST = {
        u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
        u'file': u'84981923.mp4',
        u'info_dict': {
            u'title': u'Soir 3',
        },
        u'params': {
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
        return self._extract_video(video_id)
@@ -21,7 +21,8 @@ class FunnyOrDieIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
        video_url = self._search_regex(
            [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
            webpage, u'video URL', flags=re.DOTALL)

        info = {
@@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor):
        u"file": u"6410818.mp4",
        u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
        u"info_dict": {
            u"title": u"Arma III - Community Guide: SITREP I",
            u"title": u"Arma 3 - Community Guide: SITREP I",
            u"upload_date": u"20130627",
        }
    }
@@ -109,6 +109,11 @@ class GenericIE(InfoExtractor):
            return new_url

    def _real_extract(self, url):
        parsed_url = compat_urlparse.urlparse(url)
        if not parsed_url.scheme:
            self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
            return self.url_result('http://' + url)

        try:
            new_url = self._test_redirect(url)
            if new_url:
@@ -153,7 +158,7 @@ class GenericIE(InfoExtractor):
        mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
        if mobj is None:
            # HTML5 video
            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

@@ -162,9 +167,9 @@ class GenericIE(InfoExtractor):
        if mobj.group(1) is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_url = compat_urllib_parse.unquote(mobj.group(1))
        video_url = mobj.group(1)
        video_url = compat_urlparse.urljoin(url, video_url)
        video_id = os.path.basename(video_url)
        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))

        # here's a fun little line of code for you:
        video_extension = os.path.splitext(video_id)[1][1:]
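The widened HTML5 pattern makes the nested `<source>` element optional, so pages that put the source directly on the `<video>` tag now match too. A quick check against two hypothetical snippets (both URLs invented for illustration):

    import re

    with_source = '<video controls><source type="video/mp4" src="http://example.com/a.mp4"></video>'
    inline_src = '<video controls src="http://example.com/b.mp4"></video>'

    old = r'<video[^<]*>.*?<source .*?src="([^"]+)"'
    new = r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"'

    assert re.search(old, with_source, flags=re.DOTALL)
    assert not re.search(old, inline_src, flags=re.DOTALL)  # old pattern misses inline src
    assert re.search(new, with_source, flags=re.DOTALL)
    assert re.search(new, inline_src, flags=re.DOTALL)      # new pattern catches both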
@@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor):
        self.report_extraction(video_id)

        # Extract update date
        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
        upload_date = self._html_search_regex(
            ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'],
            webpage, u'upload date', fatal=False)
        if upload_date:
            # Convert timestring to a format suitable for filename
@@ -7,11 +7,11 @@ from .common import InfoExtractor
class HotNewHipHopIE(InfoExtractor):
    _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html'
    _TEST = {
        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'",
        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
        u'file': u'1435540.mp3',
        u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
        u'info_dict': {
            u"title": u"Freddie Gibbs Songs - Lay It Down"
            u"title": u"Freddie Gibbs - Lay It Down"
        }
    }
@@ -19,8 +19,7 @@ class HowcastIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        webpage_url = 'http://www.howcast.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)
        webpage = self._download_webpage(url, video_id)

        self.report_extraction(video_id)
youtube_dl/extractor/kickstarter.py (new file, 37 lines)
@@ -0,0 +1,37 @@
import re

from .common import InfoExtractor


class KickStarterIE(InfoExtractor):
    _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
    _TEST = {
        u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
        u"file": u"1404461844.mp4",
        u"md5": u"c81addca81327ffa66c642b5d8b08cab",
        u"info_dict": {
            u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
        },
    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
        webpage_src = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'data-video="(.*?)">',
            webpage_src, u'video URL')
        if 'mp4' in video_url:
            ext = 'mp4'
        else:
            ext = 'flv'
        video_title = self._html_search_regex(r"<title>(.*?)</title>",
            webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()

        results = [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'ext': ext,
        }]
        return results
@@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor):
        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
        description = self._og_search_description(webpage)
        video_uploader = self._html_search_regex(
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                webpage, u'uploader nickname', fatal=False)

        return {
youtube_dl/extractor/metacritic.py (new file, 55 lines)
@@ -0,0 +1,55 @@
import re
import xml.etree.ElementTree
import operator

from .common import InfoExtractor


class MetacriticIE(InfoExtractor):
    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        u'file': u'3698222.mp4',
        u'info_dict': {
            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            u'duration': 221,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'
        info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
            video_id, u'Downloading info xml').replace('&', '&amp;')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
        formats = []
        for videoFile in clip.findall('httpURI/videoFile'):
            rate_str = videoFile.find('rate').text
            video_url = videoFile.find('filePath').text
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': rate_str,
                'rate': int(rate_str),
            })
        formats.sort(key=operator.itemgetter('rate'))

        description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
            webpage, u'description', flags=re.DOTALL)

        info = {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
@@ -5,34 +5,27 @@ import socket
from .common import InfoExtractor
from ..utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,

    ExtractorError,
    unified_strdate,
)


class MixcloudIE(InfoExtractor):
    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'mixcloud'

    def report_download_json(self, file_id):
        """Report JSON download."""
        self.to_screen(u'Downloading json')

    def get_urls(self, jsonData, fmt, bitrate='best'):
        """Get urls from 'audio_formats' section in json"""
        try:
            bitrate_list = jsonData[fmt]
            if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
                bitrate = max(bitrate_list) # select highest

            url_list = jsonData[fmt][bitrate]
        except TypeError: # we have no bitrate info.
            url_list = jsonData[fmt]
        return url_list
    _TEST = {
        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
        u'file': u'dholbach-cryptkeeper.mp3',
        u'info_dict': {
            u'title': u'Cryptkeeper',
            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
            u'uploader': u'Daniel Holbach',
            u'uploader_id': u'dholbach',
            u'upload_date': u'20111115',
        },
    }

    def check_urls(self, url_list):
        """Returns 1st active url from list"""
@@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor):

        return None

    def _print_formats(self, formats):
        print('Available formats:')
        for fmt in formats.keys():
            for b in formats[fmt]:
                try:
                    ext = formats[fmt][b][0]
                    print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
                except TypeError: # we have no bitrate info
                    ext = formats[fmt][0]
                    print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
                break

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        # extract uploader & filename from url
        uploader = mobj.group(1).decode('utf-8')
        file_id = uploader + "-" + mobj.group(2).decode('utf-8')

        # construct API request
        file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
        # retrieve .json file with links to files
        request = compat_urllib_request.Request(file_url)
        try:
            self.report_download_json(file_url)
            jsonData = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))
        uploader = mobj.group(1)
        cloudcast_name = mobj.group(2)
        track_id = '-'.join((uploader, cloudcast_name))
        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
        webpage = self._download_webpage(url, track_id)
        json_data = self._download_webpage(api_url, track_id,
            u'Downloading cloudcast info')
        info = json.loads(json_data)

        # parse JSON
        json_data = json.loads(jsonData)
        player_url = json_data['player_swf_url']
        formats = dict(json_data['audio_formats'])
        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
        final_song_url = self.check_urls(template_url % i for i in range(30))

        req_format = self._downloader.params.get('format', None)

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        if req_format is None or req_format == 'best':
            for format_param in formats.keys():
                url_list = self.get_urls(formats, format_param)
                # check urls
                file_url = self.check_urls(url_list)
                if file_url is not None:
                    break # got it!
        else:
            if req_format not in formats:
                raise ExtractorError(u'Format is not available')

            url_list = self.get_urls(formats, req_format)
            file_url = self.check_urls(url_list)
            format_param = req_format

        return [{
            'id': file_id.decode('utf-8'),
            'url': file_url.decode('utf-8'),
            'uploader': uploader.decode('utf-8'),
            'upload_date': None,
            'title': json_data['name'],
            'ext': file_url.split('.')[-1].decode('utf-8'),
            'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
            'thumbnail': json_data['thumbnail_url'],
            'description': json_data['description'],
            'player_url': player_url.decode('utf-8'),
        }]
        return {
            'id': track_id,
            'title': info['name'],
            'url': final_song_url,
            'ext': 'mp3',
            'description': info['description'],
            'thumbnail': info['pictures'].get('extra_large'),
            'uploader': info['user']['name'],
            'uploader_id': info['user']['username'],
            'upload_date': unified_strdate(info['created_time']),
            'view_count': info['play_count'],
        }
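The rewritten Mixcloud extractor derives the full cloudcast URL from the page's preview URL and then probes the numbered stream servers until one answers. The URL manipulation in isolation, with a hypothetical preview URL:

    import re

    preview_url = 'http://stream12.mixcloud.com/previews/dholbach/cryptkeeper.mp3'  # invented example

    # Previews live under /previews/, originals under /cloudcasts/originals/.
    song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')

    # The file may be served from any numbered stream host, so turn the host
    # into a template and generate candidates; check_urls() in the extractor
    # keeps the first URL that actually responds.
    template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
    candidates = (template_url % i for i in range(30))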
youtube_dl/extractor/naver.py (new file, 73 lines)
@@ -0,0 +1,73 @@
# encoding: utf-8
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    ExtractorError,
)


class NaverIE(InfoExtractor):
    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvcast.naver.com/v/81652',
        u'file': u'81652.mp4',
        u'info_dict': {
            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
            u'upload_date': u'20130903',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
            webpage)
        if m_id is None:
            raise ExtractorError(u'couldn\'t extract vid and key')
        vid = m_id.group(1)
        key = m_id.group(2)
        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
        query_urls = compat_urllib_parse.urlencode({
            'masterVid': vid,
            'protocol': 'p2p',
            'inKey': key,
        })
        info_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
            video_id, u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
            video_id, u'Downloading video formats info')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        formats = []
        for format_el in urls.findall('EncodingOptions/EncodingOption'):
            domain = format_el.find('Domain').text
            if domain.startswith('rtmp'):
                continue
            formats.append({
                'url': domain + format_el.find('uri').text,
                'ext': 'mp4',
                'width': int(format_el.find('width').text),
                'height': int(format_el.find('height').text),
            })

        info = {
            'id': video_id,
            'title': info.find('Subject').text,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'upload_date': info.find('WriteDate').text.replace('.', ''),
            'view_count': int(info.find('PlayCount').text),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
youtube_dl/extractor/newgrounds.py (new file, 38 lines)
@@ -0,0 +1,38 @@
import json
import re

from .common import InfoExtractor
from ..utils import determine_ext


class NewgroundsIE(InfoExtractor):
    _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://www.newgrounds.com/audio/listen/549479',
        u'file': u'549479.mp3',
        u'md5': u'fe6033d297591288fa1c1f780386f07a',
        u'info_dict': {
            u"title": u"B7 - BusMode",
            u"uploader": u"Burn7",
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        music_id = mobj.group('id')
        webpage = self._download_webpage(url, music_id)

        title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title')
        uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader')

        music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}'
        music_url_json = json.loads(music_url_json_string)
        music_url = music_url_json['url']

        return {
            'id': music_id,
            'title': title,
            'url': music_url,
            'uploader': uploader,
            'ext': determine_ext(music_url),
        }
@@ -18,11 +18,15 @@ class OoyalaIE(InfoExtractor):
        },
    }

    @staticmethod
    def _url_for_embed_code(embed_code):
        return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code

    def _extract_result(self, info, more_info):
        return {'id': info['embedCode'],
                'ext': 'mp4',
                'title': unescapeHTML(info['title']),
                'url': info['url'],
                'url': info.get('ipad_url') or info['url'],
                'description': unescapeHTML(more_info['description']),
                'thumbnail': more_info['promo'],
                }
@@ -35,7 +39,9 @@ class OoyalaIE(InfoExtractor):
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player, u'mobile player url')
        mobile_player = self._download_webpage(mobile_url, embedCode)
        videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
        videos_info = self._search_regex(
            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
            mobile_player, u'info').replace('\\"','"')
        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
        videos_info = json.loads(videos_info)
        videos_more_info =json.loads(videos_more_info)
@@ -14,19 +14,6 @@ from ..utils import (
class ORFIE(InfoExtractor):
    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
        u'file': u'6566957.flv',
        u'info_dict': {
            u'title': u'Wetter',
            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
        },
        u'params': {
            # It uses rtmp
            u'skip_download': True,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
youtube_dl/extractor/slideshare.py (new file, 47 lines)
@@ -0,0 +1,47 @@
import re
import json

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    ExtractorError,
)


class SlideshareIE(InfoExtractor):
    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'

    _TEST = {
        u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
        u'file': u'25665706.mp4',
        u'info_dict': {
            u'title': u'Managing Scale and Complexity',
            u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        slideshare_obj = self._search_regex(
            r'var slideshare_object = ({.*?}); var user_info =',
            webpage, u'slideshare object')
        info = json.loads(slideshare_obj)
        if info['slideshow']['type'] != u'video':
            raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)

        doc = info['doc']
        bucket = info['jsplayer']['video_bucket']
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)

        return {
            '_type': 'video',
            'id': info['slideshow']['id'],
            'title': info['slideshow']['title'],
            'ext': ext,
            'url': video_url,
            'thumbnail': info['slideshow']['pin_image_url'],
            'description': self._og_search_description(webpage),
        }
@@ -8,7 +8,7 @@ from ..utils import ExtractorError


class SohuIE(InfoExtractor):
    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
    _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'

    _TEST = {
        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
@@ -21,8 +21,11 @@ class SohuIE(InfoExtractor):

    def _real_extract(self, url):

        def _fetch_data(vid_id):
            base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
        def _fetch_data(vid_id, mytv=False):
            if mytv:
                base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
            else:
                base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
            data_url = base_data_url + str(vid_id)
            data_json = self._download_webpage(
                data_url, video_id,
@@ -31,15 +34,16 @@ class SohuIE(InfoExtractor):

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        mytv = mobj.group('mytv') is not None

        webpage = self._download_webpage(url, video_id)
        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
                                            webpage, u'video title')
        title = raw_title.partition('-')[0].strip()

        vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
        vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
                                      u'video path')
        data = _fetch_data(vid)
        data = _fetch_data(vid, mytv)

        QUALITIES = ('ori', 'super', 'high', 'nor')
        vid_ids = [data['data'][q + 'Vid']
@@ -51,7 +55,7 @@ class SohuIE(InfoExtractor):
        # For now, we just pick the highest available quality
        vid_id = vid_ids[-1]

        format_data = data if vid == vid_id else _fetch_data(vid_id)
        format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
        part_count = format_data['data']['totalBlocks']
        allot = format_data['allot']
        prot = format_data['prot']
@@ -1,10 +1,12 @@
import json
import re
import itertools

from .common import InfoExtractor
from ..utils import (
    compat_str,
    compat_urlparse,
    compat_urllib_parse,

    ExtractorError,
    unified_strdate,
@@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor):
    def _resolv_url(cls, url):
        return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID

    def _extract_info_dict(self, info, full_title=None):
    def _extract_info_dict(self, info, full_title=None, quiet=False):
        video_id = info['id']
        name = full_title or video_id
        self.report_extraction(name)
        if quiet == False:
            self.report_extraction(name)

        thumbnail = info['artwork_url']
        if thumbnail is not None:
@@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE):
            'id': info['id'],
            'title': info['title'],
        }


class SoundcloudUserIE(SoundcloudIE):
    _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
    IE_NAME = u'soundcloud:user'

    # it's in tests/test_playlists.py
    _TEST = None

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        uploader = mobj.group('user')

        url = 'http://soundcloud.com/%s/' % uploader
        resolv_url = self._resolv_url(url)
        user_json = self._download_webpage(resolv_url, uploader,
            u'Downloading user info')
        user = json.loads(user_json)

        tracks = []
        for i in itertools.count():
            data = compat_urllib_parse.urlencode({'offset': i*50,
                                                  'client_id': self._CLIENT_ID,
                                                  })
            tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
            response = self._download_webpage(tracks_url, uploader,
                u'Downloading tracks page %s' % (i+1))
            new_tracks = json.loads(response)
            tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
            if len(new_tracks) < 50:
                break

        return {
            '_type': 'playlist',
            'id': compat_str(user['id']),
            'title': user['username'],
            'entries': tracks,
        }
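SoundcloudUserIE pages through the API 50 tracks at a time and stops when a page comes back short. The same pattern factored out as a sketch; `fetch_json` is a stand-in for `_download_webpage` plus `json.loads`:

    import itertools

    def collect_pages(fetch_json, base_url, page_size=50):
        # Generic offset pagination: request page after page until the
        # server returns fewer items than a full page.
        items = []
        for page in itertools.count():
            batch = fetch_json('%s?offset=%d' % (base_url, page * page_size))
            items.extend(batch)
            if len(batch) < page_size:  # a short page is the last page
                break
        return items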
youtube_dl/extractor/southparkstudios.py (new file, 34 lines)
@@ -0,0 +1,34 @@
import re

from .mtv import MTVIE, _media_xml_tag


class SouthParkStudiosIE(MTVIE):
    IE_NAME = u'southparkstudios.com'
    _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P<id>\d+)'

    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'

    _TEST = {
        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
        u'info_dict': {
            u'title': u'Bat Daded',
            u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
        },
    }

    # Overwrite MTVIE properties we don't want
    _TESTS = []

    def _get_thumbnail_url(self, uri, itemdoc):
        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        return itemdoc.find(search_path).attrib['url']

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
                                  webpage, u'mgid')
        return self._get_videos_info(mgid)
youtube_dl/extractor/subtitles.py (new file, 91 lines)
@@ -0,0 +1,91 @@
from .common import InfoExtractor

from ..utils import (
    compat_str,
    ExtractorError,
)


class SubtitlesInfoExtractor(InfoExtractor):
    @property
    def _have_to_download_any_subtitles(self):
        return any([self._downloader.params.get('writesubtitles', False),
                    self._downloader.params.get('writeautomaticsub')])

    def _list_available_subtitles(self, video_id, webpage=None):
        """ outputs the available subtitles for the video """
        sub_lang_list = self._get_available_subtitles(video_id)
        auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
        sub_lang = ",".join(list(sub_lang_list.keys()))
        self.to_screen(u'%s: Available subtitles for video: %s' %
                       (video_id, sub_lang))
        auto_lang = ",".join(auto_captions_list.keys())
        self.to_screen(u'%s: Available automatic captions for video: %s' %
                       (video_id, auto_lang))

    def extract_subtitles(self, video_id, video_webpage=None):
        """
        returns {sub_lang: sub} ,{} if subtitles not found or None if the
        subtitles aren't requested.
        """
        if not self._have_to_download_any_subtitles:
            return None
        available_subs_list = {}
        if self._downloader.params.get('writeautomaticsub', False):
            available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
        if self._downloader.params.get('writesubtitles', False):
            available_subs_list.update(self._get_available_subtitles(video_id))

        if not available_subs_list:  # error, it didn't get the available subtitles
            return {}
        if self._downloader.params.get('allsubtitles', False):
            sub_lang_list = available_subs_list
        else:
            if self._downloader.params.get('subtitleslangs', False):
                requested_langs = self._downloader.params.get('subtitleslangs')
            elif 'en' in available_subs_list:
                requested_langs = ['en']
            else:
                requested_langs = [list(available_subs_list.keys())[0]]

            sub_lang_list = {}
            for sub_lang in requested_langs:
                if not sub_lang in available_subs_list:
                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
                    continue
                sub_lang_list[sub_lang] = available_subs_list[sub_lang]

        subtitles = {}
        for sub_lang, url in sub_lang_list.items():
            subtitle = self._request_subtitle_url(sub_lang, url)
            if subtitle:
                subtitles[sub_lang] = subtitle
        return subtitles

    def _request_subtitle_url(self, sub_lang, url):
        """ makes the http request for the subtitle """
        try:
            sub = self._download_webpage(url, None, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return
        return sub

    def _get_available_subtitles(self, video_id):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses
        """
        pass

    def _get_available_automatic_caption(self, video_id, webpage):
        """
        returns {sub_lang: url} or {} if not available
        Must be redefined by the subclasses that support automatic captions,
        otherwise it will return {}
        """
        self._downloader.report_warning(u'Automatic Captions not supported by this server')
        return {}
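Any extractor opts into this machinery by subclassing `SubtitlesInfoExtractor` and implementing `_get_available_subtitles`, as `DailymotionIE` does above. A minimal hypothetical subclass; the site, URL pattern, and subtitle endpoint are invented for illustration:

    from .subtitles import SubtitlesInfoExtractor


    class ExampleSiteIE(SubtitlesInfoExtractor):
        _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'

        def _get_available_subtitles(self, video_id):
            # Return a {language: url} mapping; the base class takes care
            # of language selection, the HTTP requests, and warnings.
            return {'en': 'http://example.com/subs/%s.en.srt' % video_id}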
@@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor):
            {
                'format': fnode.text,
                'url': video_url_template % fnode.text,
                'ext': fnode.text.partition('-')[0]
            }

            for fnode in format_doc.findall('./formats/format')
@@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor):
        }

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = formats[-1]['format'].partition('-')[0]
        info.update(formats[-1])

        return info
@@ -1,6 +1,11 @@
import json
import re

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    get_meta_content,
)


class UstreamIE(InfoExtractor):
@@ -43,3 +48,25 @@ class UstreamIE(InfoExtractor):
            'thumbnail': thumbnail,
        }
        return info


class UstreamChannelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
    IE_NAME = u'ustream:channel'

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        slug = m.group('slug')
        webpage = self._download_webpage(url, slug)
        channel_id = get_meta_content('ustream:channel_id', webpage)

        BASE = 'http://www.ustream.tv'
        next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
        video_ids = []
        while next_url:
            reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
            video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
            next_url = reply['nextUrl']

        urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
        url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
        return self.playlist_result(url_entries, channel_id)
youtube_dl/extractor/veehd.py (new file, 56 lines)
@@ -0,0 +1,56 @@
import re
import json

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    get_element_by_id,
    clean_html,
)

class VeeHDIE(InfoExtractor):
    _VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://veehd.com/video/4686958',
        u'file': u'4686958.mp4',
        u'info_dict': {
            u'title': u'Time Lapse View from Space ( ISS)',
            u'uploader_id': u'spotted',
            u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
            webpage, u'player path')
        player_url = compat_urlparse.urljoin(url, player_path)
        player_page = self._download_webpage(player_url, video_id,
            u'Downloading player page')
        config_json = self._search_regex(r'value=\'config=({.+?})\'',
            player_page, u'config json')
        config = json.loads(config_json)

        video_url = compat_urlparse.unquote(config['clip']['url'])
        title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
            webpage, u'uploader')
        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
            webpage, u'thumbnail')
        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
            webpage, u'description', flags=re.DOTALL)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'description': description,
        }
youtube_dl/extractor/vice.py (new file, 38 lines)
@@ -0,0 +1,38 @@
import re

from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..utils import ExtractorError


class ViceIE(InfoExtractor):
    _VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)'

    _TEST = {
        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
        u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
        u'info_dict': {
            u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
        },
        u'params': {
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        try:
            ooyala_url = self._og_search_video_url(webpage)
        except ExtractorError:
            try:
                embed_code = self._search_regex(
                    r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
                    u'ooyala embed code')
                ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
            except ExtractorError:
                raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
        return self.url_result(ooyala_url, ie='Ooyala')
@@ -44,6 +44,16 @@ class VimeoIE(InfoExtractor):
|
||||
u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://player.vimeo.com/video/54469442',
|
||||
u'file': u'54469442.mp4',
|
||||
u'md5': u'619b811a4417aa4abe78dc653becf511',
|
||||
u'note': u'Videos that embed the url in the player page',
|
||||
u'info_dict': {
|
||||
u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
|
||||
u'uploader': u'The BLN & Business of Software',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _login(self):
|
||||
@@ -112,7 +122,8 @@ class VimeoIE(InfoExtractor):

         # Extract the config JSON
         try:
-            config = webpage.split(' = {config:')[1].split(',assets:')[0]
+            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
+                webpage, u'info section', flags=re.DOTALL)
             config = json.loads(config)
         except:
             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
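The rewrite leans on `_search_regex` accepting a list of patterns and trying them in order, which is what lets the extractor cope with both page layouts. A rough sketch of that fallback behaviour (simplified; the real helper lives in `InfoExtractor` and also handles fatal/warning modes):

    import re

    def search_regex(patterns, string, name, flags=0):
        # Try each pattern in turn; return the first capture group of the
        # first pattern that matches (simplified sketch of the fallback).
        if isinstance(patterns, str):
            patterns = [patterns]
        for pattern in patterns:
            mobj = re.search(pattern, string, flags)
            if mobj:
                return mobj.group(1)
        raise ValueError(u'Unable to extract %s' % name)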
@@ -132,12 +143,22 @@ class VimeoIE(InfoExtractor):
     video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None

     # Extract video thumbnail
-    video_thumbnail = config["video"]["thumbnail"]
+    video_thumbnail = config["video"].get("thumbnail")
+    if video_thumbnail is None:
+        _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]

     # Extract video description
-    video_description = get_element_by_attribute("itemprop", "description", webpage)
-    if video_description: video_description = clean_html(video_description)
-    else: video_description = u''
+    video_description = None
+    try:
+        video_description = get_element_by_attribute("itemprop", "description", webpage)
+        if video_description: video_description = clean_html(video_description)
+    except AssertionError as err:
+        # On some pages like (http://player.vimeo.com/video/54469442) the
+        # html tags are not closed, python 2.6 cannot handle it
+        if err.args[0] == 'we should not get here!':
+            pass
+        else:
+            raise

     # Extract upload date
     video_upload_date = None
@@ -154,14 +175,15 @@ class VimeoIE(InfoExtractor):
     # TODO bind to format param
     codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
     files = { 'hd': [], 'sd': [], 'other': []}
+    config_files = config["video"].get("files") or config["request"].get("files")
     for codec_name, codec_extension in codecs:
-        if codec_name in config["video"]["files"]:
-            if 'hd' in config["video"]["files"][codec_name]:
+        if codec_name in config_files:
+            if 'hd' in config_files[codec_name]:
                 files['hd'].append((codec_name, codec_extension, 'hd'))
-            elif 'sd' in config["video"]["files"][codec_name]:
+            elif 'sd' in config_files[codec_name]:
                 files['sd'].append((codec_name, codec_extension, 'sd'))
             else:
-                files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+                files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))

     for quality in ('hd', 'sd', 'other'):
         if len(files[quality]) > 0:
@@ -173,8 +195,12 @@ class VimeoIE(InfoExtractor):
     else:
         raise ExtractorError(u'No known codec found')

-    video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
-        %(video_id, sig, timestamp, video_quality, video_codec.upper())
+    video_url = None
+    if isinstance(config_files[video_codec], dict):
+        video_url = config_files[video_codec][video_quality].get("url")
+    if video_url is None:
+        video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
+            %(video_id, sig, timestamp, video_quality, video_codec.upper())

     return [{
         'id': video_id,
youtube_dl/extractor/xhamster.py
@@ -11,8 +11,8 @@

 class XHamsterIE(InfoExtractor):
     """Information Extractor for xHamster"""
-    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
-    _TEST = {
+    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+    _TESTS = [{
         u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
         u'file': u'1509445.flv',
         u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa',
@@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor):
         u"uploader_id": u"Ruseful2011",
         u"title": u"FemaleAgent Shy beauty takes the bait"
         }
-    }
+    },
+    {
+        u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+        u'file': u'2221348.flv',
+        u'md5': u'e767b9475de189320f691f49c679c4c7',
+        u'info_dict': {
+            u"upload_date": u"20130914",
+            u"uploader_id": u"jojo747400",
+            u"title": u"Britney Spears Sexy Booty"
+        }
+    }]

     def _real_extract(self,url):
         mobj = re.match(self._VALID_URL, url)

         video_id = mobj.group('id')
-        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
+        seo = mobj.group('seo')
+        mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)
         webpage = self._download_webpage(mrss_url, video_id)

         mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
youtube_dl/extractor/youtube.py
@@ -5,8 +5,10 @@ import netrc
 import re
 import socket
+import itertools
 import xml.etree.ElementTree

 from .common import InfoExtractor, SearchInfoExtractor
+from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
     compat_http_client,
     compat_parse_qs,
@@ -130,13 +132,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         return
     self._confirm_age()

-class YoutubeIE(YoutubeBaseInfoExtractor):
+
+class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
     IE_DESC = u'YouTube.com'
     _VALID_URL = r"""^
                      (
                          (?:https?://)?                                  # http(s):// (optional)
-                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
-                            tube\.majestyc\.net/)                        # the various hostnames, with wildcard subdomains
+                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+                            tube\.majestyc\.net/|
+                            youtube\.googleapis\.com/)                   # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                     # handle anchor (#/) redirect urls
                          (?:                                             # the various things that can precede the ID:
                              (?:(?:v|embed|e)/)                          # v/ or embed/ or e/
@@ -146,15 +150,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                              (?:.*?&)?                                   # any other preceding param (like /?s=tuff&v=xxxx)
                              v=
                          )
-                     )?                                                  # optional -> youtube.com/xxxx is OK
+                         ))
+                         |youtu\.be/                                     # just youtu.be/xxxx
+                         )
                      )?                                                  # all until now is optional -> you can pass the naked ID
-                     ([0-9A-Za-z_-]+)                                    # here is it! the YouTube video ID
+                     ([0-9A-Za-z_-]{11})                                 # here is it! the YouTube video ID
                      (?(1).+)?                                           # if we found the ID, everything can follow
                      $"""
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
     # Listed in order of quality
-    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
-                          '95', '94', '93', '92', '132', '151',
+    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
+                          # Apple HTTP Live Streaming
+                          '96', '95', '94', '93', '92', '132', '151',
                           # 3D
                           '85', '84', '102', '83', '101', '82', '100',
                           # Dash video
@@ -163,8 +170,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                           # Dash audio
                           '141', '172', '140', '171', '139',
                           ]
-    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
-                                      '95', '94', '93', '92', '132', '151',
+    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
+                                      # Apple HTTP Live Streaming
+                                      '96', '95', '94', '93', '92', '132', '151',
                                       # 3D
                                       '85', '102', '84', '101', '83', '100', '82',
                                       # Dash video
                                       '138', '248', '137', '247', '136', '246', '245',
@@ -172,11 +181,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                       # Dash audio
                                       '172', '141', '171', '140', '139',
                                       ]
+    _video_formats_map = {
+        'flv': ['35', '34', '6', '5'],
+        '3gp': ['36', '17', '13'],
+        'mp4': ['38', '37', '22', '18'],
+        'webm': ['46', '45', '44', '43'],
+    }
     _video_extensions = {
         '13': '3gp',
-        '17': 'mp4',
+        '17': '3gp',
         '18': 'mp4',
         '22': 'mp4',
+        '36': '3gp',
         '37': 'mp4',
         '38': 'mp4',
         '43': 'webm',
@@ -193,7 +209,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '101': 'webm',
         '102': 'webm',

-        # videos that use m3u8
+        # Apple HTTP Live Streaming
         '92': 'mp4',
         '93': 'mp4',
         '94': 'mp4',
@@ -234,6 +250,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '22': '720x1280',
         '34': '360x640',
         '35': '480x854',
+        '36': '240x320',
         '37': '1080x1920',
         '38': '3072x4096',
         '43': '360x640',
@@ -373,7 +390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     @classmethod
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
+        if YoutubePlaylistIE.suitable(url): return False
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

     def report_video_webpage_download(self, video_id):
@@ -384,19 +401,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         """Report attempt to download video info webpage."""
         self.to_screen(u'%s: Downloading video info webpage' % video_id)

-    def report_video_subtitles_download(self, video_id):
-        """Report attempt to download video info webpage."""
-        self.to_screen(u'%s: Checking available subtitles' % video_id)
-
-    def report_video_subtitles_request(self, video_id, sub_lang, format):
-        """Report attempt to download video info webpage."""
-        self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
-
-    def report_video_subtitles_available(self, video_id, sub_lang_list):
-        """Report available subtitles."""
-        sub_lang = ",".join(list(sub_lang_list.keys()))
-        self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
-
     def report_information_extraction(self, video_id):
         """Report attempt to extract video information."""
         self.to_screen(u'%s: Extracting video information' % video_id)
@@ -412,8 +416,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _decrypt_signature(self, s):
         """Turn the encrypted s field into a working signature"""

-        if len(s) == 92:
+        if len(s) == 93:
+            return s[86:29:-1] + s[88] + s[28:5:-1]
+        elif len(s) == 92:
             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
+        elif len(s) == 91:
+            return s[84:27:-1] + s[86] + s[26:5:-1]
         elif len(s) == 90:
             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
         elif len(s) == 89:
@@ -423,15 +431,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         elif len(s) == 87:
             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
         elif len(s) == 86:
-            return s[83:36:-1] + s[0] + s[35:2:-1]
+            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
         elif len(s) == 85:
-            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
+            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
         elif len(s) == 84:
             return s[81:36:-1] + s[0] + s[35:2:-1]
         elif len(s) == 83:
             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
         elif len(s) == 82:
-            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
+            return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
         elif len(s) == 81:
             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
         elif len(s) == 80:
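Each branch is a hard-coded permutation keyed on the length of the scrambled signature; when YouTube rotates the player, only these slice expressions need updating. As a standalone illustration, the new 86-character branch can be exercised like this (dummy input, not real player data):

    def decrypt_sig_86(s):
        # Fixed reordering for 86-character scrambled signatures,
        # mirroring the updated branch above.
        return (s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] +
                s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53])

    scrambled = ''.join(chr(48 + i % 75) for i in range(86))  # dummy 86-char input
    print(decrypt_sig_86(scrambled))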
@@ -451,56 +459,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # Fallback to the other algortihms
         return self._decrypt_signature(s)


     def _get_available_subtitles(self, video_id):
-        self.report_video_subtitles_download(video_id)
-        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
         try:
-            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            sub_list = self._download_webpage(
+                'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
+                video_id, note=False)
+        except ExtractorError as err:
             self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
             return {}
-        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
-        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
+        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
+
+        sub_lang_list = {}
+        for l in lang_list:
+            lang = l[1]
+            params = compat_urllib_parse.urlencode({
+                'lang': lang,
+                'v': video_id,
+                'fmt': self._downloader.params.get('subtitlesformat'),
+            })
+            url = u'http://www.youtube.com/api/timedtext?' + params
+            sub_lang_list[lang] = url
         if not sub_lang_list:
             self._downloader.report_warning(u'video doesn\'t have subtitles')
             return {}
         return sub_lang_list

-    def _list_available_subtitles(self, video_id):
-        sub_lang_list = self._get_available_subtitles(video_id)
-        self.report_video_subtitles_available(video_id, sub_lang_list)
-
-    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
-        """
-        Return the subtitle as a string or None if they are not found
-        """
-        self.report_video_subtitles_request(video_id, sub_lang, format)
-        params = compat_urllib_parse.urlencode({
-            'lang': sub_lang,
-            'name': sub_name,
-            'v': video_id,
-            'fmt': format,
-        })
-        url = 'http://www.youtube.com/api/timedtext?' + params
-        try:
-            sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
-            return
-        if not sub:
-            self._downloader.report_warning(u'Did not fetch video subtitles')
-            return
-        return sub
-
-    def _request_automatic_caption(self, video_id, webpage):
+    def _get_available_automatic_caption(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
-        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
         sub_format = self._downloader.params.get('subtitlesformat')
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
-        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
+        err_msg = u'Couldn\'t find automatic captions for %s' % video_id
         if mobj is None:
             self._downloader.report_warning(err_msg)
             return {}
@@ -509,53 +499,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             args = player_config[u'args']
             caption_url = args[u'ttsurl']
             timestamp = args[u'timestamp']
-            params = compat_urllib_parse.urlencode({
-                'lang': 'en',
-                'tlang': sub_lang,
-                'fmt': sub_format,
-                'ts': timestamp,
-                'kind': 'asr',
+            # We get the available subtitles
+            list_params = compat_urllib_parse.urlencode({
+                'type': 'list',
+                'tlangs': 1,
+                'asrs': 1,
             })
-            subtitles_url = caption_url + '&' + params
-            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
-            return {sub_lang: sub}
+            list_url = caption_url + '&' + list_params
+            list_page = self._download_webpage(list_url, video_id)
+            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
+            original_lang_node = caption_list.find('track')
+            if original_lang_node.attrib.get('kind') != 'asr' :
+                self._downloader.report_warning(u'Video doesn\'t have automatic captions')
+                return {}
+            original_lang = original_lang_node.attrib['lang_code']
+
+            sub_lang_list = {}
+            for lang_node in caption_list.findall('target'):
+                sub_lang = lang_node.attrib['lang_code']
+                params = compat_urllib_parse.urlencode({
+                    'lang': original_lang,
+                    'tlang': sub_lang,
+                    'fmt': sub_format,
+                    'ts': timestamp,
+                    'kind': 'asr',
+                })
+                sub_lang_list[sub_lang] = caption_url + '&' + params
+            return sub_lang_list
+        # An extractor error can be raise by the download process if there are
+        # no automatic captions but there are subtitles
         except (KeyError, ExtractorError):
             self._downloader.report_warning(err_msg)
             return {}

-    def _extract_subtitles(self, video_id):
-        """
-        Return a dictionary: {language: subtitles} or {} if the subtitles
-        couldn't be found
-        """
-        available_subs_list = self._get_available_subtitles(video_id)
-        sub_format = self._downloader.params.get('subtitlesformat')
-        if not available_subs_list: #There was some error, it didn't get the available subtitles
-            return {}
-        if self._downloader.params.get('allsubtitles', False):
-            sub_lang_list = available_subs_list
-        else:
-            if self._downloader.params.get('subtitleslangs', False):
-                reqested_langs = self._downloader.params.get('subtitleslangs')
-            elif 'en' in available_subs_list:
-                reqested_langs = ['en']
-            else:
-                reqested_langs = [list(available_subs_list.keys())[0]]
-
-            sub_lang_list = {}
-            for sub_lang in reqested_langs:
-                if not sub_lang in available_subs_list:
-                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
-                    continue
-                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
-        subtitles = {}
-        for sub_lang in sub_lang_list:
-            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
-            if subtitle:
-                subtitles[sub_lang] = subtitle
-        return subtitles
-
     def _print_formats(self, formats):
         print('Available formats:')
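The net effect of the rework: `_get_available_subtitles` now returns a language-to-URL map and the shared `SubtitlesInfoExtractor` machinery does the downloading, which is why the per-language `_request_subtitle`/`_extract_subtitles` plumbing above could be deleted. The URLs it builds look like this (sketch with a dummy video id and format; `compat_urllib_parse` in the codebase corresponds to `urllib.parse` on Python 3):

    import urllib.parse

    params = urllib.parse.urlencode({
        'lang': 'en',          # language code from the timedtext list
        'v': 'BaW_jenozKc',    # dummy video id
        'fmt': 'srt',          # user-selected subtitle format
    })
    print(u'http://www.youtube.com/api/timedtext?' + params)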
@@ -597,13 +572,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
             else:
                 # Specific formats. We pick the first in a slash-delimeted sequence.
-                # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+                # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
+                # available in the specified format. For example,
+                # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+                # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
+                # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
                 req_formats = req_format.split('/')
                 video_url_list = None
                 for rf in req_formats:
                     if rf in url_map:
                         video_url_list = [(rf, url_map[rf])]
                         break
+                    if rf in self._video_formats_map:
+                        for srf in self._video_formats_map[rf]:
+                            if srf in url_map:
+                                video_url_list = [(srf, url_map[srf])]
+                                break
+                        else:
+                            continue
+                        break
                 if video_url_list is None:
                     raise ExtractorError(u'requested format not available')
                 return video_url_list
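With `_video_formats_map` in place, a format request such as `-f 22/mp4/flv` can now name container formats, not just itags; each extension expands to its itag list in quality order and the first available one wins. A self-contained sketch of the lookup (dummy `url_map`):

    _video_formats_map = {
        'flv': ['35', '34', '6', '5'],
        '3gp': ['36', '17', '13'],
        'mp4': ['38', '37', '22', '18'],
        'webm': ['46', '45', '44', '43'],
    }

    url_map = {'18': 'http://example.invalid/18', '43': 'http://example.invalid/43'}  # dummy

    def pick_format(req_format):
        # First entry in the slash-separated list that resolves wins;
        # an extension tries its itags in quality order.
        for rf in req_format.split('/'):
            if rf in url_map:
                return rf
            for srf in _video_formats_map.get(rf, []):
                if srf in url_map:
                    return srf
        return None

    print(pick_format('22/mp4/webm'))  # -> '18', the best available mp4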
@@ -743,15 +730,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             video_description = u''

         # subtitles
-        video_subtitles = None
-
-        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
-            video_subtitles = self._extract_subtitles(video_id)
-        elif self._downloader.params.get('writeautomaticsub', False):
-            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+        video_subtitles = self.extract_subtitles(video_id, video_webpage)

         if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id)
+            self._list_available_subtitles(video_id, video_webpage)
             return

         if 'length_seconds' not in video_info:
@@ -805,10 +787,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if self._downloader.params.get('verbose'):
             s = url_data['s'][0]
             if age_gate:
-                player_version = self._search_regex(r'ad3-(.+?)\.swf',
-                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
-                    'flash player', fatal=False)
-                player = 'flash player %s' % player_version
+                player = 'flash player'
             else:
                 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                     'html5 player', fatal=False)
@@ -920,8 +899,11 @@ class YoutubePlaylistIE(InfoExtractor):

         for entry in response['feed']['entry']:
             index = entry['yt$position']['$t']
-            if 'media$group' in entry and 'media$player' in entry['media$group']:
-                videos.append((index, entry['media$group']['media$player']['url']))
+            if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
+                videos.append((
+                    index,
+                    'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
+                ))

         videos = [v[1] for v in sorted(videos)]
@@ -987,13 +969,20 @@

 class YoutubeUserIE(InfoExtractor):
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
-    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
-    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
+    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     IE_NAME = u'youtube:user'

+    @classmethod
+    def suitable(cls, url):
+        # Don't return True if the url can be extracted with other youtube
+        # extractor, the regex would is too permissive and it would match.
+        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
+        if any(ie.suitable(url) for ie in other_ies): return False
+        else: return super(YoutubeUserIE, cls).suitable(url)
+
     def _real_extract(self, url):
         # Extract username
         mobj = re.match(self._VALID_URL, url)
@@ -1016,13 +1005,18 @@ class YoutubeUserIE(InfoExtractor):
             page = self._download_webpage(gdata_url, username,
                                           u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

+            try:
+                response = json.loads(page)
+            except ValueError as err:
+                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+            if 'entry' not in response['feed']:
+                # Number of videos is a multiple of self._MAX_RESULTS
+                break
+
             # Extract video identifiers
             ids_in_page = []
-
-            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
-                if mobj.group(1) not in ids_in_page:
-                    ids_in_page.append(mobj.group(1))
+            for entry in response['feed']['entry']:
+                ids_in_page.append(entry['id']['$t'].split('/')[-1])
             video_ids.extend(ids_in_page)

             # A little optimization - if current page is not
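Requesting `alt=json` turns the GData uploads feed into JSON, so video ids come straight from the feed entries instead of being scraped from HTML with `_VIDEO_INDICATOR`. The shape being parsed, roughly (abridged dummy feed):

    import json

    page = json.dumps({'feed': {'entry': [
        {'id': {'$t': 'http://gdata.youtube.com/feeds/api/videos/BaW_jenozKc'}},
    ]}})  # dummy stand-in for the downloaded page

    response = json.loads(page)
    ids_in_page = [entry['id']['$t'].split('/')[-1]
                   for entry in response['feed']['entry']]
    print(ids_in_page)  # -> ['BaW_jenozKc']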
@@ -1161,7 +1155,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
     IE_NAME = u'youtube:favorites'
     IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
-    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
+    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
     _LOGIN_REQUIRED = True

     def _real_extract(self, url):
youtube_dl/utils.py
@@ -249,7 +249,17 @@ def htmlentity_transform(matchobj):
     return (u'&%s;' % entity)

 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
-class AttrParser(compat_html_parser.HTMLParser):
+class BaseHTMLParser(compat_html_parser.HTMLParser):
+    def __init(self):
+        compat_html_parser.HTMLParser.__init__(self)
+        self.html = None
+
+    def loads(self, html):
+        self.html = html
+        self.feed(html)
+        self.close()
+
+class AttrParser(BaseHTMLParser):
     """Modified HTMLParser that isolates a tag with the specified attribute"""
     def __init__(self, attribute, value):
         self.attribute = attribute
@@ -257,10 +267,9 @@ class AttrParser(BaseHTMLParser):
         self.result = None
         self.started = False
         self.depth = {}
-        self.html = None
         self.watch_startpos = False
         self.error_count = 0
-        compat_html_parser.HTMLParser.__init__(self)
+        BaseHTMLParser.__init__(self)

     def error(self, message):
         if self.error_count > 10 or self.started:
@@ -269,11 +278,6 @@ class AttrParser(BaseHTMLParser):
         self.error_count += 1
         self.goahead(1)

-    def loads(self, html):
-        self.html = html
-        self.feed(html)
-        self.close()
-
     def handle_starttag(self, tag, attrs):
         attrs = dict(attrs)
         if self.started:
@@ -334,6 +338,38 @@ def get_element_by_attribute(attribute, value, html):
         pass
     return parser.get_result()

+class MetaParser(BaseHTMLParser):
+    """
+    Modified HTMLParser that isolates a meta tag with the specified name
+    attribute.
+    """
+    def __init__(self, name):
+        BaseHTMLParser.__init__(self)
+        self.name = name
+        self.content = None
+        self.result = None
+
+    def handle_starttag(self, tag, attrs):
+        if tag != 'meta':
+            return
+        attrs = dict(attrs)
+        if attrs.get('name') == self.name:
+            self.result = attrs.get('content')
+
+    def get_result(self):
+        return self.result
+
+def get_meta_content(name, html):
+    """
+    Return the content attribute from the meta tag with the given name attribute.
+    """
+    parser = MetaParser(name)
+    try:
+        parser.loads(html)
+    except compat_html_parser.HTMLParseError:
+        pass
+    return parser.get_result()
+
+
 def clean_html(html):
     """Clean an HTML snippet into a readable string"""
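With the parser classes from the hunk above in scope, the new helper can be exercised like this (minimal example):

    html = u'<html><head><meta name="description" content="A test page"></head></html>'
    print(get_meta_content('description', html))  # -> u'A test page'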
@@ -664,7 +700,16 @@ def unified_strdate(date_str):
     date_str = date_str.replace(',',' ')
     # %z (UTC offset) is only supported in python>=3.2
     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
-    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
+    format_expressions = [
+        '%d %B %Y',
+        '%B %d %Y',
+        '%b %d %Y',
+        '%Y-%m-%d',
+        '%d/%m/%Y',
+        '%Y/%m/%d %H:%M:%S',
+        '%d.%m.%Y %H:%M',
+        '%Y-%m-%dT%H:%M:%SZ',
+    ]
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
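The added '%Y-%m-%dT%H:%M:%SZ' expression lets ISO-8601 timestamps through; for example:

    import datetime

    date_str = '2013-09-20T08:14:04Z'  # example timestamp
    print(datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%SZ').strftime('%Y%m%d'))
    # -> 20130920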
@@ -745,6 +790,18 @@ def platform_name():
     return res


+def write_string(s, out=None):
+    if out is None:
+        out = sys.stderr
+    assert type(s) == type(u'')
+
+    if ('b' in getattr(out, 'mode', '') or
+            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
+        s = s.encode(preferredencoding(), 'ignore')
+    out.write(s)
+    out.flush()
+
+
 def bytes_to_intlist(bs):
     if not bs:
         return []
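`write_string` centralises console output so unicode survives both Python 2 byte streams and Python 3 text streams; typical usage, assuming the helper above is in scope:

    import sys

    write_string(u'[youtube] downloading...\n')   # defaults to sys.stderr
    write_string(u'done\n', out=sys.stdout)       # explicit stream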
youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.08.29'
+__version__ = '2013.09.20.1'