Compare commits
105 Commits
2016.05.21
...
2016.06.03
Author | SHA1 | Date | |
---|---|---|---|
|
762d44c956 | ||
|
4d8856d511 | ||
|
c917106be4 | ||
|
76e9cd7f24 | ||
|
bf4c6a38e1 | ||
|
7f3c3dfa52 | ||
|
9c3c447eb3 | ||
|
ad73083ff0 | ||
|
1e8b59243f | ||
|
c88270271e | ||
|
b96f007eeb | ||
|
9a4aec8b7e | ||
|
54fb199681 | ||
|
8c32e5dc32 | ||
|
0ea590076f | ||
|
4a684895c0 | ||
|
f4e4aa9b6b | ||
|
5e3856a2c5 | ||
|
6e6b9f600f | ||
|
6a1df4fb5f | ||
|
dde1ce7c06 | ||
|
811586ebcf | ||
|
0ff3749bfe | ||
|
28bab13348 | ||
|
877032314f | ||
|
8ec2b2c41c | ||
|
197a5da1d0 | ||
|
abbb2938fa | ||
|
f657b1a5f2 | ||
|
86a52881c6 | ||
|
8267423652 | ||
|
917a3196f8 | ||
|
56bd028a0f | ||
|
681b923b5c | ||
|
9ed6d8c6c5 | ||
|
f3fb420b82 | ||
|
165e3561e9 | ||
|
27f17c0eab | ||
|
44c8892369 | ||
|
f574103d7c | ||
|
6d138e98e3 | ||
|
2a329110b9 | ||
|
2bee7b25f3 | ||
|
92cf872a48 | ||
|
6461f2b7ec | ||
|
807cf7b07f | ||
|
de7d76af52 | ||
|
11c70deba7 | ||
|
f36532404d | ||
|
77b8b4e696 | ||
|
2615fa7584 | ||
|
fac2af3c51 | ||
|
6f8cb24219 | ||
|
448bb5f333 | ||
|
293c255688 | ||
|
ac88d2316e | ||
|
5950cb1d6d | ||
|
761052db92 | ||
|
240b60453e | ||
|
85b0fe7d64 | ||
|
0a5685b26f | ||
|
6f748df43f | ||
|
b410cb83d4 | ||
|
da9d82840a | ||
|
4ee0b8afdb | ||
|
1de32771e1 | ||
|
688c634b7d | ||
|
0d6ee97508 | ||
|
6b43132ce9 | ||
|
a4690b3244 | ||
|
444417edb5 | ||
|
277c7465f5 | ||
|
25bcd3550e | ||
|
a4760d204f | ||
|
e8593f346a | ||
|
05b651e3a5 | ||
|
42a7439717 | ||
|
b1e9ebd080 | ||
|
0c50eeb987 | ||
|
4b464a6a78 | ||
|
5db9df622f | ||
|
5181759c0d | ||
|
e54373204a | ||
|
102810ef04 | ||
|
78d3b3e213 | ||
|
7a46542f97 | ||
|
eb7941e3e6 | ||
|
db3b8b2103 | ||
|
c5f5155100 | ||
|
4a12077855 | ||
|
a4a7c44bd3 | ||
|
70346165fe | ||
|
c776b99691 | ||
|
e9297256d4 | ||
|
e5871c672b | ||
|
9b06b0fb92 | ||
|
4f3a25c2b4 | ||
|
21a19aa94d | ||
|
c6b9cf05e1 | ||
|
4d8819d249 | ||
|
898f4b49cc | ||
|
0150a00f33 | ||
|
c8831015f4 | ||
|
92d221ad48 | ||
|
0db9a05f88 |
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.03**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.05.21.2
|
||||
[debug] youtube-dl version 2016.06.03
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
6
.gitignore
vendored
@@ -28,12 +28,16 @@ updates_key.pem
|
||||
*.mp4
|
||||
*.m4a
|
||||
*.m4v
|
||||
*.mp3
|
||||
*.part
|
||||
*.swp
|
||||
test/testdata
|
||||
test/local_parameters.json
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
|
||||
# IntelliJ related files
|
||||
.idea
|
||||
.idea/*
|
||||
*.iml
|
||||
|
||||
tmp/
|
||||
|
@@ -14,7 +14,6 @@ script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
- phihag@phihag.de
|
||||
- yasoob.khld@gmail.com
|
||||
# irc:
|
||||
# channels:
|
||||
|
4
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
@@ -69,7 +69,7 @@ README.txt: README.md
|
||||
pandoc -f markdown -t plain README.md -o README.txt
|
||||
|
||||
youtube-dl.1: README.md
|
||||
$(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md
|
||||
$(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
|
||||
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
|
||||
rm -f youtube-dl.1.temp.md
|
||||
|
||||
|
29
README.md
@@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget:
|
||||
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
||||
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
|
||||
|
||||
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
|
||||
|
||||
@@ -73,8 +73,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
repairs broken URLs, but emits an error if
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: Do not read the user
|
||||
in the global configuration file
|
||||
/etc/youtube-dl.conf: Do not read the user
|
||||
configuration in ~/.config/youtube-
|
||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||
on Windows)
|
||||
@@ -162,7 +162,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
(experimental)
|
||||
|
||||
## Download Options:
|
||||
-r, --rate-limit LIMIT Maximum download rate in bytes per second
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite".
|
||||
@@ -256,11 +256,12 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
permanently. By default
|
||||
$XDG_CACHE_HOME/youtube-dl or
|
||||
~/.cache/youtube-dl . At the moment, only
|
||||
YouTube player files (for videos with
|
||||
obfuscated signatures) are cached, but that
|
||||
may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
@@ -433,7 +434,7 @@ You can use `--ignore-config` if you want to disable the configuration file for
|
||||
|
||||
### Authentication with `.netrc` file
|
||||
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
```
|
||||
touch $HOME/.netrc
|
||||
chmod a-rwx,u+rw $HOME/.netrc
|
||||
@@ -693,6 +694,10 @@ hash -r
|
||||
|
||||
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
|
||||
|
||||
### youtube-dl is extremely slow to start on Windows
|
||||
|
||||
Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
|
||||
|
||||
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
||||
|
||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||
@@ -780,9 +785,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
|
||||
|
||||
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
||||
|
||||
### The exe throws a *Runtime error from Visual C++*
|
||||
### The exe throws an error due to missing `MSVCR100.dll`
|
||||
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
|
||||
|
||||
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
||||
|
||||
|
@@ -1,17 +1,42 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from socketserver import ThreadingMixIn
|
||||
import argparse
|
||||
import ctypes
|
||||
import functools
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import traceback
|
||||
import os.path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
|
||||
from youtube_dl.compat import (
|
||||
compat_http_server,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
class BuildHTTPServer(ThreadingMixIn, HTTPServer):
|
||||
# These are not used outside of buildserver.py thus not in compat.py
|
||||
|
||||
try:
|
||||
import winreg as compat_winreg
|
||||
except ImportError: # Python 2
|
||||
import _winreg as compat_winreg
|
||||
|
||||
try:
|
||||
import socketserver as compat_socketserver
|
||||
except ImportError: # Python 2
|
||||
import SocketServer as compat_socketserver
|
||||
|
||||
try:
|
||||
compat_input = raw_input
|
||||
except NameError: # Python 3
|
||||
compat_input = input
|
||||
|
||||
|
||||
class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
|
||||
allow_reuse_address = True
|
||||
|
||||
|
||||
@@ -191,7 +216,7 @@ def main(args=None):
|
||||
action='store_const', dest='action', const='service',
|
||||
help='Run as a Windows service')
|
||||
parser.add_argument('-b', '--bind', metavar='<host:port>',
|
||||
action='store', default='localhost:8142',
|
||||
action='store', default='0.0.0.0:8142',
|
||||
help='Bind to host:port (default %default)')
|
||||
options = parser.parse_args(args=args)
|
||||
|
||||
@@ -216,7 +241,7 @@ def main(args=None):
|
||||
srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
|
||||
thr = threading.Thread(target=srv.serve_forever)
|
||||
thr.start()
|
||||
input('Press ENTER to shut down')
|
||||
compat_input('Press ENTER to shut down')
|
||||
srv.shutdown()
|
||||
thr.join()
|
||||
|
||||
@@ -231,8 +256,6 @@ def rmtree(path):
|
||||
os.remove(fname)
|
||||
os.rmdir(path)
|
||||
|
||||
#==============================================================================
|
||||
|
||||
|
||||
class BuildError(Exception):
|
||||
def __init__(self, output, code=500):
|
||||
@@ -249,15 +272,25 @@ class HTTPError(BuildError):
|
||||
|
||||
class PythonBuilder(object):
|
||||
def __init__(self, **kwargs):
|
||||
pythonVersion = kwargs.pop('python', '2.7')
|
||||
try:
|
||||
key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
|
||||
python_version = kwargs.pop('python', '3.4')
|
||||
python_path = None
|
||||
for node in ('Wow6432Node\\', ''):
|
||||
try:
|
||||
self.pythonPath, _ = _winreg.QueryValueEx(key, '')
|
||||
finally:
|
||||
_winreg.CloseKey(key)
|
||||
except Exception:
|
||||
raise BuildError('No such Python version: %s' % pythonVersion)
|
||||
key = compat_winreg.OpenKey(
|
||||
compat_winreg.HKEY_LOCAL_MACHINE,
|
||||
r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
|
||||
try:
|
||||
python_path, _ = compat_winreg.QueryValueEx(key, '')
|
||||
finally:
|
||||
compat_winreg.CloseKey(key)
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not python_path:
|
||||
raise BuildError('No such Python version: %s' % python_version)
|
||||
|
||||
self.pythonPath = python_path
|
||||
|
||||
super(PythonBuilder, self).__init__(**kwargs)
|
||||
|
||||
@@ -305,8 +338,10 @@ class YoutubeDLBuilder(object):
|
||||
|
||||
def build(self):
|
||||
try:
|
||||
subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
||||
cwd=self.buildPath)
|
||||
proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
|
||||
proc.wait()
|
||||
#subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
||||
# cwd=self.buildPath)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise BuildError(e.output)
|
||||
|
||||
@@ -369,12 +404,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea
|
||||
pass
|
||||
|
||||
|
||||
class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
|
||||
|
||||
def do_GET(self):
|
||||
path = urlparse.urlparse(self.path)
|
||||
paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
|
||||
path = compat_urlparse.urlparse(self.path)
|
||||
paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
|
||||
action, _, path = path.path.strip('/').partition('/')
|
||||
if path:
|
||||
path = path.split('/')
|
||||
@@ -388,7 +423,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
builder.close()
|
||||
except BuildError as e:
|
||||
self.send_response(e.code)
|
||||
msg = unicode(e).encode('UTF-8')
|
||||
msg = compat_str(e).encode('UTF-8')
|
||||
self.send_header('Content-Type', 'text/plain; charset=UTF-8')
|
||||
self.send_header('Content-Length', len(msg))
|
||||
self.end_headers()
|
||||
@@ -400,7 +435,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
else:
|
||||
self.send_response(500, 'Malformed URL')
|
||||
|
||||
#==============================================================================
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -1,13 +1,46 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import optparse
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||
|
||||
'''
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 1:
|
||||
parser.error('Expected an output filename')
|
||||
|
||||
outfile, = args
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
readme = filter_options(readme)
|
||||
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(readme)
|
||||
|
||||
|
||||
def filter_options(readme):
|
||||
ret = ''
|
||||
@@ -37,27 +70,5 @@ def filter_options(readme):
|
||||
|
||||
return ret
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||
|
||||
'''
|
||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
readme = filter_options(readme)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
print(readme.encode('utf-8'))
|
||||
else:
|
||||
print(readme)
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -6,7 +6,7 @@
|
||||
# * the git config user.signingkey is properly set
|
||||
|
||||
# You will need
|
||||
# pip install coverage nose rsa
|
||||
# pip install coverage nose rsa wheel
|
||||
|
||||
# TODO
|
||||
# release notes
|
||||
@@ -15,10 +15,28 @@
|
||||
set -e
|
||||
|
||||
skip_tests=true
|
||||
if [ "$1" = '--run-tests' ]; then
|
||||
skip_tests=false
|
||||
shift
|
||||
fi
|
||||
buildserver='localhost:8142'
|
||||
|
||||
while true
|
||||
do
|
||||
case "$1" in
|
||||
--run-tests)
|
||||
skip_tests=false
|
||||
shift
|
||||
;;
|
||||
--buildserver)
|
||||
buildserver="$2"
|
||||
shift 2
|
||||
;;
|
||||
--*)
|
||||
echo "ERROR: unknown option $1"
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
||||
version="$1"
|
||||
@@ -35,6 +53,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us
|
||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
||||
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
|
||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
||||
|
||||
/bin/echo -e "\n### First of all, testing..."
|
||||
make clean
|
||||
@@ -66,7 +85,7 @@ git push origin "$version"
|
||||
REV=$(git rev-parse HEAD)
|
||||
make youtube-dl youtube-dl.tar.gz
|
||||
read -p "VM running? (y/n) " -n 1
|
||||
wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||
wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||
mkdir -p "build/$version"
|
||||
mv youtube-dl youtube-dl.exe "build/$version"
|
||||
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
|
||||
|
@@ -55,6 +55,7 @@
|
||||
- **arte.tv:future**
|
||||
- **arte.tv:info**
|
||||
- **arte.tv:magazine**
|
||||
- **arte.tv:playlist**
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
- **AudiMedia**
|
||||
@@ -136,6 +137,7 @@
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
@@ -205,6 +207,7 @@
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
@@ -326,8 +329,8 @@
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
- **Libsyn**
|
||||
- **life**: Life.ru
|
||||
- **life:embed**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
@@ -336,6 +339,7 @@
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **LnkGo**
|
||||
- **loc**: Library of Congress
|
||||
- **LocalNews8**
|
||||
- **LoveHomePorn**
|
||||
- **lrt.lt**
|
||||
@@ -512,6 +516,8 @@
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
- **radiobremen**
|
||||
- **radiocanada**
|
||||
- **RadioCanadaAudioVideo**
|
||||
- **radiofrance**
|
||||
- **RadioJavan**
|
||||
- **Rai**
|
||||
@@ -521,8 +527,10 @@
|
||||
- **RedTube**
|
||||
- **RegioTV**
|
||||
- **Restudy**
|
||||
- **Reuters**
|
||||
- **ReverbNation**
|
||||
- **Revision3**
|
||||
- **revision**
|
||||
- **revision3:embed**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RottenTomatoes**
|
||||
@@ -561,6 +569,7 @@
|
||||
- **ScreencastOMatic**
|
||||
- **ScreenJunkies**
|
||||
- **ScreenwaveMedia**
|
||||
- **Seeker**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
- **ServingSys**
|
||||
@@ -682,8 +691,8 @@
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **tvp.pl**
|
||||
- **tvp.pl:Series**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
@@ -766,7 +775,8 @@
|
||||
- **VuClip**
|
||||
- **vulture.com**
|
||||
- **Walla**
|
||||
- **WashingtonPost**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
- **wat.tv**
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
|
@@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):
|
||||
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
|
||||
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
|
||||
|
||||
def test_compat_etree_fromstring_doctype(self):
|
||||
xml = '''<?xml version="1.0"?>
|
||||
<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
|
||||
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
|
||||
compat_etree_fromstring(xml)
|
||||
|
||||
def test_struct_unpack(self):
|
||||
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
|
||||
|
||||
|
@@ -16,6 +16,15 @@ import threading
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
@@ -31,6 +40,22 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
self.send_header('Content-Type', 'video/mp4')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
|
||||
elif self.path == '/302':
|
||||
if sys.version_info[0] == 3:
|
||||
# XXX: Python 3 http server does not allow non-ASCII header values
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
return
|
||||
|
||||
new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
|
||||
self.send_response(302)
|
||||
self.send_header(b'Location', new_url.encode('utf-8'))
|
||||
self.end_headers()
|
||||
elif self.path == '/%E4%B8%AD%E6%96%87.html':
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
||||
else:
|
||||
assert False
|
||||
|
||||
@@ -47,18 +72,32 @@ class FakeLogger(object):
|
||||
|
||||
|
||||
class TestHTTP(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
self.port = http_server_port(self.httpd)
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
self.server_thread.start()
|
||||
|
||||
def test_unicode_path_redirection(self):
|
||||
# XXX: Python 3 http server does not allow non-ASCII header values
|
||||
if sys.version_info[0] == 3:
|
||||
return
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
r = ydl.extract_info('http://localhost:%d/302' % self.port)
|
||||
self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
class TestHTTPS(unittest.TestCase):
|
||||
def setUp(self):
|
||||
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
self.httpd.socket = ssl.wrap_socket(
|
||||
self.httpd.socket, certfile=certfn, server_side=True)
|
||||
if os.name == 'java':
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = self.httpd.socket.sock
|
||||
else:
|
||||
sock = self.httpd.socket
|
||||
self.port = sock.getsockname()[1]
|
||||
self.port = http_server_port(self.httpd)
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
self.server_thread.start()
|
||||
@@ -94,14 +133,14 @@ class TestProxy(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('normal'))
|
||||
self.port = self.proxy.socket.getsockname()[1]
|
||||
self.port = http_server_port(self.proxy)
|
||||
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
|
||||
self.proxy_thread.daemon = True
|
||||
self.proxy_thread.start()
|
||||
|
||||
self.cn_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('cn'))
|
||||
self.cn_port = self.cn_proxy.socket.getsockname()[1]
|
||||
self.cn_port = http_server_port(self.cn_proxy)
|
||||
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
|
||||
self.cn_proxy_thread.daemon = True
|
||||
self.cn_proxy_thread.start()
|
||||
|
@@ -157,8 +157,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertTrue(sanitize_filename(':', restricted=True) != '')
|
||||
|
||||
self.assertEqual(sanitize_filename(
|
||||
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True),
|
||||
'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy')
|
||||
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
|
||||
'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
|
||||
|
||||
def test_sanitize_ids(self):
|
||||
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
|
||||
|
@@ -245,13 +245,20 @@ try:
|
||||
except ImportError: # Python 2.6
|
||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||
|
||||
|
||||
etree = xml.etree.ElementTree
|
||||
|
||||
|
||||
class _TreeBuilder(etree.TreeBuilder):
|
||||
def doctype(self, name, pubid, system):
|
||||
pass
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
compat_etree_fromstring = xml.etree.ElementTree.fromstring
|
||||
def compat_etree_fromstring(text):
|
||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||
else:
|
||||
# python 2.x tries to encode unicode strings with ascii (see the
|
||||
# XMLParser._fixtext method)
|
||||
etree = xml.etree.ElementTree
|
||||
|
||||
try:
|
||||
_etree_iter = etree.Element.iter
|
||||
except AttributeError: # Python <=2.6
|
||||
@@ -265,7 +272,7 @@ else:
|
||||
# 2.7 source
|
||||
def _XML(text, parser=None):
|
||||
if not parser:
|
||||
parser = etree.XMLParser(target=etree.TreeBuilder())
|
||||
parser = etree.XMLParser(target=_TreeBuilder())
|
||||
parser.feed(text)
|
||||
return parser.close()
|
||||
|
||||
@@ -277,7 +284,7 @@ else:
|
||||
return el
|
||||
|
||||
def compat_etree_fromstring(text):
|
||||
doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
|
||||
doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
|
||||
for el in _etree_iter(doc):
|
||||
if el.text is not None and isinstance(el.text, bytes):
|
||||
el.text = el.text.decode('utf-8')
|
||||
|
@@ -319,7 +319,7 @@ class F4mFD(FragmentFD):
|
||||
doc = compat_etree_fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
for f in self._get_unencrypted_media(doc)]
|
||||
if requested_bitrate is None:
|
||||
if requested_bitrate is None or len(formats) == 1:
|
||||
# get the best format
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
|
@@ -61,10 +61,7 @@ class ArteTvIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class ArteTVPlus7IE(InfoExtractor):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
|
||||
|
||||
class ArteTVBaseIE(InfoExtractor):
|
||||
@classmethod
|
||||
def _extract_url_info(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url)
|
||||
@@ -78,60 +75,6 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
return video_id, lang
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self._extract_from_webpage(webpage, video_id, lang)
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
|
||||
ids = (video_id, '')
|
||||
# some pages contain multiple videos (like
|
||||
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
|
||||
# so we first try to look for json URLs that contain the video id from
|
||||
# the 'vid' parameter.
|
||||
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
|
||||
if embed_url:
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
# en and es URLs produce react-based pages with different layout (e.g.
|
||||
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
|
||||
if not iframe_url:
|
||||
program = self._search_regex(
|
||||
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
|
||||
webpage, 'program', default=None)
|
||||
if program:
|
||||
embed_html = self._parse_json(program, video_id)
|
||||
if embed_html:
|
||||
iframe_url = find_iframe_url(embed_html['embed_html'])
|
||||
if iframe_url:
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
if json_url:
|
||||
title = self._search_regex(
|
||||
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
webpage, 'title', default=None, group='title')
|
||||
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||
# Different kind of embed URL (e.g.
|
||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'embed url', group='url')
|
||||
return self.url_result(embed_url)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
info = self._download_json(json_url, video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
@@ -235,6 +178,74 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
return info_dict
|
||||
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self._extract_from_webpage(webpage, video_id, lang)
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
|
||||
ids = (video_id, '')
|
||||
# some pages contain multiple videos (like
|
||||
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
|
||||
# so we first try to look for json URLs that contain the video id from
|
||||
# the 'vid' parameter.
|
||||
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
|
||||
if embed_url:
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
# en and es URLs produce react-based pages with different layout (e.g.
|
||||
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
|
||||
if not iframe_url:
|
||||
program = self._search_regex(
|
||||
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
|
||||
webpage, 'program', default=None)
|
||||
if program:
|
||||
embed_html = self._parse_json(program, video_id)
|
||||
if embed_html:
|
||||
iframe_url = find_iframe_url(embed_html['embed_html'])
|
||||
if iframe_url:
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
if json_url:
|
||||
title = self._search_regex(
|
||||
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
webpage, 'title', default=None, group='title')
|
||||
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||
# Different kind of embed URL (e.g.
|
||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'embed url', group='url')
|
||||
return self.url_result(embed_url)
|
||||
|
||||
|
||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:creative'
|
||||
@@ -267,7 +278,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:info'
|
||||
_VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
|
||||
'info_dict': {
|
||||
'id': '067528-000-A',
|
||||
@@ -275,7 +286,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
|
||||
'title': 'Service civique, un cache misère ?',
|
||||
'upload_date': '20160403',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
@@ -300,6 +311,8 @@ class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:ddc'
|
||||
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
if lang == 'folge':
|
||||
@@ -318,7 +331,7 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:concert'
|
||||
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
||||
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
|
||||
'info_dict': {
|
||||
@@ -328,14 +341,14 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
'upload_date': '20140128',
|
||||
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:cinema'
|
||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://cinema.arte.tv/de/node/38291',
|
||||
'md5': '6b275511a5107c60bacbeeda368c3aa1',
|
||||
'info_dict': {
|
||||
@@ -345,7 +358,7 @@ class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
'upload_date': '20160122',
|
||||
'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVMagazineIE(ArteTVPlus7IE):
|
||||
@@ -390,9 +403,41 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang')
|
||||
json_url = mobj.group('json_url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
|
||||
'info_dict': {
|
||||
'id': 'PL-013263',
|
||||
'title': 'Areva & Uramin',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, lang = self._extract_url_info(url)
|
||||
collection = self._download_json(
|
||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
||||
% (lang, playlist_id), playlist_id)
|
||||
title = collection.get('title')
|
||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||
entries = [
|
||||
self._extract_from_json_url(
|
||||
video['jsonUrl'], video.get('programId') or playlist_id, lang)
|
||||
for video in collection['videos'] if video.get('jsonUrl')]
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
@@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '2b68e5851514c20efdff2afc5603b8b4',
|
||||
'md5': '73d0b3171568232574e45652f8720b5c',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'mp3',
|
||||
@@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)[0]
|
||||
track_id = compat_str(data['id'])
|
||||
|
||||
if not data.get('file'):
|
||||
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data['file'].items():
|
||||
@@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(data['id']),
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
|
@@ -1,34 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '2c301e4dab317596e837c3e7633e7d86',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'info_dict': {
|
||||
'id': '1554319',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308313,
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.067,
|
||||
'timestamp': 1398012660,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'timestamp': 1397983878,
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
@@ -36,75 +44,169 @@ class BiliBiliIE(InfoExtractor):
|
||||
'id': '1041170',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'uploader': '枫叶逝去',
|
||||
'timestamp': 1396501299,
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||
'info_dict': {
|
||||
'id': '4808130',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
|
||||
'info_dict': {
|
||||
'id': '4808130_part1',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '926f9f67d0c482091872fbd8eca7ea3d',
|
||||
'info_dict': {
|
||||
'id': '4808130_part2',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '4b7b225b968402d7c32348c646f1fd83',
|
||||
'info_dict': {
|
||||
'id': '4808130_part3',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '7b795e214166501e9141139eea236e91',
|
||||
'info_dict': {
|
||||
'id': '4808130_part4',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||
# the Android client
|
||||
# TODO: find the sign algorithm used in the flash player
|
||||
_APP_KEY = '86385cdc024c0f6c'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
page_num = mobj.group('page_num') or '1'
|
||||
|
||||
view_data = self._download_json(
|
||||
'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
|
||||
video_id)
|
||||
if 'error' in view_data:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cid = view_data['cid']
|
||||
title = unescapeHTML(view_data['title'])
|
||||
params = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))
|
||||
cid = params['cid'][0]
|
||||
|
||||
doc = self._download_xml(
|
||||
'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
|
||||
cid,
|
||||
'Downloading page %s/%s' % (page_num, view_data['pages'])
|
||||
)
|
||||
info_xml_str = self._download_webpage(
|
||||
'http://interface.bilibili.com/v_cdn_play',
|
||||
cid, query={'appkey': self._APP_KEY, 'cid': cid},
|
||||
note='Downloading video info page')
|
||||
|
||||
if xpath_text(doc, './result') == 'error':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
|
||||
err_msg = None
|
||||
durls = None
|
||||
info_xml = None
|
||||
try:
|
||||
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
|
||||
err_msg = (info_json or {}).get('error_text')
|
||||
else:
|
||||
err_msg = xpath_text(info_xml, './message')
|
||||
|
||||
if info_xml is not None:
|
||||
durls = info_xml.findall('./durl')
|
||||
if not durls:
|
||||
if err_msg:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
|
||||
else:
|
||||
raise ExtractorError('No videos found!')
|
||||
|
||||
entries = []
|
||||
|
||||
for durl in doc.findall('./durl'):
|
||||
for durl in durls:
|
||||
size = xpath_text(durl, ['./filesize', './size'])
|
||||
formats = [{
|
||||
'url': durl.find('./url').text,
|
||||
'filesize': int_or_none(size),
|
||||
'ext': 'flv',
|
||||
}]
|
||||
backup_urls = durl.find('./backup_url')
|
||||
if backup_urls is not None:
|
||||
for backup_url in backup_urls.findall('./url'):
|
||||
formats.append({'url': backup_url.text})
|
||||
formats.reverse()
|
||||
for backup_url in durl.findall('./backup_url/url'):
|
||||
formats.append({
|
||||
'url': backup_url.text,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
|
||||
'title': title,
|
||||
'duration': int_or_none(xpath_text(durl, './length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
datetime_str = self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
||||
if datetime_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': compat_str(cid),
|
||||
'title': title,
|
||||
'description': view_data.get('description'),
|
||||
'thumbnail': view_data.get('pic'),
|
||||
'uploader': view_data.get('author'),
|
||||
'timestamp': int_or_none(view_data.get('created')),
|
||||
'view_count': int_or_none(view_data.get('play')),
|
||||
'duration': int_or_none(xpath_text(doc, './timelength')),
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
|
||||
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
|
||||
for entry in entries:
|
||||
entry.update(info)
|
||||
|
||||
if len(entries) == 1:
|
||||
entries[0].update(info)
|
||||
return entries[0]
|
||||
else:
|
||||
info.update({
|
||||
for idx, entry in enumerate(entries):
|
||||
entry['id'] = '%s_part%d' % (video_id, (idx + 1))
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
})
|
||||
return info
|
||||
}
|
||||
|
@@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
||||
'info_dict': {
|
||||
'id': 'studio-c-season-5-episode-5',
|
||||
'ext': 'mp4',
|
||||
@@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
@@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
@@ -66,11 +68,12 @@ class CBSIE(CBSBaseIE):
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content_id = self._search_regex(
|
||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||
webpage, 'content id')
|
||||
content_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not content_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content_id = self._search_regex(
|
||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||
webpage, 'content id')
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
|
@@ -20,54 +20,64 @@ class Channel9IE(InfoExtractor):
|
||||
'''
|
||||
IE_DESC = 'Channel 9'
|
||||
IE_NAME = 'channel9'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
'info_dict': {
|
||||
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||
'ext': 'mp4',
|
||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
'duration': 4576,
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'session_code': 'KOS002',
|
||||
'session_day': 'Day 1',
|
||||
'session_room': 'Arena 1A',
|
||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
|
||||
},
|
||||
_TESTS = [{
|
||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
'info_dict': {
|
||||
'id': 'Events/TechEd/Australia/2013/KOS002',
|
||||
'ext': 'mp4',
|
||||
'title': 'Developer Kick-Off Session: Stuff We Love',
|
||||
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
|
||||
'duration': 4576,
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'session_code': 'KOS002',
|
||||
'session_day': 'Day 1',
|
||||
'session_room': 'Arena 1A',
|
||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
|
||||
'Mads Kristensen'],
|
||||
},
|
||||
{
|
||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
'info_dict': {
|
||||
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'ext': 'mp4',
|
||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'duration': 1540,
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'authors': ['Mike Wilmot'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
'info_dict': {
|
||||
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'ext': 'mp4',
|
||||
'title': 'Self-service BI with Power BI - nuclear testing',
|
||||
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
|
||||
'duration': 1540,
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'authors': ['Mike Wilmot'],
|
||||
},
|
||||
{
|
||||
# low quality mp4 is best
|
||||
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'info_dict': {
|
||||
'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ranges for the Standard Library',
|
||||
'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
|
||||
'duration': 5646,
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
}, {
|
||||
# low quality mp4 is best
|
||||
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'info_dict': {
|
||||
'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ranges for the Standard Library',
|
||||
'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
|
||||
'duration': 5646,
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
|
||||
'info_dict': {
|
||||
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
|
||||
'title': 'Channel 9',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||
|
||||
@@ -254,22 +264,30 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
return self.playlist_result(contents)
|
||||
|
||||
def _extract_list(self, content_path):
|
||||
rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
|
||||
def _extract_list(self, video_id, rss_url=None):
|
||||
if not rss_url:
|
||||
rss_url = self._RSS_URL % video_id
|
||||
rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
|
||||
entries = [self.url_result(session_url.text, 'Channel9')
|
||||
for session_url in rss.findall('./channel/item/link')]
|
||||
title_text = rss.find('./channel/title').text
|
||||
return self.playlist_result(entries, content_path, title_text)
|
||||
return self.playlist_result(entries, video_id, title_text)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
content_path = mobj.group('contentpath')
|
||||
rss = mobj.group('rss')
|
||||
|
||||
webpage = self._download_webpage(url, content_path, 'Downloading web page')
|
||||
if rss:
|
||||
return self._extract_list(content_path, url)
|
||||
|
||||
page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
|
||||
if page_type_m is not None:
|
||||
page_type = page_type_m.group('pagetype')
|
||||
webpage = self._download_webpage(
|
||||
url, content_path, 'Downloading web page')
|
||||
|
||||
page_type = self._search_regex(
|
||||
r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
|
||||
webpage, 'page type', default=None, group='pagetype')
|
||||
if page_type:
|
||||
if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
|
||||
return self._extract_entry_item(webpage, content_path)
|
||||
elif page_type == 'Session': # Event session page, may contain downloadable content
|
||||
@@ -278,6 +296,5 @@ class Channel9IE(InfoExtractor):
|
||||
return self._extract_list(content_path)
|
||||
else:
|
||||
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
|
||||
|
||||
else: # Assuming list
|
||||
return self._extract_list(content_path)
|
||||
|
@@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
|
||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
@@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||
|
@@ -987,7 +987,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
fatal=True, m3u8_id=None):
|
||||
manifest = self._download_xml(
|
||||
manifest_url, video_id, 'Downloading f4m manifest',
|
||||
'Unable to download f4m manifest',
|
||||
@@ -1001,11 +1001,11 @@ class InfoExtractor(object):
|
||||
|
||||
return self._parse_f4m_formats(
|
||||
manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal)
|
||||
transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
|
||||
|
||||
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
fatal=True, m3u8_id=None):
|
||||
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
|
||||
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
|
||||
if akamai_pv is not None and ';' in akamai_pv.text:
|
||||
@@ -1029,9 +1029,26 @@ class InfoExtractor(object):
|
||||
'base URL', default=None)
|
||||
if base_url:
|
||||
base_url = base_url.strip()
|
||||
|
||||
bootstrap_info = xpath_text(
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
||||
'bootstrap info', default=None)
|
||||
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
if manifest_version == '2.0':
|
||||
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
width = int_or_none(media_el.attrib.get('width'))
|
||||
height = int_or_none(media_el.attrib.get('height'))
|
||||
format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
|
||||
# If <bootstrapInfo> is present, the specified f4m is a
|
||||
# stream-level manifest, and only set-level manifests may refer to
|
||||
# external resources. See section 11.4 and section 4 of F4M spec
|
||||
if bootstrap_info is None:
|
||||
media_url = None
|
||||
# @href is introduced in 2.0, see section 11.6 of F4M spec
|
||||
if manifest_version == '2.0':
|
||||
media_url = media_el.attrib.get('href')
|
||||
if media_url is None:
|
||||
media_url = media_el.attrib.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
manifest_url = (
|
||||
@@ -1041,19 +1058,37 @@ class InfoExtractor(object):
|
||||
# since bitrates in parent manifest (this one) and media_url manifest
|
||||
# may differ leading to inability to resolve the format by requested
|
||||
# bitrate in f4m downloader
|
||||
if determine_ext(manifest_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
ext = determine_ext(manifest_url)
|
||||
if ext == 'f4m':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal))
|
||||
transform_source=transform_source, fatal=fatal)
|
||||
# Sometimes stream-level manifest contains single media entry that
|
||||
# does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
|
||||
# At the same time parent's media entry in set-level manifest may
|
||||
# contain it. We will copy it from parent in such cases.
|
||||
if len(f4m_formats) == 1:
|
||||
f = f4m_formats[0]
|
||||
f.update({
|
||||
'tbr': f.get('tbr') or tbr,
|
||||
'width': f.get('width') or width,
|
||||
'height': f.get('height') or height,
|
||||
'format_id': f.get('format_id') if not tbr else format_id,
|
||||
})
|
||||
formats.extend(f4m_formats)
|
||||
continue
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', preference=preference,
|
||||
m3u8_id=m3u8_id, fatal=fatal))
|
||||
continue
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
||||
'format_id': format_id,
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'ext': 'flv' if bootstrap_info else None,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(media_el.attrib.get('width')),
|
||||
'height': int_or_none(media_el.attrib.get('height')),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'preference': preference,
|
||||
})
|
||||
return formats
|
||||
|
143
youtube_dl/extractor/coub.py
Normal file
143
youtube_dl/extractor/coub.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class CoubIE(InfoExtractor):
    """Information extractor for coub.com clips.

    Metadata and media URLs are read from the JSON document served at
    ``http://coub.com/api/v2/coubs/<id>.json``.
    """

    # Accepts the short "coub:<id>" form, view/embed/coubs pages and the
    # Flash player URL that carries the id in a coubID/coubid parameter.
    _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'

    _TESTS = [{
        'url': 'http://coub.com/view/5u5n1',
        'info_dict': {
            'id': '5u5n1',
            'ext': 'mp4',
            'title': 'The Matrix Moonwalk',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 4.6,
            'timestamp': 1428527772,
            'upload_date': '20150408',
            'uploader': 'Артём Лоскутников',
            'uploader_id': 'artyom.loskutnikov',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'age_limit': 0,
        },
    }, {
        'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
        'only_matching': True,
    }, {
        'url': 'coub:5u5n1',
        'only_matching': True,
    }, {
        # longer video id
        'url': 'http://coub.com/view/237d5l5h',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the coub JSON description and build the info dict.

        Raises ExtractorError (flagged as expected) when the response
        carries a truthy ``error`` entry. ``title`` and ``file_versions``
        are mandatory keys of the response; every other field is optional.
        """
        video_id = self._match_id(url)

        coub = self._download_json(
            'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)

        if coub.get('error'):
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, coub['error']), expected=True)

        title = coub['title']

        file_versions = coub['file_versions']

        def _dict_at(container, key):
            # A key may be present with a null (or otherwise non-dict) value,
            # in which case ``container.get(key, {})`` would hand back that
            # value and the chained ``.get``/``.items`` call would raise.
            value = container.get(key)
            return value if isinstance(value, dict) else {}

        QUALITIES = ('low', 'med', 'high')

        MOBILE = 'mobile'
        IPHONE = 'iphone'
        HTML5 = 'html5'

        SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)

        # presumably qualities() ranks an id by its position in the given
        # tuple (later entries preferred) - confirm against utils.qualities
        quality_key = qualities(QUALITIES)
        preference_key = qualities(SOURCE_PREFERENCE)

        formats = []

        # html5 section: {'video': {<quality>: {...}}, 'audio': {...}}
        for kind, items in _dict_at(file_versions, HTML5).items():
            if kind not in ('video', 'audio'):
                continue
            if not isinstance(items, dict):
                continue
            for quality, item in items.items():
                if not isinstance(item, dict):
                    continue
                item_url = item.get('url')
                if not item_url:
                    continue
                formats.append({
                    'url': item_url,
                    'format_id': '%s-%s-%s' % (HTML5, kind, quality),
                    'filesize': int_or_none(item.get('size')),
                    # audio-only entries are marked as having no video codec
                    'vcodec': 'none' if kind == 'audio' else None,
                    'quality': quality_key(quality),
                    'preference': preference_key(HTML5),
                })

        iphone_url = _dict_at(file_versions, IPHONE).get('url')
        if iphone_url:
            formats.append({
                'url': iphone_url,
                'format_id': IPHONE,
                'preference': preference_key(IPHONE),
            })

        # Only the audio URL of the mobile section is used
        mobile_url = _dict_at(file_versions, MOBILE).get('audio_url')
        if mobile_url:
            formats.append({
                'url': mobile_url,
                'format_id': '%s-audio' % MOBILE,
                'preference': preference_key(MOBILE),
            })

        self._sort_formats(formats)

        thumbnail = coub.get('picture')
        duration = float_or_none(coub.get('duration'))
        timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))

        channel = _dict_at(coub, 'channel')
        uploader = channel.get('title')
        uploader_id = channel.get('permalink')

        view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
        like_count = int_or_none(coub.get('likes_count'))
        repost_count = int_or_none(coub.get('recoubs_count'))
        comment_count = int_or_none(coub.get('comments_count'))

        # age_restricted_by_admin is consulted only when the age_restricted
        # key is absent altogether; unknown status maps to no age limit info.
        age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
        if age_restricted is not None:
            age_limit = 18 if age_restricted is True else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'like_count': like_count,
            'repost_count': repost_count,
            'comment_count': comment_count,
            'age_limit': age_limit,
            'formats': formats,
        }
|
@@ -2,13 +2,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class DWIE(InfoExtractor):
|
||||
IE_NAME = 'dw'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
|
||||
@@ -31,6 +34,16 @@ class DWIE(InfoExtractor):
|
||||
'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
|
||||
'upload_date': '20160311',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
|
||||
'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
|
||||
'info_dict': {
|
||||
'id': '19274438',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome to the 90s – Hip Hop',
|
||||
'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
|
||||
'upload_date': '20160521',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -38,6 +51,7 @@ class DWIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
title = hidden_inputs['media_title']
|
||||
media_id = hidden_inputs.get('media_id') or media_id
|
||||
|
||||
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
|
||||
formats = self._extract_smil_formats(
|
||||
@@ -49,13 +63,20 @@ class DWIE(InfoExtractor):
|
||||
else:
|
||||
formats = [{'url': hidden_inputs['file_name']}]
|
||||
|
||||
upload_date = hidden_inputs.get('display_date')
|
||||
if not upload_date:
|
||||
upload_date = self._html_search_regex(
|
||||
r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
|
||||
'upload date', default=None)
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': hidden_inputs.get('preview_image'),
|
||||
'duration': int_or_none(hidden_inputs.get('file_duration')),
|
||||
'upload_date': hidden_inputs.get('display_date'),
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
@@ -11,8 +11,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
'info_dict': {
|
||||
@@ -23,8 +23,12 @@ class EpornerIE(InfoExtractor):
|
||||
'duration': 1838,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||
'info_dict': {
|
||||
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
|
||||
'ext': 'mp4',
|
||||
@@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
|
||||
'description': None,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
|
||||
'url': 'http://espn.go.com/video/clip?id=2743663',
|
||||
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
|
||||
'info_dict': {
|
||||
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must-See Moments: Best of the MLS season',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
|
@@ -56,6 +56,7 @@ from .arte import (
|
||||
ArteTVDDCIE,
|
||||
ArteTVMagazineIE,
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
@@ -143,6 +144,7 @@ from .cnn import (
|
||||
CNNBlogsIE,
|
||||
CNNArticleIE,
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
@@ -231,6 +233,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
@@ -379,6 +382,7 @@ from .leeco import (
|
||||
LePlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .libraryofcongress import LibraryOfCongressIE
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import (
|
||||
LifeNewsIE,
|
||||
@@ -617,6 +621,10 @@ from .qqmusic import (
|
||||
QQMusicPlaylistIE,
|
||||
)
|
||||
from .r7 import R7IE
|
||||
from .radiocanada import (
|
||||
RadioCanadaIE,
|
||||
RadioCanadaAudioVideoIE,
|
||||
)
|
||||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
@@ -630,8 +638,12 @@ from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .revision3 import Revision3IE
|
||||
from .revision3 import (
|
||||
Revision3EmbedIE,
|
||||
Revision3IE,
|
||||
)
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
@@ -670,6 +682,7 @@ from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenjunkies import ScreenJunkiesIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .seeker import SeekerIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
@@ -827,7 +840,10 @@ from .tvc import (
|
||||
)
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvp import (
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
)
|
||||
from .tvplay import TVPlayIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
@@ -941,7 +957,10 @@ from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
WashingtonPostArticleIE,
|
||||
)
|
||||
from .wat import WatIE
|
||||
from .watchindianporn import WatchIndianPornIE
|
||||
from .wdr import (
|
||||
|
64
youtube_dl/extractor/eyedotv.py
Normal file
64
youtube_dl/extractor/eyedotv.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class EyedoTVIE(InfoExtractor):
    """Information extractor for eyedo.tv live and replay pages.

    Metadata comes from the XML document at
    ``http://eyedo.tv/api/live/GetLive/<id>``; the HLS playlist URL is
    assembled from the video id and the reported live state.
    """

    _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
        'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
        'info_dict': {
            'id': '16301',
            'ext': 'mp4',
            'title': 'Journée du conseil scientifique de l\'Afnic 2015',
            'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
            'uploader': 'Afnic Live',
            'uploader_id': '8023',
        }
    }
    # Base of the streaming server URLs built in _real_extract
    _ROOT_URL = 'http://live.eyedo.net:1935/'

    def _real_extract(self, url):
        """Fetch the GetLive XML for the video and build the info dict.

        Raises ExtractorError (flagged as expected) when the state code is
        ``avenir``, i.e. the video is not yet available. The title and the
        state code are looked up as fatal (mandatory) elements.
        """
        video_id = self._match_id(url)
        video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)

        def _add_ns(path):
            # Qualify an element path with the API's XML namespace
            return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')

        title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
        # The third argument is the human-readable element label; it used to
        # be 'title' here as well, which mislabelled a missing state code.
        state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'state live code', True)
        if state_live_code == 'avenir':
            raise ExtractorError(
                '%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
                expected=True)

        is_live = state_live_code == 'live'
        # http://eyedo.tv/Content/Html5/Scripts/html5view.js
        if is_live:
            # NOTE(review): unlike every other lookup in this method, 'Cdn'
            # is not namespace-qualified via _add_ns - confirm this is
            # intentional, otherwise the CDN branch may never be taken.
            if xpath_text(video_data, 'Cdn') == 'true':
                m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
            else:
                m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
        else:
            m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)

        return {
            'id': video_id,
            'title': title,
            # live streams use the 'm3u8' protocol, replays 'm3u8_native'
            'formats': self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
            'description': xpath_text(video_data, _add_ns('Description')),
            'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
            'uploader': xpath_text(video_data, _add_ns('Createur')),
            'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
            'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
            'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
        }
|
@@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor):
|
||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||
'ext': 'flv',
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -62,6 +62,7 @@ from .digiteka import DigitekaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -783,6 +784,19 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Rosetta #CometLanding webcast HL 10',
|
||||
}
|
||||
},
|
||||
# Another Livestream embed, without 'new.' in URL
|
||||
{
|
||||
'url': 'https://www.freespeech.org/',
|
||||
'info_dict': {
|
||||
'id': '123537347',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
# Live stream
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# LazyYT
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
@@ -867,18 +881,6 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||
}
|
||||
},
|
||||
# Kaltura embed
|
||||
{
|
||||
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
|
||||
'info_dict': {
|
||||
'id': '1_eergr3h1',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150226',
|
||||
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
|
||||
'timestamp': int,
|
||||
'title': 'John Carlson Postgame 2/25/15',
|
||||
},
|
||||
},
|
||||
# Kaltura embed (different embed code)
|
||||
{
|
||||
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
|
||||
@@ -904,6 +906,19 @@ class GenericIE(InfoExtractor):
|
||||
'uploader_id': 'echojecka',
|
||||
},
|
||||
},
|
||||
# Kaltura embed with single quotes
|
||||
{
|
||||
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
|
||||
'info_dict': {
|
||||
'id': '0_izeg5utt',
|
||||
'ext': 'mp4',
|
||||
'title': '35871',
|
||||
'timestamp': 1355743100,
|
||||
'upload_date': '20121217',
|
||||
'uploader_id': 'batchUser',
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
@@ -1018,14 +1033,18 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
# UDN embed
|
||||
{
|
||||
'url': 'http://www.udn.com/news/story/7314/822787',
|
||||
'url': 'https://video.udn.com/news/300346',
|
||||
'md5': 'fd2060e988c326991037b9aff9df21a6',
|
||||
'info_dict': {
|
||||
'id': '300346',
|
||||
'ext': 'mp4',
|
||||
'title': '中一中男師變性 全校師生力挺',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Ooyala embed
|
||||
{
|
||||
@@ -1193,6 +1212,16 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'Lake8737',
|
||||
}
|
||||
},
|
||||
# Duplicated embedded video URLs
|
||||
{
|
||||
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||
'info_dict': {
|
||||
'id': '149298443_480_16c25b74_2',
|
||||
'ext': 'mp4',
|
||||
'title': 'vs. Blue Orange Spring Game',
|
||||
'uploader': 'www.hudl.com',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -1499,6 +1528,11 @@ class GenericIE(InfoExtractor):
|
||||
if bc_urls:
|
||||
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
|
||||
|
||||
# Look for ThePlatform embeds
|
||||
tp_urls = ThePlatformIE._extract_urls(webpage)
|
||||
if tp_urls:
|
||||
return _playlist_from_matches(tp_urls, ie='ThePlatform')
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||
@@ -1862,7 +1896,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
|
||||
r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Livestream')
|
||||
@@ -1874,7 +1908,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
|
||||
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
|
||||
if mobj is not None:
|
||||
return self.url_result(smuggle_url(
|
||||
@@ -2105,7 +2139,7 @@ class GenericIE(InfoExtractor):
|
||||
raise UnsupportedError(url)
|
||||
|
||||
entries = []
|
||||
for video_url in found:
|
||||
for video_url in orderedSet(found):
|
||||
video_url = unescapeHTML(video_url)
|
||||
video_url = video_url.replace('\\/', '/')
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
|
@@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor):
|
||||
'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '42428ce8a00585f9bc36e49226eae7a1',
|
||||
'info_dict': {
|
||||
'id': 'fk6OhWpXgIQ',
|
||||
'ext': 'mp4',
|
||||
@@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor):
|
||||
'uploader_id': 'groupon',
|
||||
'uploader': 'Groupon',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
_PROVIDERS = {
|
||||
|
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||
'md5': '8b743df908c42f60cf6496586c7f12c3',
|
||||
'md5': '7d45932269a288149483144f01b99789',
|
||||
'info_dict': {
|
||||
'id': '390161',
|
||||
'ext': 'mp4',
|
||||
@@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
|
||||
'duration': 56.823,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
143
youtube_dl/extractor/libraryofcongress.py
Normal file
143
youtube_dl/extractor/libraryofcongress.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
class LibraryOfCongressIE(InfoExtractor):
    """Information extractor for loc.gov item pages and cyberlc webcasts.

    The media id found in the web page is resolved through the
    ``media.loc.gov/services/v1/media`` JSON service; direct download links
    listed on the page are added as extra formats.
    """

    IE_NAME = 'loc'
    IE_DESC = 'Library of Congress'
    _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
    _TESTS = [{
        # embedded via <div class="media-player"
        'url': 'http://loc.gov/item/90716351/',
        'md5': '353917ff7f0255aa6d4b80a034833de8',
        'info_dict': {
            'id': '90716351',
            'ext': 'mp4',
            'title': "Pa's trip to Mars",
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 0,
            'view_count': int,
        },
    }, {
        # webcast embedded via mediaObjectId
        'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578',
        'info_dict': {
            'id': '5578',
            'ext': 'mp4',
            'title': 'Help! Preservation Training Needs Here, There & Everywhere',
            'duration': 3765,
            'view_count': int,
            'subtitles': 'mincount:1',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # with direct download links
        'url': 'https://www.loc.gov/item/78710669/',
        'info_dict': {
            'id': '78710669',
            'ext': 'mp4',
            'title': 'La vie et la passion de Jesus-Christ',
            'duration': 0,
            'view_count': int,
            'formats': 'mincount:4',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Resolve the page's media object and build the info dict.

        The first entry of the media object's ``derivatives`` list supplies
        the stream URL; ``mediaObject``, ``derivatives`` and
        ``derivativeUrl`` are mandatory in the JSON response.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The media id can be embedded in several ways; patterns are tried
        # in the order given.
        media_id = self._search_regex(
            (r'id=(["\'])media-player-(?P<id>.+?)\1',
             r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
             r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
             r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
            webpage, 'media id', group='id')

        data = self._download_json(
            'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
            video_id)['mediaObject']

        derivative = data['derivatives'][0]
        media_url = derivative['derivativeUrl']

        title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(
            webpage)

        # Following algorithm was extracted from setAVSource js function
        # found in webpage
        media_url = media_url.replace('rtmp', 'https')

        # mediaType defaults to video when absent
        is_video = data.get('mediaType', 'v').lower() == 'v'
        ext = determine_ext(media_url)
        if ext not in ('mp4', 'mp3'):
            media_url += '.mp4' if is_video else '.mp3'

        # Start from an empty list so that a media URL matching neither
        # streaming layout below does not leave ``formats`` unbound when the
        # download links are appended further down.
        formats = []
        if 'vod/mp4:' in media_url:
            formats.append({
                'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
                'format_id': 'hls',
                'ext': 'mp4',
                'protocol': 'm3u8_native',
                'quality': 1,
            })
        elif 'vod/mp3:' in media_url:
            formats.append({
                'url': media_url.replace('vod/mp3:', ''),
                'vcodec': 'none',
            })

        # Direct download <option> entries: the visible label supplies the
        # format id, optionally followed by a size in parentheses. The
        # separator before the size may be whitespace or a non-breaking
        # space, either literal (U+00A0) or written as the &nbsp; entity.
        download_urls = set()
        for m in re.finditer(
                r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?:&nbsp;|\xa0|\s+)\((?P<size>.+?)\))?\s*<', webpage):
            format_id = m.group('id').lower()
            if format_id == 'gif':
                continue
            download_url = m.group('url')
            # the same URL may be listed more than once; keep the first
            if download_url in download_urls:
                continue
            download_urls.add(download_url)
            formats.append({
                'url': download_url,
                'format_id': format_id,
                'filesize_approx': parse_filesize(m.group('size')),
            })

        self._sort_formats(formats)

        duration = float_or_none(data.get('duration'))
        view_count = int_or_none(data.get('viewCount'))

        subtitles = {}
        cc_url = data.get('ccUrl')
        if cc_url:
            subtitles.setdefault('en', []).append({
                'url': cc_url,
                'ext': 'ttml',
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@@ -7,48 +7,53 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class LifeNewsIE(InfoExtractor):
|
||||
IE_NAME = 'lifenews'
|
||||
IE_DESC = 'LIFE | NEWS'
|
||||
_VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
|
||||
IE_NAME = 'life'
|
||||
IE_DESC = 'Life.ru'
|
||||
_VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
# single video embedded via video/source
|
||||
'url': 'http://lifenews.ru/news/98736',
|
||||
'url': 'https://life.ru/t/новости/98736',
|
||||
'md5': '77c95eaefaca216e32a76a343ad89d23',
|
||||
'info_dict': {
|
||||
'id': '98736',
|
||||
'ext': 'mp4',
|
||||
'title': 'Мужчина нашел дома архив оборонного завода',
|
||||
'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
|
||||
'timestamp': 1344154740,
|
||||
'upload_date': '20120805',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# single video embedded via iframe
|
||||
'url': 'http://lifenews.ru/news/152125',
|
||||
'url': 'https://life.ru/t/новости/152125',
|
||||
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
|
||||
'info_dict': {
|
||||
'id': '152125',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
||||
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
||||
'timestamp': 1427961840,
|
||||
'upload_date': '20150402',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# two videos embedded via iframe
|
||||
'url': 'http://lifenews.ru/news/153461',
|
||||
'url': 'https://life.ru/t/новости/153461',
|
||||
'info_dict': {
|
||||
'id': '153461',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
'timestamp': 1430825520,
|
||||
'view_count': int,
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
|
||||
@@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'timestamp': 1430825520,
|
||||
'upload_date': '20150505',
|
||||
},
|
||||
}, {
|
||||
@@ -66,22 +72,25 @@ class LifeNewsIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'timestamp': 1430825520,
|
||||
'upload_date': '20150505',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/video/13035',
|
||||
'url': 'https://life.ru/t/новости/213035',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
section = mobj.group('section')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://lifenews.ru/%s/%s' % (section, video_id),
|
||||
video_id, 'Downloading page')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_urls = re.findall(
|
||||
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
|
||||
@@ -95,26 +104,22 @@ class LifeNewsIE(InfoExtractor):
|
||||
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage),
|
||||
' - Первый по срочным новостям — LIFE | NEWS')
|
||||
' - Life.ru')
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
|
||||
comment_count = self._html_search_regex(
|
||||
r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
|
||||
webpage, 'comment count', fatal=False)
|
||||
r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
|
||||
webpage, 'view count', fatal=False, group='value')
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
|
||||
webpage, 'upload date', fatal=False, group='value'))
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'upload_date': upload_date,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
def make_entry(video_id, video_url, index=None):
|
||||
@@ -183,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='m3u8'))
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='m3u8'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _extract_stream_info(self, stream_info):
|
||||
broadcast_id = stream_info['broadcast_id']
|
||||
broadcast_id = compat_str(stream_info['broadcast_id'])
|
||||
is_live = stream_info.get('is_live')
|
||||
|
||||
formats = []
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
unsmuggle_url,
|
||||
determine_ext,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
@@ -15,71 +16,80 @@ from ..compat import compat_urllib_parse_urlencode
|
||||
class OoyalaBaseIE(InfoExtractor):
|
||||
_PLAYER_BASE = 'http://player.ooyala.com/'
|
||||
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
|
||||
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?'
|
||||
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
|
||||
|
||||
def _extract(self, content_tree_url, video_id, domain='example.org'):
|
||||
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
|
||||
metadata = content_tree[list(content_tree)[0]]
|
||||
embed_code = metadata['embed_code']
|
||||
pcode = metadata.get('asset_pcode') or embed_code
|
||||
video_info = {
|
||||
'id': embed_code,
|
||||
'title': metadata['title'],
|
||||
'description': metadata.get('description'),
|
||||
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
|
||||
'duration': float_or_none(metadata.get('duration'), 1000),
|
||||
}
|
||||
title = metadata['title']
|
||||
|
||||
auth_data = self._download_json(
|
||||
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
|
||||
compat_urllib_parse_urlencode({
|
||||
'domain': domain,
|
||||
'supportedFormats': 'mp4,rtmp,m3u8,hds',
|
||||
}), video_id)
|
||||
|
||||
cur_auth_data = auth_data['authorization_data'][embed_code]
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
|
||||
auth_data = self._download_json(
|
||||
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
|
||||
compat_urllib_parse_urlencode({
|
||||
'domain': domain,
|
||||
'supportedFormats': supported_format
|
||||
}),
|
||||
video_id, 'Downloading %s JSON' % supported_format)
|
||||
|
||||
cur_auth_data = auth_data['authorization_data'][embed_code]
|
||||
|
||||
if cur_auth_data['authorized']:
|
||||
for stream in cur_auth_data['streams']:
|
||||
url = base64.b64decode(
|
||||
stream['url']['data'].encode('ascii')).decode('utf-8')
|
||||
if url in urls:
|
||||
continue
|
||||
urls.append(url)
|
||||
delivery_type = stream['delivery_type']
|
||||
if delivery_type == 'hls' or '.m3u8' in url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url, embed_code, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif delivery_type == 'hds' or '.f4m' in url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
||||
elif '.smil' in url:
|
||||
formats.extend(self._extract_smil_formats(
|
||||
url, embed_code, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': url,
|
||||
'ext': stream.get('delivery_type'),
|
||||
'vcodec': stream.get('video_codec'),
|
||||
'format_id': delivery_type,
|
||||
'width': int_or_none(stream.get('width')),
|
||||
'height': int_or_none(stream.get('height')),
|
||||
'abr': int_or_none(stream.get('audio_bitrate')),
|
||||
'vbr': int_or_none(stream.get('video_bitrate')),
|
||||
'fps': float_or_none(stream.get('framerate')),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, cur_auth_data['message']), expected=True)
|
||||
if cur_auth_data['authorized']:
|
||||
for stream in cur_auth_data['streams']:
|
||||
s_url = base64.b64decode(
|
||||
stream['url']['data'].encode('ascii')).decode('utf-8')
|
||||
if s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
ext = determine_ext(s_url, None)
|
||||
delivery_type = stream['delivery_type']
|
||||
if delivery_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
s_url, embed_code, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif delivery_type == 'hds' or ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
s_url, embed_code, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': s_url,
|
||||
'ext': ext or stream.get('delivery_type'),
|
||||
'vcodec': stream.get('video_codec'),
|
||||
'format_id': delivery_type,
|
||||
'width': int_or_none(stream.get('width')),
|
||||
'height': int_or_none(stream.get('height')),
|
||||
'abr': int_or_none(stream.get('audio_bitrate')),
|
||||
'vbr': int_or_none(stream.get('video_bitrate')),
|
||||
'fps': float_or_none(stream.get('framerate')),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, cur_auth_data['message']), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_info['formats'] = formats
|
||||
return video_info
|
||||
subtitles = {}
|
||||
for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles[lang] = [{
|
||||
'url': sub_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': embed_code,
|
||||
'title': title,
|
||||
'description': metadata.get('description'),
|
||||
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
|
||||
'duration': float_or_none(metadata.get('duration'), 1000),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class OoyalaIE(OoyalaBaseIE):
|
||||
|
@@ -2,7 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class PeriscopeIE(InfoExtractor):
|
||||
@@ -42,8 +45,11 @@ class PeriscopeIE(InfoExtractor):
|
||||
broadcast = broadcast_data['broadcast']
|
||||
status = broadcast['status']
|
||||
|
||||
uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name')
|
||||
uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id')
|
||||
user = broadcast_data.get('user', {})
|
||||
|
||||
uploader = broadcast.get('user_display_name') or user.get('display_name')
|
||||
uploader_id = (broadcast.get('username') or user.get('username') or
|
||||
broadcast.get('user_id') or user.get('id'))
|
||||
|
||||
title = '%s - %s' % (uploader, status) if uploader else status
|
||||
state = broadcast.get('state').lower()
|
||||
@@ -92,6 +98,7 @@ class PeriscopeUserIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'LularoeHusbandMike',
|
||||
'title': 'LULAROE HUSBAND MIKE',
|
||||
'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
|
||||
},
|
||||
# Periscope only shows videos in the last 24 hours, so it's possible to
|
||||
# get 0 videos
|
||||
@@ -103,16 +110,19 @@ class PeriscopeUserIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, user_id)
|
||||
|
||||
broadcast_data = self._parse_json(self._html_search_meta(
|
||||
'broadcast-data', webpage, default='{}'), user_id)
|
||||
username = broadcast_data.get('user', {}).get('display_name')
|
||||
user_broadcasts = self._parse_json(
|
||||
self._html_search_meta('user-broadcasts', webpage, default='{}'),
|
||||
data_store = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-store=(["\'])(?P<data>.+?)\1',
|
||||
webpage, 'data store', default='{}', group='data')),
|
||||
user_id)
|
||||
|
||||
user = data_store.get('User', {}).get('user', {})
|
||||
title = user.get('display_name') or user.get('username')
|
||||
description = user.get('description')
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
|
||||
for broadcast in user_broadcasts.get('broadcasts', [])]
|
||||
for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
|
||||
|
||||
return self.playlist_result(entries, user_id, username)
|
||||
return self.playlist_result(entries, user_id, title, description)
|
||||
|
@@ -4,9 +4,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,6 +22,19 @@ class PlaywireIE(InfoExtractor):
|
||||
'duration': 145.94,
|
||||
},
|
||||
}, {
|
||||
# m3u8 in f4m
|
||||
'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
|
||||
'info_dict': {
|
||||
'id': '4840492',
|
||||
'ext': 'mp4',
|
||||
'title': 'ITV EL SHOW FULL',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Multiple resolutions while bitrates missing
|
||||
'url': 'http://cdn.playwire.com/11625/embed/85228.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@@ -48,25 +60,10 @@ class PlaywireIE(InfoExtractor):
|
||||
thumbnail = content.get('poster')
|
||||
src = content['media']['f4m']
|
||||
|
||||
f4m = self._download_xml(src, video_id)
|
||||
base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
|
||||
formats = []
|
||||
for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
tbr = int_or_none(media.get('bitrate'))
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
f = {
|
||||
'url': '%s/%s' % (base_url, media.attrib['url']),
|
||||
'tbr': tbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
if not (tbr or width or height):
|
||||
f['quality'] = 1 if '-hd.' in media_url else 0
|
||||
formats.append(f)
|
||||
formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls')
|
||||
for a_format in formats:
|
||||
if not dict_get(a_format, ['tbr', 'width', 'height']):
|
||||
a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
130
youtube_dl/extractor/radiocanada.py
Normal file
130
youtube_dl/extractor/radiocanada.py
Normal file
@@ -0,0 +1,130 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
find_xpath_attr,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
xpath_element,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class RadioCanadaIE(InfoExtractor):
    # Handles both the internal ``radiocanada:<app_code>:<id>`` scheme and
    # ici.radio-canada.ca media console widget URLs.
    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
        'info_dict': {
            'id': '7184272',
            'ext': 'flv',
            'title': 'Le parcours du tireur capté sur vidéo',
            'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
            'upload_date': '20141023',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single Radio-Canada media item.

        Queries the validation endpoint for the stream URL(s) and the
        metadata endpoint for title, description, thumbnail, etc.

        Raises ExtractorError (expected) when the validation endpoint answers
        with the literal URL ``null``, relaying the service's own message.
        """
        app_code, video_id = re.match(self._VALID_URL, url).groups()

        formats = []
        # TODO: extract m3u8 and f4m formats
        # m3u8 formats can be extracted using the ipad device_type, but ffmpeg
        # gets a 403 error code when it tries to download the segments
        # f4m formats can be extracted using the flashhd device_type, but they
        # produce an unplayable file
        for device_type in ('flash',):
            v_data = self._download_xml(
                'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
                video_id, note='Downloading %s XML' % device_type, query={
                    'appCode': app_code,
                    'idMedia': video_id,
                    'connectionType': 'broadband',
                    'multibitrate': 'true',
                    'deviceType': device_type,
                    # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
                    'paysJ391wsHjbOJwvCs26toz': 'CA',
                    'bypasslock': 'NZt5K62gRqfc',
                })
            v_url = xpath_text(v_data, 'url')
            if not v_url:
                continue
            if v_url == 'null':
                raise ExtractorError('%s said: %s' % (
                    self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
            ext = determine_ext(v_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
            else:
                bitrates = xpath_element(v_data, 'bitrates')
                if bitrates is None:
                    # No per-bitrate listing in the response: nothing to
                    # derive RTMP variants from.
                    continue
                for url_e in bitrates.findall('url'):
                    tbr = int_or_none(url_e.get('bitrate'))
                    if not tbr:
                        continue
                    formats.append({
                        'format_id': 'rtmp-%d' % tbr,
                        # Each variant URL is obtained by swapping the
                        # bitrate number embedded in the base URL's file name.
                        'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
                        'ext': 'flv',
                        'protocol': 'rtmp',
                        'width': int_or_none(url_e.get('width')),
                        'height': int_or_none(url_e.get('height')),
                        'tbr': tbr,
                    })
        self._sort_formats(formats)

        metadata = self._download_xml(
            'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
            video_id, note='Downloading metadata XML', query={
                'appCode': app_code,
                'idMedia': video_id,
            })

        def get_meta(name):
            # Text of the <Meta name="..."> element, or None when absent.
            el = find_xpath_attr(metadata, './/Meta', 'name', name)
            return el.text if el is not None else None

        return {
            'id': video_id,
            'title': get_meta('Title'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),
            'series': get_meta('Emission'),
            # Look the values up in the metadata; passing the bare key names
            # to int_or_none can never yield a number.
            'season_number': int_or_none(get_meta('SrcSaison')),
            'episode_number': int_or_none(get_meta('SrcEpisode')),
            'upload_date': unified_strdate(get_meta('Date')),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class RadioCanadaAudioVideoIE(InfoExtractor):
    # Thin front-end for ici.radio-canada.ca "audio-video" pages: it only
    # pulls the numeric media id out of the URL and delegates to the
    # ``radiocanada:`` scheme.
    # The name was previously a bare string expression (a no-op); it has to
    # be assigned to IE_NAME to take effect.
    IE_NAME = 'radiocanada:audiovideo'
    _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
        'info_dict': {
            'id': '7527184',
            'ext': 'flv',
            'title': 'Barack Obama au Vietnam',
            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
            'upload_date': '20160523',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Redirect to ``radiocanada:medianet:<id>`` for the matched media id."""
        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
|
69
youtube_dl/extractor/reuters.py
Normal file
69
youtube_dl/extractor/reuters.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class ReutersIE(InfoExtractor):
    # Matches reuters.com URLs carrying a numeric ``videoId`` query parameter.
    _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
        'md5': '8015113643a0b12838f160b0b81cc2ee',
        'info_dict': {
            'id': '368575562',
            'ext': 'mp4',
            'title': 'San Francisco police chief resigns',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a Reuters video.

        Downloads the player iframe page, reads the argument object of
        ``Reuters.yovideo.drawPlayer(...)`` and then fetches the format list
        from the yospace endpoint identified by the ids found in its ``flv``
        value.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
        # Kept as text (not parsed): individual values are picked out with
        # get_json_value below.
        video_data = js_to_json(self._search_regex(
            r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
            webpage, 'video data'))

        def get_json_value(key, fatal=False):
            # Raw string: ``\s`` is a regex class, not a string escape.
            return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)

        title = unescapeHTML(get_json_value('title', fatal=True))
        # NOTE(review): re.search returns None when the flv value does not
        # have the expected ",/<mmid>?f=<fid>" shape, which surfaces as an
        # AttributeError here.
        mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()

        mas_data = self._download_json(
            'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
            video_id, transform_source=js_to_json)
        formats = []
        for f in mas_data:
            f_url = f.get('url')
            if not f_url:
                continue
            method = f.get('method')
            if method == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                container = f.get('container')
                # Entries delivered with the "mobile" method are labelled
                # 3gp; everything else uses the advertised container.
                ext = '3gp' if method == 'mobile' else container
                formats.append({
                    'format_id': ext,
                    'url': f_url,
                    'ext': ext,
                    'container': container if method != 'mobile' else None,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': get_json_value('thumb'),
            'duration': int_or_none(get_json_value('seconds')),
            'formats': formats,
        }
|
@@ -13,8 +13,64 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class Revision3EmbedIE(InfoExtractor):
    # Handles the internal ``revision3:[<playlist_type>:]<id>`` scheme and the
    # player embed URLs of revision3 / animalist / seekernetwork.
    IE_NAME = 'revision3:embed'
    _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
    _TEST = {
        'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
        'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
        'info_dict': {
            'id': '67558',
            'ext': 'mp4',
            'title': 'The Pros & Cons Of Zoos',
            'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
            'uploader_id': 'dnews',
            'uploader': 'DNews',
        }
    }
    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'

    def _real_extract(self, url):
        """Fetch the first playlist item for the matched id and map it to an info dict."""
        match = re.match(self._VALID_URL, url)
        playlist_id = match.group('playlist_id')
        # Plain embed URLs carry no playlist type; the id is then a video id.
        playlist_type = match.group('playlist_type') or 'video_id'

        api_query = {
            'api_key': self._API_KEY,
            'codecs': 'h264,vp8,theora',
            playlist_type: playlist_id,
        }
        playlist = self._download_json(
            'http://revision3.com/api/getPlaylist.json', playlist_id,
            query=api_query)
        video_data = playlist['items'][0]

        formats = []
        for vcodec, renditions in video_data['media'].items():
            for quality_id, rendition in renditions.items():
                if quality_id != 'hls':
                    # Direct file: one format per codec/quality pair.
                    formats.append({
                        'url': rendition['url'],
                        'format_id': '%s-%s' % (vcodec, quality_id),
                        'tbr': int_or_none(rendition.get('bitrate')),
                        'vcodec': vcodec,
                    })
                    continue
                # HLS entry: expand the master playlist into its variants.
                formats.extend(self._extract_m3u8_formats(
                    rendition['url'], playlist_id, 'mp4',
                    'm3u8_native', m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        show = video_data.get('show', {})
        return {
            'id': playlist_id,
            'title': unescapeHTML(video_data['title']),
            'description': unescapeHTML(video_data.get('summary')),
            'uploader': show.get('name'),
            'uploader_id': show.get('slug'),
            'duration': int_or_none(video_data.get('duration')),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class Revision3IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
|
||||
IE_NAME = 'revision'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
|
||||
'md5': 'd94a72d85d0a829766de4deb8daaf7df',
|
||||
@@ -32,52 +88,14 @@ class Revision3IE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
# Show
|
||||
'url': 'http://testtube.com/brainstuff',
|
||||
'info_dict': {
|
||||
'id': '251',
|
||||
'title': 'BrainStuff',
|
||||
'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.',
|
||||
},
|
||||
'playlist_mincount': 93,
|
||||
}, {
|
||||
'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
|
||||
'info_dict': {
|
||||
'id': '58227',
|
||||
'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
|
||||
'duration': 275,
|
||||
'ext': 'webm',
|
||||
'title': '5 Weird Ways Plants Can Eat Animals',
|
||||
'description': 'Why have some plants evolved to eat meat?',
|
||||
'upload_date': '20150120',
|
||||
'timestamp': 1421763300,
|
||||
'uploader': 'DNews',
|
||||
'uploader_id': 'dnews',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
|
||||
'info_dict': {
|
||||
'id': '71618',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
|
||||
'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
|
||||
'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
|
||||
'uploader': 'Editors\' Picks',
|
||||
'uploader_id': 'tt-editors-picks',
|
||||
'timestamp': 1453309200,
|
||||
'upload_date': '20160120',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'http://revision3.com/variant',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Tag
|
||||
'url': 'http://testtube.com/tech-news',
|
||||
'info_dict': {
|
||||
'id': '21018',
|
||||
'title': 'tech news',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
'url': 'http://revision3.com/vr',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
|
||||
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, display_id = re.match(self._VALID_URL, url).groups()
|
||||
@@ -119,33 +137,9 @@ class Revision3IE(InfoExtractor):
|
||||
})
|
||||
return info
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
|
||||
video_id)['items'][0]
|
||||
|
||||
formats = []
|
||||
for vcodec, media in video_data['media'].items():
|
||||
for quality_id, quality in media.items():
|
||||
if quality_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality['url'], video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': quality['url'],
|
||||
'format_id': '%s-%s' % (vcodec, quality_id),
|
||||
'tbr': int_or_none(quality.get('bitrate')),
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info.update({
|
||||
'title': unescapeHTML(video_data['title']),
|
||||
'description': unescapeHTML(video_data.get('summary')),
|
||||
'uploader': video_data.get('show', {}).get('name'),
|
||||
'uploader_id': video_data.get('show', {}).get('slug'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'formats': formats,
|
||||
'_type': 'url_transparent',
|
||||
'url': 'revision3:%s' % video_id,
|
||||
})
|
||||
return info
|
||||
else:
|
||||
|
57
youtube_dl/extractor/seeker.py
Normal file
57
youtube_dl/extractor/seeker.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SeekerIE(InfoExtractor):
    # seeker.com article pages; the videos themselves are served through the
    # Revision3 embed extractor.
    _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
    _TESTS = [{
        # player.loadRevision3Item
        'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
        'md5': '30c1dc4030cc715cf05b423d0947ac18',
        'info_dict': {
            'id': '76243',
            'ext': 'webm',
            'title': 'Should Trump Be Required To Release His Tax Returns?',
            'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?',
            'uploader': 'Seeker Daily',
            'uploader_id': 'seekerdaily',
        }
    }, {
        'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
        'playlist': [
            {
                'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
                'info_dict': {
                    'id': '67558',
                    'ext': 'mp4',
                    'title': 'The Pros & Cons Of Zoos',
                    'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
                    'uploader': 'DNews',
                    'uploader_id': 'dnews',
                },
            }
        ],
        'info_dict': {
            'id': '1834116536',
            'title': 'After Gorilla Killing, Changes Ahead for Zoos',
            'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
        },
    }]

    def _real_extract(self, url):
        """Resolve an article to a single Revision3 item or a playlist of embeds."""
        display_id, article_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)

        # Case 1: the page loads one item through the player script.
        player_call = re.search(
            r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
        if player_call:
            playlist_type, playlist_id = player_call.groups()
            return self.url_result(
                'revision3:%s:%s' % (playlist_type, playlist_id),
                'Revision3Embed', playlist_id)

        # Case 2: collect every seekernetwork player iframe on the page.
        embedded_ids = re.findall(
            r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)',
            webpage)
        entries = []
        for video_id in embedded_ids:
            entries.append(self.url_result(
                'revision3:video_id:%s' % video_id, 'Revision3Embed', video_id))
        return self.playlist_result(
            entries, article_id,
            self._og_search_title(webpage),
            self._og_search_description(webpage))
|
@@ -96,20 +96,18 @@ class SpankwireIE(InfoExtractor):
|
||||
formats = []
|
||||
for height, video_url in zip(heights, video_urls):
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
_, quality = path.split('/')[4].split('_')[:2]
|
||||
f = {
|
||||
'url': video_url,
|
||||
'height': height,
|
||||
}
|
||||
tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
|
||||
if tbr:
|
||||
f.update({
|
||||
'tbr': int(tbr),
|
||||
'format_id': '%dp' % height,
|
||||
})
|
||||
m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
|
||||
if m:
|
||||
tbr = int(m.group('tbr'))
|
||||
height = int(m.group('height'))
|
||||
else:
|
||||
f['format_id'] = quality
|
||||
formats.append(f)
|
||||
tbr = None
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%dp' % height,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
@@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
|
||||
'md5': '3d6361864d7cac20b57c8784da17166f',
|
||||
'info_dict': {
|
||||
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
|
||||
'ext': 'mp4',
|
||||
@@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
|
||||
'duration': 422.255,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
class TF1IE(InfoExtractor):
|
||||
"""TF1 uses the wat.tv player."""
|
||||
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
|
||||
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||
'info_dict': {
|
||||
@@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
wat_id = self._html_search_regex(
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1',
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1',
|
||||
webpage, 'wat id', group='id')
|
||||
return self.url_result('wat:%s' % wat_id, 'Wat')
|
||||
|
@@ -151,6 +151,22 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def _extract_urls(cls, webpage):
    """Return the list of player.theplatform.com URLs found in *webpage*.

    A matching ``og:video`` / ``og:video:url`` / ``og:video:secure_url`` /
    ``twitter:player`` meta tag takes precedence and yields a one-element
    list. Otherwise every ``<iframe>``/``<script>`` ``src`` pointing at the
    player is returned in document order. The result is always a list
    (empty when nothing is found), so callers can test it for truthiness
    or iterate it without special-casing ``None`` or tuples.
    """
    m = re.search(
        r'''(?x)
                <meta\s+
                    property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
                    content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
        ''', webpage)
    if m:
        return [m.group('url')]

    # findall yields (quote, url) pairs because of the quote back-reference
    # group; only the URL is of interest.
    matches = re.findall(
        r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
    return [src for _, src in matches]
|
||||
|
||||
@staticmethod
|
||||
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
|
||||
flags = '10' if include_qs else '00'
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -6,20 +6,13 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TvpIE(InfoExtractor):
|
||||
IE_NAME = 'tvp.pl'
|
||||
_VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$'
|
||||
class TVPIE(InfoExtractor):
|
||||
IE_NAME = 'tvp'
|
||||
IE_DESC = 'Telewizja Polska'
|
||||
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
|
||||
'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
|
||||
'info_dict': {
|
||||
'id': '4278035',
|
||||
'ext': 'wmv',
|
||||
'title': 'Ogniem i mieczem, odc. 2',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
|
||||
'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
|
||||
'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
|
||||
'info_dict': {
|
||||
'id': '194536',
|
||||
@@ -36,12 +29,22 @@ class TvpIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
|
||||
'md5': 'c3b15ed1af288131115ff17a17c19dda',
|
||||
'info_dict': {
|
||||
'id': '17834272',
|
||||
'ext': 'mp4',
|
||||
'title': 'Na sygnale, odc. 39',
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -92,8 +95,8 @@ class TvpIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class TvpSeriesIE(InfoExtractor):
|
||||
IE_NAME = 'tvp.pl:Series'
|
||||
class TVPSeriesIE(InfoExtractor):
|
||||
IE_NAME = 'tvp:series'
|
||||
_VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -127,7 +130,7 @@ class TvpSeriesIE(InfoExtractor):
|
||||
videos_paths = re.findall(
|
||||
'(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
|
||||
entries = [
|
||||
self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key())
|
||||
self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key())
|
||||
for v_path in videos_paths]
|
||||
|
||||
return {
|
||||
|
@@ -142,7 +142,9 @@ class UdemyIE(InfoExtractor):
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<'])
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'href=["\'](?:https://www\.udemy\.com)?/user/logout/',
|
||||
r'>Logout<'))
|
||||
|
||||
# already logged in
|
||||
if is_logged(login_popup):
|
||||
|
@@ -2,10 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
ExtractorError,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
@@ -16,13 +19,16 @@ class UDNEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL
|
||||
_TESTS = [{
|
||||
'url': 'http://video.udn.com/embed/news/300040',
|
||||
'md5': 'de06b4c90b042c128395a88f0384817e',
|
||||
'info_dict': {
|
||||
'id': '300040',
|
||||
'ext': 'mp4',
|
||||
'title': '生物老師男變女 全校挺"做自己"',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.udn.com/embed/news/300040',
|
||||
'only_matching': True,
|
||||
@@ -38,39 +44,53 @@ class UDNEmbedIE(InfoExtractor):
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
options = json.loads(js_to_json(self._html_search_regex(
|
||||
r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
|
||||
r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary')))
|
||||
|
||||
video_urls = options['video']
|
||||
|
||||
if video_urls.get('youtube'):
|
||||
return self.url_result(video_urls.get('youtube'), 'Youtube')
|
||||
|
||||
try:
|
||||
del video_urls['youtube']
|
||||
except KeyError:
|
||||
pass
|
||||
formats = []
|
||||
for video_type, api_url in video_urls.items():
|
||||
if not api_url:
|
||||
continue
|
||||
|
||||
formats = [{
|
||||
'url': self._download_webpage(
|
||||
video_url = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, api_url), video_id,
|
||||
'retrieve url for %s video' % video_type),
|
||||
'format_id': video_type,
|
||||
'preference': 0 if video_type == 'mp4' else -1,
|
||||
} for video_type, api_url in video_urls.items() if api_url]
|
||||
note='retrieve url for %s video' % video_type)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No videos found', expected=True)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds'))
|
||||
else:
|
||||
mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url)
|
||||
a_format = {
|
||||
'url': video_url,
|
||||
# video_type may be 'mp4', which confuses YoutubeDL
|
||||
'format_id': 'http-' + video_type,
|
||||
}
|
||||
if mobj:
|
||||
a_format.update({
|
||||
'height': int_or_none(mobj.group('height')),
|
||||
'tbr': int_or_none(mobj.group('tbr')),
|
||||
})
|
||||
formats.append(a_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = None
|
||||
|
||||
if options.get('gallery') and len(options['gallery']):
|
||||
thumbnail = options['gallery'][0].get('original')
|
||||
thumbnails = [{
|
||||
'url': img_url,
|
||||
'id': img_type,
|
||||
} for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': options['title'],
|
||||
'thumbnail': thumbnail
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
@@ -37,6 +37,7 @@ class VeohIE(InfoExtractor):
|
||||
'uploader': 'afp-news',
|
||||
'duration': 123,
|
||||
},
|
||||
'skip': 'This video has been deleted.',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
|
||||
|
@@ -11,12 +11,14 @@ class ViceIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
|
||||
'md5': 'e9d77741f9e42ba583e683cd170660f7',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'ext': 'flv',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
'duration': 725.983,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.vice.com/video/how-to-hack-a-car',
|
||||
'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
|
||||
@@ -29,6 +31,7 @@ class ViceIE(InfoExtractor):
|
||||
'uploader': 'Motherboard',
|
||||
'upload_date': '20140529',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
'only_matching': True,
|
||||
|
@@ -141,6 +141,10 @@ class ViewLiftIE(ViewLiftBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.kesari.tv/news/video/1461919076414',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Was once Kaltura embed
|
||||
'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -217,7 +217,6 @@ class VKIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
info_url = url
|
||||
if video_id:
|
||||
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
|
||||
# Some videos (removed?) can only be downloaded with list id specified
|
||||
|
@@ -1,8 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import division, unicode_literals
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -23,7 +22,7 @@ class VLiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': "[V] Girl's Day's Broadcast",
|
||||
'title': "[V LIVE] Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
},
|
||||
@@ -35,24 +34,11 @@ class VLiveIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
|
||||
# UTC+x - UTC+9 (KST)
|
||||
tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone
|
||||
tz_offset = -tz // 60 - 9 * 60
|
||||
self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset)
|
||||
|
||||
status_params = self._download_json(
|
||||
'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
|
||||
video_id, 'Downloading JSON status',
|
||||
headers={'Referer': url.encode('utf-8')})
|
||||
status = status_params.get('status')
|
||||
air_start = status_params.get('onAirStartAt', '')
|
||||
is_live = status_params.get('isLive')
|
||||
|
||||
video_params = self._search_regex(
|
||||
r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)',
|
||||
r'\bvlive\.video\.init\(([^)]+)\)',
|
||||
webpage, 'video params')
|
||||
live_params, long_video_id, key = re.split(
|
||||
r'"\s*,\s*"', video_params)[1:4]
|
||||
status, _, _, live_params, long_video_id, key = re.split(
|
||||
r'"\s*,\s*"', video_params)[2:8]
|
||||
|
||||
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
|
||||
live_params = self._parse_json('"%s"' % live_params, video_id)
|
||||
@@ -61,8 +47,6 @@ class VLiveIE(InfoExtractor):
|
||||
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
|
||||
if long_video_id and key:
|
||||
return self._replay(video_id, webpage, long_video_id, key)
|
||||
elif is_live:
|
||||
status = 'LIVE_END'
|
||||
else:
|
||||
status = 'COMING_SOON'
|
||||
|
||||
@@ -70,7 +54,7 @@ class VLiveIE(InfoExtractor):
|
||||
raise ExtractorError('Uploading for replay. Please wait...',
|
||||
expected=True)
|
||||
elif status == 'COMING_SOON':
|
||||
raise ExtractorError('Coming soon! %s' % air_start, expected=True)
|
||||
raise ExtractorError('Coming soon!', expected=True)
|
||||
elif status == 'CANCELED':
|
||||
raise ExtractorError('We are sorry, '
|
||||
'but the live broadcast has been canceled.',
|
||||
|
@@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Google\'s new material design direction',
|
||||
'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# data-ooyala-id
|
||||
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
|
||||
@@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The Nexus 6: hands-on with Google\'s phablet',
|
||||
'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# volume embed
|
||||
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
|
||||
@@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The new frontier of LGBTQ civil rights, explained',
|
||||
'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# youtube embed
|
||||
'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
|
||||
@@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'upload_date': '20160324',
|
||||
'uploader_id': 'voxdotcom',
|
||||
'uploader': 'Vox',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
# SBN.VideoLinkset.entryGroup multiple ooyala embeds
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
@@ -117,7 +121,7 @@ class VoxMediaIE(InfoExtractor):
|
||||
volume_webpage = self._download_webpage(
|
||||
'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
|
||||
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
|
||||
for provider_video_type in ('ooyala', 'youtube'):
|
||||
provider_video_id = video_data.get('%s_id' % provider_video_type)
|
||||
if provider_video_id:
|
||||
|
@@ -11,7 +11,96 @@ from ..utils import (
|
||||
|
||||
|
||||
class WashingtonPostIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||
IE_NAME = 'washingtonpost'
|
||||
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TEST = {
|
||||
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
|
||||
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
|
||||
'info_dict': {
|
||||
'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Egypt finds belongings, debris from plane crash',
|
||||
'description': 'md5:a17ceee432f215a5371388c1f680bd86',
|
||||
'upload_date': '20160520',
|
||||
'uploader': 'Reuters',
|
||||
'timestamp': 1463778452,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
|
||||
video_id, transform_source=strip_jsonp)[0]['contentConfig']
|
||||
title = video_data['title']
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
for s in video_data.get('streams', []):
|
||||
s_url = s.get('url')
|
||||
if not s_url or s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
video_type = s.get('type')
|
||||
if video_type == 'smil':
|
||||
continue
|
||||
elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
for m3u8_format in m3u8_formats:
|
||||
width = m3u8_format.get('width')
|
||||
if not width:
|
||||
continue
|
||||
vbr = self._search_regex(
|
||||
r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None)
|
||||
if vbr:
|
||||
m3u8_format.update({
|
||||
'vbr': int_or_none(vbr),
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
width = int_or_none(s.get('width'))
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
has_width = width != 0
|
||||
formats.append({
|
||||
'format_id': (
|
||||
'%s-%d-%d' % (video_type, width, vbr)
|
||||
if width
|
||||
else video_type),
|
||||
'vbr': vbr if has_width else None,
|
||||
'width': width,
|
||||
'height': int_or_none(s.get('height')),
|
||||
'acodec': s.get('audioCodec'),
|
||||
'vcodec': s.get('videoCodec') if has_width else 'none',
|
||||
'filesize': int_or_none(s.get('fileSize')),
|
||||
'url': s_url,
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
|
||||
})
|
||||
source_media_url = video_data.get('sourceMediaURL')
|
||||
if source_media_url:
|
||||
formats.append({
|
||||
'format_id': 'source_media',
|
||||
'url': source_media_url,
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('blurb'),
|
||||
'uploader': video_data.get('credits', {}).get('source'),
|
||||
'formats': formats,
|
||||
'duration': int_or_none(video_data.get('videoDuration'), 100),
|
||||
'timestamp': int_or_none(
|
||||
video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
|
||||
}
|
||||
|
||||
|
||||
class WashingtonPostArticleIE(InfoExtractor):
|
||||
IE_NAME = 'washingtonpost:article'
|
||||
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||
'info_dict': {
|
||||
@@ -63,6 +152,10 @@ class WashingtonPostIE(InfoExtractor):
|
||||
}]
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
@@ -74,54 +167,7 @@ class WashingtonPostIE(InfoExtractor):
|
||||
<div\s+class="posttv-video-embed[^>]*?data-uuid=|
|
||||
data-video-uuid=
|
||||
)"([^"]+)"''', webpage)
|
||||
entries = []
|
||||
for i, uuid in enumerate(uuids, start=1):
|
||||
vinfo_all = self._download_json(
|
||||
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
|
||||
page_id,
|
||||
transform_source=strip_jsonp,
|
||||
note='Downloading information of video %d/%d' % (i, len(uuids))
|
||||
)
|
||||
vinfo = vinfo_all[0]['contentConfig']
|
||||
uploader = vinfo.get('credits', {}).get('source')
|
||||
timestamp = int_or_none(
|
||||
vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
|
||||
|
||||
formats = [{
|
||||
'format_id': (
|
||||
'%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
|
||||
if s.get('width')
|
||||
else s.get('type')),
|
||||
'vbr': s.get('bitrate') if s.get('width') != 0 else None,
|
||||
'width': s.get('width'),
|
||||
'height': s.get('height'),
|
||||
'acodec': s.get('audioCodec'),
|
||||
'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
|
||||
'filesize': s.get('fileSize'),
|
||||
'url': s.get('url'),
|
||||
'ext': 'mp4',
|
||||
'preference': -100 if s.get('type') == 'smil' else None,
|
||||
'protocol': {
|
||||
'MP4': 'http',
|
||||
'F4F': 'f4m',
|
||||
}.get(s.get('type')),
|
||||
} for s in vinfo.get('streams', [])]
|
||||
source_media_url = vinfo.get('sourceMediaURL')
|
||||
if source_media_url:
|
||||
formats.append({
|
||||
'format_id': 'source_media',
|
||||
'url': source_media_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': uuid,
|
||||
'title': vinfo['title'],
|
||||
'description': vinfo.get('blurb'),
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(vinfo.get('videoDuration'), 100),
|
||||
'timestamp': timestamp,
|
||||
})
|
||||
entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
@@ -2,25 +2,26 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
HEADRequest,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'
|
||||
_VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
|
||||
IE_NAME = 'wat.tv'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
|
||||
'md5': 'ce70e9223945ed26a8056d413ca55dc9',
|
||||
'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
|
||||
'info_dict': {
|
||||
'id': '11713067',
|
||||
'display_id': 'soupe-figues-l-orange-aux-epices',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soupe de figues à l\'orange et aux épices',
|
||||
'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
|
||||
@@ -33,7 +34,6 @@ class WatIE(InfoExtractor):
|
||||
'md5': 'fbc84e4378165278e743956d9c1bf16b',
|
||||
'info_dict': {
|
||||
'id': '11713075',
|
||||
'display_id': 'gregory-lemarchal-voix-ange',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
|
||||
'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
|
||||
@@ -44,96 +44,85 @@ class WatIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def download_video_info(self, real_id):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
|
||||
|
||||
# 'contentv4' is used in the website, but it also returns the related
|
||||
# videos, we don't need them
|
||||
info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
|
||||
return info['media']
|
||||
|
||||
def _real_extract(self, url):
|
||||
def real_id_for_chapter(chapter):
|
||||
return chapter['tc_start'].split('-')[0]
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
real_id = mobj.group('real_id')
|
||||
if not real_id:
|
||||
short_id = mobj.group('short_id')
|
||||
webpage = self._download_webpage(url, display_id or short_id)
|
||||
real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
|
||||
|
||||
video_info = self.download_video_info(real_id)
|
||||
video_info = self._download_json(
|
||||
'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']
|
||||
|
||||
error_desc = video_info.get('error_desc')
|
||||
if error_desc:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
|
||||
|
||||
geo_list = video_info.get('geoList')
|
||||
country = geo_list[0] if geo_list else ''
|
||||
|
||||
chapters = video_info['chapters']
|
||||
first_chapter = chapters[0]
|
||||
files = video_info['files']
|
||||
first_file = files[0]
|
||||
|
||||
if real_id_for_chapter(first_chapter) != real_id:
|
||||
def video_id_for_chapter(chapter):
|
||||
return chapter['tc_start'].split('-')[0]
|
||||
|
||||
if video_id_for_chapter(first_chapter) != video_id:
|
||||
self.to_screen('Multipart video detected')
|
||||
chapter_urls = []
|
||||
for chapter in chapters:
|
||||
chapter_id = real_id_for_chapter(chapter)
|
||||
# Yes, when we this chapter is processed by WatIE,
|
||||
# it will download the info again
|
||||
chapter_info = self.download_video_info(chapter_id)
|
||||
chapter_urls.append(chapter_info['url'])
|
||||
entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
|
||||
return self.playlist_result(entries, real_id, video_info['title'])
|
||||
|
||||
upload_date = None
|
||||
if 'date_diffusion' in first_chapter:
|
||||
upload_date = unified_strdate(first_chapter['date_diffusion'])
|
||||
entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
|
||||
return self.playlist_result(entries, video_id, video_info['title'])
|
||||
# Otherwise we can continue and extract just one part, we have to use
|
||||
# the short id for getting the video url
|
||||
# the video id for getting the video url
|
||||
|
||||
formats = [{
|
||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||
'format_id': 'Mobile',
|
||||
}]
|
||||
date_diffusion = first_chapter.get('date_diffusion')
|
||||
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
|
||||
|
||||
fmts = [('SD', 'web')]
|
||||
if first_file.get('hasHD'):
|
||||
fmts.append(('HD', 'webhd'))
|
||||
def extract_url(path_template, url_type):
|
||||
req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
|
||||
head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
|
||||
red_url = head.geturl()
|
||||
if req_url == red_url:
|
||||
raise ExtractorError(
|
||||
'%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
|
||||
expected=True)
|
||||
return red_url
|
||||
|
||||
def compute_token(param):
|
||||
timestamp = '%08x' % int(self._download_webpage(
|
||||
'http://www.wat.tv/servertime', real_id,
|
||||
'Downloading server time').split('|')[0])
|
||||
magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
|
||||
return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
|
||||
m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
|
||||
http_url = extract_url('android5/%s.mp4', 'http')
|
||||
|
||||
for fmt in fmts:
|
||||
webid = '/%s/%s' % (fmt[1], real_id)
|
||||
video_url = self._download_webpage(
|
||||
'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
|
||||
real_id,
|
||||
'Downloading %s video URL' % fmt[0],
|
||||
'Failed to download %s video URL' % fmt[0],
|
||||
False)
|
||||
if not video_url:
|
||||
formats = []
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
formats.extend(m3u8_formats)
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
for m3u8_format in m3u8_formats:
|
||||
mobj = re.search(
|
||||
r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
|
||||
if not mobj:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': fmt[0],
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
m3u8_format.update({
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
if not vbr or not abr:
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'display_id': display_id,
|
||||
'id': video_id,
|
||||
'title': first_chapter['title'],
|
||||
'thumbnail': first_chapter['preview'],
|
||||
'description': first_chapter['description'],
|
||||
'view_count': video_info['views'],
|
||||
'upload_date': upload_date,
|
||||
'duration': first_file['duration'],
|
||||
'duration': video_info['files'][0]['duration'],
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -12,37 +12,52 @@ from ..utils import (
|
||||
|
||||
|
||||
class XHamsterIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||
'info_dict': {
|
||||
'id': '1509445',
|
||||
'ext': 'mp4',
|
||||
'title': 'FemaleAgent Shy beauty takes the bait',
|
||||
'upload_date': '20121014',
|
||||
'uploader': 'Ruseful2011',
|
||||
'duration': 893.52,
|
||||
'age_limit': 18,
|
||||
}
|
||||
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||
'md5': '8281348b8d3c53d39fffb377d24eac4e',
|
||||
'info_dict': {
|
||||
'id': '1509445',
|
||||
'ext': 'mp4',
|
||||
'title': 'FemaleAgent Shy beauty takes the bait',
|
||||
'upload_date': '20121014',
|
||||
'uploader': 'Ruseful2011',
|
||||
'duration': 893.52,
|
||||
'age_limit': 18,
|
||||
},
|
||||
{
|
||||
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
||||
'info_dict': {
|
||||
'id': '2221348',
|
||||
'ext': 'mp4',
|
||||
'title': 'Britney Spears Sexy Booty',
|
||||
'upload_date': '20130914',
|
||||
'uploader': 'jojo747400',
|
||||
'duration': 200.48,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
||||
'info_dict': {
|
||||
'id': '2221348',
|
||||
'ext': 'mp4',
|
||||
'title': 'Britney Spears Sexy Booty',
|
||||
'upload_date': '20130914',
|
||||
'uploader': 'jojo747400',
|
||||
'duration': 200.48,
|
||||
'age_limit': 18,
|
||||
},
|
||||
{
|
||||
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
|
||||
'only_matching': True,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
]
|
||||
}, {
|
||||
# empty seo
|
||||
'url': 'http://xhamster.com/movies/5667973/.html',
|
||||
'info_dict': {
|
||||
'id': '5667973',
|
||||
'ext': 'mp4',
|
||||
'title': '....',
|
||||
'upload_date': '20160208',
|
||||
'uploader': 'parejafree',
|
||||
'duration': 72.0,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
def extract_video_url(webpage, name):
|
||||
@@ -170,7 +185,7 @@ class XHamsterEmbedIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
|
||||
r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
|
||||
webpage, 'xhamster url', default=None)
|
||||
|
||||
if not video_url:
|
||||
|
@@ -20,18 +20,24 @@ class YandexMusicBaseIE(InfoExtractor):
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
if response.get('type') == 'captcha' or 'captcha' in response:
|
||||
YandexMusicBaseIE._raise_captcha()
|
||||
|
||||
@staticmethod
|
||||
def _raise_captcha():
|
||||
raise ExtractorError(
|
||||
'YandexMusic has considered youtube-dl requests automated and '
|
||||
'asks you to solve a CAPTCHA. You can either wait for some '
|
||||
'time until unblocked and optionally use --sleep-interval '
|
||||
'in future or alternatively you can go to https://music.yandex.ru/ '
|
||||
'solve CAPTCHA, then export cookies and pass cookie file to '
|
||||
'youtube-dl with --cookies',
|
||||
expected=True)
|
||||
|
||||
def _download_webpage(self, *args, **kwargs):
|
||||
webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
|
||||
if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
|
||||
raise ExtractorError(
|
||||
'YandexMusic has considered youtube-dl requests automated and '
|
||||
'asks you to solve a CAPTCHA. You can either wait for some '
|
||||
'time until unblocked and optionally use --sleep-interval '
|
||||
'in future or alternatively you can go to https://music.yandex.ru/ '
|
||||
'solve CAPTCHA, then export cookies and pass cookie file to '
|
||||
'youtube-dl with --cookies',
|
||||
expected=True)
|
||||
self._raise_captcha()
|
||||
return webpage
|
||||
|
||||
def _download_json(self, *args, **kwargs):
|
||||
|
@@ -275,6 +275,8 @@ class YoukuIE(InfoExtractor):
|
||||
'format_id': self.get_format_name(fm),
|
||||
'ext': self.parse_ext_l(fm),
|
||||
'filesize': int(seg['size']),
|
||||
'width': stream.get('width'),
|
||||
'height': stream.get('height'),
|
||||
})
|
||||
|
||||
return {
|
||||
|
@@ -395,8 +395,8 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
downloader.add_option(
|
||||
'-r', '--rate-limit',
|
||||
dest='ratelimit', metavar='LIMIT',
|
||||
'-r', '--limit-rate', '--rate-limit',
|
||||
dest='ratelimit', metavar='RATE',
|
||||
help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
|
||||
downloader.add_option(
|
||||
'-R', '--retries',
|
||||
|
@@ -83,11 +83,8 @@ def update_self(to_screen, verbose, opener):
|
||||
|
||||
print_notes(to_screen, versions_info['versions'])
|
||||
|
||||
filename = sys.argv[0]
|
||||
# Py2EXE: Filename could be different
|
||||
if hasattr(sys, 'frozen') and not os.path.isfile(filename):
|
||||
if os.path.isfile(filename + '.exe'):
|
||||
filename += '.exe'
|
||||
# sys.executable is set to the full pathname of the exe-file for py2exe
|
||||
filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
|
||||
|
||||
if not os.access(filename, os.W_OK):
|
||||
to_screen('ERROR: no write permissions on %s' % filename)
|
||||
@@ -95,7 +92,7 @@ def update_self(to_screen, verbose, opener):
|
||||
|
||||
# Py2EXE
|
||||
if hasattr(sys, 'frozen'):
|
||||
exe = os.path.abspath(filename)
|
||||
exe = filename
|
||||
directory = os.path.dirname(exe)
|
||||
if not os.access(directory, os.W_OK):
|
||||
to_screen('ERROR: no write permissions on %s' % directory)
|
||||
|
@@ -105,9 +105,9 @@ KNOWN_EXTENSIONS = (
|
||||
'f4f', 'f4m', 'm3u8', 'smil')
|
||||
|
||||
# needed for sanitizing filenames in restricted mode
|
||||
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ',
|
||||
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'],
|
||||
'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy')))
|
||||
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
|
||||
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
|
||||
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
|
||||
|
||||
|
||||
def preferredencoding():
|
||||
@@ -861,9 +861,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
|
||||
if sys.version_info >= (3, 0):
|
||||
location = location.encode('iso-8859-1').decode('utf-8')
|
||||
else:
|
||||
location = location.decode('utf-8')
|
||||
location_escaped = escape_url(location)
|
||||
if location != location_escaped:
|
||||
del resp.headers['Location']
|
||||
if sys.version_info < (3, 0):
|
||||
location_escaped = location_escaped.encode('utf-8')
|
||||
resp.headers['Location'] = location_escaped
|
||||
return resp
|
||||
|
||||
@@ -1035,6 +1039,7 @@ def unified_strdate(date_str, day_first=True):
|
||||
format_expressions.extend([
|
||||
'%d-%m-%Y',
|
||||
'%d.%m.%Y',
|
||||
'%d.%m.%y',
|
||||
'%d/%m/%Y',
|
||||
'%d/%m/%y',
|
||||
'%d/%m/%Y %H:%M:%S',
|
||||
@@ -1055,7 +1060,10 @@ def unified_strdate(date_str, day_first=True):
|
||||
if upload_date is None:
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
|
||||
try:
|
||||
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
|
||||
except ValueError:
|
||||
pass
|
||||
if upload_date is not None:
|
||||
return compat_str(upload_date)
|
||||
|
||||
@@ -1907,7 +1915,7 @@ def parse_age_limit(s):
|
||||
|
||||
def strip_jsonp(code):
|
||||
return re.sub(
|
||||
r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
||||
r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
||||
|
||||
|
||||
def js_to_json(code):
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.05.21.2'
|
||||
__version__ = '2016.06.03'
|
||||
|
Reference in New Issue
Block a user