Compare commits
263 Commits
2014.12.16
...
2015.01.09
Author | SHA1 | Date | |
---|---|---|---|
|
1302394603 | ||
|
dd622d7c4e | ||
|
b8da6b9fc6 | ||
|
4baea47c42 | ||
|
176cf9e0c3 | ||
|
7b6faddfc8 | ||
|
f90ad27375 | ||
|
230b2287dd | ||
|
754c838903 | ||
|
aa2fd59857 | ||
|
9932a65370 | ||
|
5e4166478d | ||
|
b0e87c3110 | ||
|
c0bdf32a3c | ||
|
92b065dc53 | ||
|
9298d4e3df | ||
|
740a7fcbc8 | ||
|
5fbf25a681 | ||
|
db6e625005 | ||
|
811cacdc2c | ||
|
ce08a86462 | ||
|
11497d5bba | ||
|
0217c78377 | ||
|
bd6b25ce0e | ||
|
d51a853d5c | ||
|
9ed99402f5 | ||
|
ec3a6a3137 | ||
|
796858a53f | ||
|
5b78caca94 | ||
|
bec2248141 | ||
|
211503c39f | ||
|
adb1307b9a | ||
|
99673f04bc | ||
|
e9a537774d | ||
|
367f539769 | ||
|
398133cf55 | ||
|
52fc3ba405 | ||
|
fdd6e18b75 | ||
|
58a84b8cb6 | ||
|
c5d666d374 | ||
|
5d8993b06a | ||
|
c758bf9fd7 | ||
|
900813a328 | ||
|
2bad0e5d20 | ||
|
ccc5842bc9 | ||
|
fd86c2026d | ||
|
e4a8eae701 | ||
|
75e51819d0 | ||
|
8ee341500d | ||
|
0590062925 | ||
|
799d88d3d8 | ||
|
760aea9a96 | ||
|
d6a31b1766 | ||
|
0b54a5b10a | ||
|
6309cb9b41 | ||
|
27a82a1b93 | ||
|
ecd1936695 | ||
|
76b3c61012 | ||
|
0df2dea73b | ||
|
f8bb576c4f | ||
|
ee61f6f3e2 | ||
|
f14f2a6d79 | ||
|
2c322cc5d6 | ||
|
3b8f3a1504 | ||
|
8f9529cd05 | ||
|
f4bca0b348 | ||
|
6291438073 | ||
|
18c3c15391 | ||
|
dda620e88c | ||
|
d7cc31b63e | ||
|
5e3e1c82d8 | ||
|
aa80652f47 | ||
|
9d247bbd2d | ||
|
93e40a7b2f | ||
|
03ff2cc1c4 | ||
|
a285b6377b | ||
|
cd791a5ea0 | ||
|
87830900a9 | ||
|
dfc9d9f50a | ||
|
75311a7e16 | ||
|
628bc4d1e7 | ||
|
a4c3f48639 | ||
|
bdf80aa542 | ||
|
adf3c58ad3 | ||
|
caf90bfaa5 | ||
|
2f985f4bb4 | ||
|
67c2bcdf4c | ||
|
1d2d0e3ff2 | ||
|
9fda6ee39f | ||
|
bc3e582fe4 | ||
|
bc1fc5ddbc | ||
|
63948fc62c | ||
|
f4858a7103 | ||
|
26886e6140 | ||
|
7a1818c99b | ||
|
2ccd1b10e5 | ||
|
788fa208c8 | ||
|
8848314c08 | ||
|
c11125f9ed | ||
|
95ceeec722 | ||
|
b68ff25917 | ||
|
3e3327ea17 | ||
|
b158bb8693 | ||
|
2bf098eda4 | ||
|
382e05fa56 | ||
|
19b05d886e | ||
|
e65566a9cc | ||
|
baa3c3f0f6 | ||
|
f4f339529c | ||
|
7d02fae85b | ||
|
6e46c3f1fd | ||
|
c7e675940c | ||
|
d26b1317ed | ||
|
a221f22969 | ||
|
817f786fbb | ||
|
62420c73cb | ||
|
2522a0b7da | ||
|
46d32a12c9 | ||
|
c491418526 | ||
|
c067545c17 | ||
|
823a155293 | ||
|
324b2c78fa | ||
|
d34f98289b | ||
|
644096b15c | ||
|
15cebcc363 | ||
|
faa4ea68c0 | ||
|
29a9385ff0 | ||
|
476eae0c2a | ||
|
8399267671 | ||
|
db546cf87f | ||
|
317639758a | ||
|
fdbabca85f | ||
|
6f790e5821 | ||
|
6f5cdeb611 | ||
|
9eb4f404cb | ||
|
f58487b392 | ||
|
5b9aefef77 | ||
|
772fd5cc44 | ||
|
50a0f6df7e | ||
|
9f435c5f1c | ||
|
931e2d1d26 | ||
|
a42419da42 | ||
|
9a237b776c | ||
|
02ec32a1ef | ||
|
a1e9e6440f | ||
|
5878e6398c | ||
|
6c6f1408f2 | ||
|
b7a7319c38 | ||
|
68f705cac5 | ||
|
079d1dcd80 | ||
|
7b24bbdf49 | ||
|
f86d543ebb | ||
|
60e47a2699 | ||
|
b8bc7a696b | ||
|
7d900ef1bf | ||
|
1931a73f39 | ||
|
966ea3aebd | ||
|
b3013681ff | ||
|
416c7fcbce | ||
|
e83eebb12f | ||
|
a349873226 | ||
|
fccae2b911 | ||
|
3ee08848db | ||
|
0129b4dd45 | ||
|
1c57e7f1f4 | ||
|
d0caf3a11e | ||
|
a87bb090d9 | ||
|
beb95e7781 | ||
|
5435d7af91 | ||
|
0c0a70f4c6 | ||
|
e3947e2b7f | ||
|
da3f7fb7f8 | ||
|
429ddfd38d | ||
|
479514d015 | ||
|
355e41466d | ||
|
03d9aad87c | ||
|
3e2bcf530b | ||
|
6343a5f68e | ||
|
00de9a9828 | ||
|
7fc2cd819e | ||
|
974739aab5 | ||
|
0cc4f8e385 | ||
|
513fd2a872 | ||
|
ae6986fb89 | ||
|
e8e28989eb | ||
|
0fa629d05b | ||
|
ff7a07d5c4 | ||
|
5a18403057 | ||
|
1b7b1d6eac | ||
|
23cfa4ae45 | ||
|
e82def52a9 | ||
|
bcfe9db299 | ||
|
cf00ae7640 | ||
|
f9b9e88646 | ||
|
c2500434c3 | ||
|
f74b341dde | ||
|
461b00f34a | ||
|
4cda41ac7b | ||
|
6a1c4fbfcb | ||
|
31424c126f | ||
|
53096539dc | ||
|
2c0b475235 | ||
|
a542405200 | ||
|
3e2b085ef9 | ||
|
885e4384a1 | ||
|
2b8f151094 | ||
|
5ac71f0b27 | ||
|
39ac7c9435 | ||
|
ed7bdc8a90 | ||
|
55f0cab3a3 | ||
|
544dec6298 | ||
|
e0ae1814b1 | ||
|
9532d72371 | ||
|
1362bbbb4b | ||
|
f00fd51dae | ||
|
a8896c5ac2 | ||
|
5d3808524d | ||
|
c8f167823f | ||
|
70f6796e7d | ||
|
85d253af6b | ||
|
a86cbf5876 | ||
|
3f1399de8a | ||
|
1f809a8560 | ||
|
653d14e2f9 | ||
|
85fab7e47b | ||
|
3aa9176f08 | ||
|
33b53b6021 | ||
|
3f7421b71b | ||
|
ee45625290 | ||
|
2c2a42587b | ||
|
e2f65efcf9 | ||
|
081d6e4784 | ||
|
1d4247f64e | ||
|
1ff30d7b79 | ||
|
16ea817968 | ||
|
a2a4bae929 | ||
|
c58843b3a1 | ||
|
a22524b004 | ||
|
87c4c21e75 | ||
|
b9465395cb | ||
|
edf41477f0 | ||
|
5f627b4448 | ||
|
60e5428925 | ||
|
748ec66725 | ||
|
e54a3a2f01 | ||
|
0e4cb4f406 | ||
|
f7ffe72ac7 | ||
|
cd58dc3e56 | ||
|
c33bcf2051 | ||
|
7642c08763 | ||
|
fdc8000810 | ||
|
a91c9b15e3 | ||
|
27d67ea2ba | ||
|
d6a8160902 | ||
|
6e1b9395c6 | ||
|
b1ccbed3d4 | ||
|
37381350f8 | ||
|
7af808a5ef | ||
|
876bef5937 | ||
|
a16af51873 | ||
|
dc9a441bfa | ||
|
ee6dfe8308 | ||
|
2cb5b03e53 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -31,3 +31,5 @@ updates_key.pem
|
|||||||
test/testdata
|
test/testdata
|
||||||
.tox
|
.tox
|
||||||
youtube-dl.zsh
|
youtube-dl.zsh
|
||||||
|
.idea
|
||||||
|
.idea/*
|
@@ -9,7 +9,6 @@ notifications:
|
|||||||
email:
|
email:
|
||||||
- filippo.valsorda@gmail.com
|
- filippo.valsorda@gmail.com
|
||||||
- phihag@phihag.de
|
- phihag@phihag.de
|
||||||
- jaime.marquinez.ferrandiz+travis@gmail.com
|
|
||||||
- yasoob.khld@gmail.com
|
- yasoob.khld@gmail.com
|
||||||
# irc:
|
# irc:
|
||||||
# channels:
|
# channels:
|
||||||
|
8
AUTHORS
8
AUTHORS
@@ -93,3 +93,11 @@ Zack Fernandes
|
|||||||
cryptonaut
|
cryptonaut
|
||||||
Adrian Kretz
|
Adrian Kretz
|
||||||
Mathias Rav
|
Mathias Rav
|
||||||
|
Petr Kutalek
|
||||||
|
Will Glynn
|
||||||
|
Max Reimann
|
||||||
|
Cédric Luthi
|
||||||
|
Thijs Vermeir
|
||||||
|
Joel Leclerc
|
||||||
|
Christopher Krooss
|
||||||
|
Ondřej Caletka
|
||||||
|
@@ -44,7 +44,7 @@ In particular, every site support request issue should only pertain to services
|
|||||||
|
|
||||||
### Is anyone going to need the feature?
|
### Is anyone going to need the feature?
|
||||||
|
|
||||||
Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||||
|
|
||||||
### Is your question about youtube-dl?
|
### Is your question about youtube-dl?
|
||||||
|
|
||||||
|
18
Makefile
18
Makefile
@@ -1,4 +1,4 @@
|
|||||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
|
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
|
||||||
@@ -36,12 +36,7 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtu
|
|||||||
install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
|
install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
|
||||||
|
|
||||||
codetest:
|
codetest:
|
||||||
PYFLAKES_OUT=$$(pyflakes youtube_dl | grep -v youtube_dl/extractor/__init__.py); \
|
flake8 .
|
||||||
if test -n "$$PYFLAKES_OUT"; then \
|
|
||||||
echo "$$PYFLAKES_OUT"; \
|
|
||||||
exit 1; \
|
|
||||||
fi
|
|
||||||
pep8 . --ignore E501
|
|
||||||
|
|
||||||
test:
|
test:
|
||||||
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
|
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
|
||||||
@@ -51,11 +46,11 @@ test:
|
|||||||
ot: offlinetest
|
ot: offlinetest
|
||||||
|
|
||||||
offlinetest: codetest
|
offlinetest: codetest
|
||||||
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations
|
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
|
||||||
|
|
||||||
tar: youtube-dl.tar.gz
|
tar: youtube-dl.tar.gz
|
||||||
|
|
||||||
.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest
|
.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest supportedsites
|
||||||
|
|
||||||
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
|
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
|
||||||
|
|
||||||
@@ -68,11 +63,14 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
|||||||
chmod a+x youtube-dl
|
chmod a+x youtube-dl
|
||||||
|
|
||||||
README.md: youtube_dl/*.py youtube_dl/*/*.py
|
README.md: youtube_dl/*.py youtube_dl/*/*.py
|
||||||
COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
|
COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py
|
||||||
|
|
||||||
CONTRIBUTING.md: README.md
|
CONTRIBUTING.md: README.md
|
||||||
python devscripts/make_contributing.py README.md CONTRIBUTING.md
|
python devscripts/make_contributing.py README.md CONTRIBUTING.md
|
||||||
|
|
||||||
|
supportedsites:
|
||||||
|
python devscripts/make_supportedsites.py docs/supportedsites.md
|
||||||
|
|
||||||
README.txt: README.md
|
README.txt: README.md
|
||||||
pandoc -f markdown -t plain README.md -o README.txt
|
pandoc -f markdown -t plain README.md -o README.txt
|
||||||
|
|
||||||
|
37
README.md
37
README.md
@@ -219,6 +219,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
for each command-line argument. If the URL
|
for each command-line argument. If the URL
|
||||||
refers to a playlist, dump the whole
|
refers to a playlist, dump the whole
|
||||||
playlist information in a single line.
|
playlist information in a single line.
|
||||||
|
--print-json Be quiet and print the video information as
|
||||||
|
JSON (video is still being downloaded).
|
||||||
--newline output progress bar as new lines
|
--newline output progress bar as new lines
|
||||||
--no-progress do not print progress bar
|
--no-progress do not print progress bar
|
||||||
--console-title display progress in console titlebar
|
--console-title display progress in console titlebar
|
||||||
@@ -248,14 +250,15 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
## Video Format Options:
|
## Video Format Options:
|
||||||
-f, --format FORMAT video format code, specify the order of
|
-f, --format FORMAT video format code, specify the order of
|
||||||
preference using slashes: -f 22/17/18 . -f
|
preference using slashes, as in -f 22/17/18
|
||||||
mp4 , -f m4a and -f flv are also
|
. Instead of format codes, you can select
|
||||||
supported. You can also use the special
|
by extension for the extensions aac, m4a,
|
||||||
names "best", "bestvideo", "bestaudio",
|
mp3, mp4, ogg, wav, webm. You can also use
|
||||||
"worst", "worstvideo" and "worstaudio". By
|
the special names "best", "bestvideo",
|
||||||
default, youtube-dl will pick the best
|
"bestaudio", "worst". By default, youtube-
|
||||||
quality. Use commas to download multiple
|
dl will pick the best quality. Use commas
|
||||||
audio formats, such as -f
|
to download multiple audio formats, such as
|
||||||
|
-f
|
||||||
136/137/mp4/bestvideo,140/m4a/bestaudio.
|
136/137/mp4/bestvideo,140/m4a/bestaudio.
|
||||||
You can merge the video and audio of two
|
You can merge the video and audio of two
|
||||||
formats into a single file using -f <video-
|
formats into a single file using -f <video-
|
||||||
@@ -326,7 +329,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
|
|
||||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
|
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
|
||||||
|
|
||||||
# OUTPUT TEMPLATE
|
# OUTPUT TEMPLATE
|
||||||
|
|
||||||
@@ -449,6 +452,22 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
|
|||||||
|
|
||||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||||
|
|
||||||
|
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
||||||
|
|
||||||
|
If you put youtube-dl and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome.
|
||||||
|
|
||||||
|
To make a different directory work - either for ffmpeg, or for youtube-dl, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory.
|
||||||
|
|
||||||
|
From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
|
||||||
|
|
||||||
|
### How can I detect whether a given URL is supported by youtube-dl?
|
||||||
|
|
||||||
|
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||||
|
|
||||||
|
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
||||||
|
|
||||||
|
If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
|
||||||
|
|
||||||
# DEVELOPER INSTRUCTIONS
|
# DEVELOPER INSTRUCTIONS
|
||||||
|
|
||||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||||
|
@@ -16,7 +16,7 @@ def main():
|
|||||||
template = tmplf.read()
|
template = tmplf.read()
|
||||||
|
|
||||||
ie_htmls = []
|
ie_htmls = []
|
||||||
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
|
for ie in youtube_dl.list_extractors(age_limit=None):
|
||||||
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
|
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
|
||||||
ie_desc = getattr(ie, 'IE_DESC', None)
|
ie_desc = getattr(ie, 'IE_DESC', None)
|
||||||
if ie_desc is False:
|
if ie_desc is False:
|
||||||
|
45
devscripts/make_supportedsites.py
Normal file
45
devscripts/make_supportedsites.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import io
|
||||||
|
import optparse
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
# Import youtube_dl
|
||||||
|
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
|
||||||
|
sys.path.append(ROOT_DIR)
|
||||||
|
import youtube_dl
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
||||||
|
options, args = parser.parse_args()
|
||||||
|
if len(args) != 1:
|
||||||
|
parser.error('Expected an output filename')
|
||||||
|
|
||||||
|
outfile, = args
|
||||||
|
|
||||||
|
def gen_ies_md(ies):
|
||||||
|
for ie in ies:
|
||||||
|
ie_md = '**{0}**'.format(ie.IE_NAME)
|
||||||
|
ie_desc = getattr(ie, 'IE_DESC', None)
|
||||||
|
if ie_desc is False:
|
||||||
|
continue
|
||||||
|
if ie_desc is not None:
|
||||||
|
ie_md += ': {0}'.format(ie.IE_DESC)
|
||||||
|
if not ie.working():
|
||||||
|
ie_md += ' (Currently broken)'
|
||||||
|
yield ie_md
|
||||||
|
|
||||||
|
ies = sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower())
|
||||||
|
out = '# Supported sites\n' + ''.join(
|
||||||
|
' - ' + md + '\n'
|
||||||
|
for md in gen_ies_md(ies))
|
||||||
|
|
||||||
|
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||||
|
outf.write(out)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
500
docs/supportedsites.md
Normal file
500
docs/supportedsites.md
Normal file
@@ -0,0 +1,500 @@
|
|||||||
|
# Supported sites
|
||||||
|
- **1up.com**
|
||||||
|
- **220.ro**
|
||||||
|
- **24video**
|
||||||
|
- **3sat**
|
||||||
|
- **4tube**
|
||||||
|
- **56.com**
|
||||||
|
- **5min**
|
||||||
|
- **8tracks**
|
||||||
|
- **9gag**
|
||||||
|
- **abc.net.au**
|
||||||
|
- **AcademicEarth:Course**
|
||||||
|
- **AddAnime**
|
||||||
|
- **AdobeTV**
|
||||||
|
- **AdultSwim**
|
||||||
|
- **Aftonbladet**
|
||||||
|
- **AlJazeera**
|
||||||
|
- **Allocine**
|
||||||
|
- **anitube.se**
|
||||||
|
- **AnySex**
|
||||||
|
- **Aparat**
|
||||||
|
- **AppleTrailers**
|
||||||
|
- **archive.org**: archive.org videos
|
||||||
|
- **ARD**
|
||||||
|
- **ARD:mediathek**
|
||||||
|
- **arte.tv**
|
||||||
|
- **arte.tv:+7**
|
||||||
|
- **arte.tv:concert**
|
||||||
|
- **arte.tv:creative**
|
||||||
|
- **arte.tv:ddc**
|
||||||
|
- **arte.tv:embed**
|
||||||
|
- **arte.tv:future**
|
||||||
|
- **audiomack**
|
||||||
|
- **AUEngine**
|
||||||
|
- **Azubu**
|
||||||
|
- **bambuser**
|
||||||
|
- **bambuser:channel**
|
||||||
|
- **Bandcamp**
|
||||||
|
- **Bandcamp:album**
|
||||||
|
- **bbc.co.uk**: BBC iPlayer
|
||||||
|
- **Beeg**
|
||||||
|
- **BehindKink**
|
||||||
|
- **Bet**
|
||||||
|
- **Bild**: Bild.de
|
||||||
|
- **BiliBili**
|
||||||
|
- **blinkx**
|
||||||
|
- **blip.tv:user**
|
||||||
|
- **BlipTV**
|
||||||
|
- **Bloomberg**
|
||||||
|
- **Bpb**: Bundeszentrale für politische Bildung
|
||||||
|
- **BR**: Bayerischer Rundfunk Mediathek
|
||||||
|
- **Break**
|
||||||
|
- **Brightcove**
|
||||||
|
- **BuzzFeed**
|
||||||
|
- **BYUtv**
|
||||||
|
- **Canal13cl**
|
||||||
|
- **canalc2.tv**
|
||||||
|
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||||
|
- **CBS**
|
||||||
|
- **CBSNews**: CBS News
|
||||||
|
- **CeskaTelevize**
|
||||||
|
- **channel9**: Channel 9
|
||||||
|
- **Chilloutzone**
|
||||||
|
- **Cinchcast**
|
||||||
|
- **Cinemassacre**
|
||||||
|
- **clipfish**
|
||||||
|
- **cliphunter**
|
||||||
|
- **Clipsyndicate**
|
||||||
|
- **Cloudy**
|
||||||
|
- **Clubic**
|
||||||
|
- **cmt.com**
|
||||||
|
- **CNET**
|
||||||
|
- **CNN**
|
||||||
|
- **CNNBlogs**
|
||||||
|
- **CollegeHumor**
|
||||||
|
- **ComCarCoff**
|
||||||
|
- **ComedyCentral**
|
||||||
|
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||||
|
- **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
|
- **Cracked**
|
||||||
|
- **Criterion**
|
||||||
|
- **Crunchyroll**
|
||||||
|
- **crunchyroll:playlist**
|
||||||
|
- **CSpan**: C-SPAN
|
||||||
|
- **culturebox.francetvinfo.fr**
|
||||||
|
- **dailymotion**
|
||||||
|
- **dailymotion:playlist**
|
||||||
|
- **dailymotion:user**
|
||||||
|
- **daum.net**
|
||||||
|
- **DBTV**
|
||||||
|
- **DeezerPlaylist**
|
||||||
|
- **defense.gouv.fr**
|
||||||
|
- **Discovery**
|
||||||
|
- **divxstage**: DivxStage
|
||||||
|
- **Dotsub**
|
||||||
|
- **Dropbox**
|
||||||
|
- **DrTuber**
|
||||||
|
- **DRTV**
|
||||||
|
- **Dump**
|
||||||
|
- **dvtv**: http://video.aktualne.cz/
|
||||||
|
- **EbaumsWorld**
|
||||||
|
- **eHow**
|
||||||
|
- **Einthusan**
|
||||||
|
- **eitb.tv**
|
||||||
|
- **EllenTV**
|
||||||
|
- **EllenTV:clips**
|
||||||
|
- **ElPais**: El País
|
||||||
|
- **EMPFlix**
|
||||||
|
- **Engadget**
|
||||||
|
- **Eporner**
|
||||||
|
- **Escapist**
|
||||||
|
- **EveryonesMixtape**
|
||||||
|
- **exfm**: ex.fm
|
||||||
|
- **ExpoTV**
|
||||||
|
- **ExtremeTube**
|
||||||
|
- **facebook**
|
||||||
|
- **faz.net**
|
||||||
|
- **fc2**
|
||||||
|
- **fernsehkritik.tv**
|
||||||
|
- **fernsehkritik.tv:postecke**
|
||||||
|
- **Firedrive**
|
||||||
|
- **Firstpost**
|
||||||
|
- **firsttv**: Видеоархив - Первый канал
|
||||||
|
- **Flickr**
|
||||||
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
|
- **Foxgay**
|
||||||
|
- **FoxNews**
|
||||||
|
- **france2.fr:generation-quoi**
|
||||||
|
- **FranceCulture**
|
||||||
|
- **FranceInter**
|
||||||
|
- **francetv**: France 2, 3, 4, 5 and Ô
|
||||||
|
- **francetvinfo.fr**
|
||||||
|
- **Freesound**
|
||||||
|
- **freespeech.org**
|
||||||
|
- **FreeVideo**
|
||||||
|
- **FunnyOrDie**
|
||||||
|
- **Gamekings**
|
||||||
|
- **GameOne**
|
||||||
|
- **gameone:playlist**
|
||||||
|
- **GameSpot**
|
||||||
|
- **GameStar**
|
||||||
|
- **Gametrailers**
|
||||||
|
- **GDCVault**
|
||||||
|
- **generic**: Generic downloader that works on some sites
|
||||||
|
- **GiantBomb**
|
||||||
|
- **Glide**: Glide mobile video messages (glide.me)
|
||||||
|
- **Globo**
|
||||||
|
- **GodTube**
|
||||||
|
- **GoldenMoustache**
|
||||||
|
- **Golem**
|
||||||
|
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in
|
||||||
|
- **Goshgay**
|
||||||
|
- **Grooveshark**
|
||||||
|
- **Groupon**
|
||||||
|
- **Hark**
|
||||||
|
- **Heise**
|
||||||
|
- **Helsinki**: helsinki.fi
|
||||||
|
- **HentaiStigma**
|
||||||
|
- **HornBunny**
|
||||||
|
- **HostingBulk**
|
||||||
|
- **HotNewHipHop**
|
||||||
|
- **Howcast**
|
||||||
|
- **HowStuffWorks**
|
||||||
|
- **HuffPost**: Huffington Post
|
||||||
|
- **Hypem**
|
||||||
|
- **Iconosquare**
|
||||||
|
- **ign.com**
|
||||||
|
- **imdb**: Internet Movie Database trailers
|
||||||
|
- **imdb:list**: Internet Movie Database lists
|
||||||
|
- **Ina**
|
||||||
|
- **InfoQ**
|
||||||
|
- **Instagram**
|
||||||
|
- **instagram:user**: Instagram user profile
|
||||||
|
- **InternetVideoArchive**
|
||||||
|
- **IPrima**
|
||||||
|
- **ivi**: ivi.ru
|
||||||
|
- **ivi:compilation**: ivi.ru compilations
|
||||||
|
- **Izlesene**
|
||||||
|
- **JadoreCettePub**
|
||||||
|
- **JeuxVideo**
|
||||||
|
- **Jove**
|
||||||
|
- **jpopsuki.tv**
|
||||||
|
- **Jukebox**
|
||||||
|
- **Kankan**
|
||||||
|
- **keek**
|
||||||
|
- **KeezMovies**
|
||||||
|
- **KhanAcademy**
|
||||||
|
- **KickStarter**
|
||||||
|
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||||
|
- **KrasView**: Красвью
|
||||||
|
- **Ku6**
|
||||||
|
- **la7.tv**
|
||||||
|
- **Laola1Tv**
|
||||||
|
- **lifenews**: LIFE | NEWS
|
||||||
|
- **LiveLeak**
|
||||||
|
- **livestream**
|
||||||
|
- **livestream:original**
|
||||||
|
- **lrt.lt**
|
||||||
|
- **lynda**: lynda.com videos
|
||||||
|
- **lynda:course**: lynda.com online courses
|
||||||
|
- **m6**
|
||||||
|
- **macgamestore**: MacGameStore trailers
|
||||||
|
- **mailru**: Видео@Mail.Ru
|
||||||
|
- **Malemotion**
|
||||||
|
- **MDR**
|
||||||
|
- **metacafe**
|
||||||
|
- **Metacritic**
|
||||||
|
- **Mgoon**
|
||||||
|
- **Minhateca**
|
||||||
|
- **MinistryGrid**
|
||||||
|
- **mitele.es**
|
||||||
|
- **mixcloud**
|
||||||
|
- **MLB**
|
||||||
|
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||||
|
- **Mofosex**
|
||||||
|
- **Mojvideo**
|
||||||
|
- **Moniker**: allmyvideos.net and vidspot.net
|
||||||
|
- **mooshare**: Mooshare.biz
|
||||||
|
- **Morningstar**: morningstar.com
|
||||||
|
- **Motherless**
|
||||||
|
- **Motorsport**: motorsport.com
|
||||||
|
- **MovieClips**
|
||||||
|
- **Moviezine**
|
||||||
|
- **movshare**: MovShare
|
||||||
|
- **MPORA**
|
||||||
|
- **MTV**
|
||||||
|
- **mtviggy.com**
|
||||||
|
- **mtvservices:embedded**
|
||||||
|
- **MuenchenTV**: münchen.tv
|
||||||
|
- **MusicPlayOn**
|
||||||
|
- **MusicVault**
|
||||||
|
- **muzu.tv**
|
||||||
|
- **MySpace**
|
||||||
|
- **MySpace:album**
|
||||||
|
- **MySpass**
|
||||||
|
- **myvideo**
|
||||||
|
- **MyVidster**
|
||||||
|
- **Naver**
|
||||||
|
- **NBA**
|
||||||
|
- **NBC**
|
||||||
|
- **NBCNews**
|
||||||
|
- **ndr**: NDR.de - Mediathek
|
||||||
|
- **NDTV**
|
||||||
|
- **NerdCubedFeed**
|
||||||
|
- **Newgrounds**
|
||||||
|
- **Newstube**
|
||||||
|
- **nfb**: National Film Board of Canada
|
||||||
|
- **nfl.com**
|
||||||
|
- **nhl.com**
|
||||||
|
- **nhl.com:videocenter**: NHL videocenter category
|
||||||
|
- **niconico**: ニコニコ動画
|
||||||
|
- **NiconicoPlaylist**
|
||||||
|
- **Noco**
|
||||||
|
- **Normalboots**
|
||||||
|
- **NosVideo**
|
||||||
|
- **novamov**: NovaMov
|
||||||
|
- **Nowness**
|
||||||
|
- **nowvideo**: NowVideo
|
||||||
|
- **npo.nl**
|
||||||
|
- **NRK**
|
||||||
|
- **NRKTV**
|
||||||
|
- **NTV**
|
||||||
|
- **Nuvid**
|
||||||
|
- **NYTimes**
|
||||||
|
- **ocw.mit.edu**
|
||||||
|
- **OktoberfestTV**
|
||||||
|
- **on.aol.com**
|
||||||
|
- **Ooyala**
|
||||||
|
- **orf:oe1**: Radio Österreich 1
|
||||||
|
- **orf:tvthek**: ORF TVthek
|
||||||
|
- **ORFFM4**: radio FM4
|
||||||
|
- **parliamentlive.tv**: UK parliament videos
|
||||||
|
- **Patreon**
|
||||||
|
- **PBS**
|
||||||
|
- **Phoenix**
|
||||||
|
- **Photobucket**
|
||||||
|
- **PlanetaPlay**
|
||||||
|
- **play.fm**
|
||||||
|
- **played.to**
|
||||||
|
- **Playvid**
|
||||||
|
- **plus.google**: Google Plus
|
||||||
|
- **pluzz.francetv.fr**
|
||||||
|
- **podomatic**
|
||||||
|
- **PornHd**
|
||||||
|
- **PornHub**
|
||||||
|
- **Pornotube**
|
||||||
|
- **PornoXO**
|
||||||
|
- **PromptFile**
|
||||||
|
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||||
|
- **Pyvideo**
|
||||||
|
- **QuickVid**
|
||||||
|
- **radio.de**
|
||||||
|
- **radiofrance**
|
||||||
|
- **Rai**
|
||||||
|
- **RBMARadio**
|
||||||
|
- **RedTube**
|
||||||
|
- **Restudy**
|
||||||
|
- **ReverbNation**
|
||||||
|
- **RingTV**
|
||||||
|
- **RottenTomatoes**
|
||||||
|
- **Roxwel**
|
||||||
|
- **RTBF**
|
||||||
|
- **RTLnow**
|
||||||
|
- **rtlxl.nl**
|
||||||
|
- **RTP**
|
||||||
|
- **RTS**: RTS.ch
|
||||||
|
- **rtve.es:alacarta**: RTVE a la carta
|
||||||
|
- **rtve.es:live**: RTVE.es live streams
|
||||||
|
- **RUHD**
|
||||||
|
- **rutube**: Rutube videos
|
||||||
|
- **rutube:channel**: Rutube channels
|
||||||
|
- **rutube:movie**: Rutube movies
|
||||||
|
- **rutube:person**: Rutube person videos
|
||||||
|
- **RUTV**: RUTV.RU
|
||||||
|
- **Sapo**: SAPO Vídeos
|
||||||
|
- **savefrom.net**
|
||||||
|
- **SBS**: sbs.com.au
|
||||||
|
- **SciVee**
|
||||||
|
- **screen.yahoo:search**: Yahoo screen search
|
||||||
|
- **Screencast**
|
||||||
|
- **ScreencastOMatic**
|
||||||
|
- **ScreenwaveMedia**
|
||||||
|
- **ServingSys**
|
||||||
|
- **Sexu**
|
||||||
|
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
||||||
|
- **Shared**
|
||||||
|
- **ShareSix**
|
||||||
|
- **Sina**
|
||||||
|
- **Slideshare**
|
||||||
|
- **Slutload**
|
||||||
|
- **smotri**: Smotri.com
|
||||||
|
- **smotri:broadcast**: Smotri.com broadcasts
|
||||||
|
- **smotri:community**: Smotri.com community videos
|
||||||
|
- **smotri:user**: Smotri.com user videos
|
||||||
|
- **Snotr**
|
||||||
|
- **Sockshare**
|
||||||
|
- **Sohu**
|
||||||
|
- **soundcloud**
|
||||||
|
- **soundcloud:playlist**
|
||||||
|
- **soundcloud:set**
|
||||||
|
- **soundcloud:user**
|
||||||
|
- **Soundgasm**
|
||||||
|
- **southpark.cc.com**
|
||||||
|
- **southpark.de**
|
||||||
|
- **Space**
|
||||||
|
- **Spankwire**
|
||||||
|
- **Spiegel**
|
||||||
|
- **Spiegel:Article**: Articles on spiegel.de
|
||||||
|
- **Spiegeltv**
|
||||||
|
- **Spike**
|
||||||
|
- **Sport5**
|
||||||
|
- **SportBox**
|
||||||
|
- **SportDeutschland**
|
||||||
|
- **SRMediathek**: Süddeutscher Rundfunk
|
||||||
|
- **stanfordoc**: Stanford Open ClassRoom
|
||||||
|
- **Steam**
|
||||||
|
- **streamcloud.eu**
|
||||||
|
- **StreamCZ**
|
||||||
|
- **SunPorno**
|
||||||
|
- **SWRMediathek**
|
||||||
|
- **Syfy**
|
||||||
|
- **SztvHu**
|
||||||
|
- **Tagesschau**
|
||||||
|
- **Tapely**
|
||||||
|
- **Tass**
|
||||||
|
- **teachertube**: teachertube.com videos
|
||||||
|
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||||
|
- **TeachingChannel**
|
||||||
|
- **Teamcoco**
|
||||||
|
- **TeamFour**
|
||||||
|
- **TechTalks**
|
||||||
|
- **techtv.mit.edu**
|
||||||
|
- **TED**
|
||||||
|
- **tegenlicht.vpro.nl**
|
||||||
|
- **TeleBruxelles**
|
||||||
|
- **telecinco.es**
|
||||||
|
- **TeleMB**
|
||||||
|
- **TenPlay**
|
||||||
|
- **TF1**
|
||||||
|
- **TheOnion**
|
||||||
|
- **ThePlatform**
|
||||||
|
- **TheSixtyOne**
|
||||||
|
- **ThisAV**
|
||||||
|
- **THVideo**
|
||||||
|
- **THVideoPlaylist**
|
||||||
|
- **tinypic**: tinypic.com videos
|
||||||
|
- **tlc.com**
|
||||||
|
- **tlc.de**
|
||||||
|
- **TMZ**
|
||||||
|
- **TNAFlix**
|
||||||
|
- **tou.tv**
|
||||||
|
- **Toypics**: Toypics user profile
|
||||||
|
- **ToypicsUser**: Toypics user profile
|
||||||
|
- **TrailerAddict** (Currently broken)
|
||||||
|
- **Trilulilu**
|
||||||
|
- **TruTube**
|
||||||
|
- **Tube8**
|
||||||
|
- **Tudou**
|
||||||
|
- **Tumblr**
|
||||||
|
- **TuneIn**
|
||||||
|
- **Turbo**
|
||||||
|
- **Tutv**
|
||||||
|
- **tv.dfb.de**
|
||||||
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
|
- **tvp.pl**
|
||||||
|
- **TVPlay**: TV3Play and related services
|
||||||
|
- **Twitch**
|
||||||
|
- **Ubu**
|
||||||
|
- **udemy**
|
||||||
|
- **udemy:course**
|
||||||
|
- **Unistra**
|
||||||
|
- **Urort**: NRK P3 Urørt
|
||||||
|
- **ustream**
|
||||||
|
- **ustream:channel**
|
||||||
|
- **Vbox7**
|
||||||
|
- **VeeHD**
|
||||||
|
- **Veoh**
|
||||||
|
- **Vesti**: Вести.Ru
|
||||||
|
- **Vevo**
|
||||||
|
- **VGTV**
|
||||||
|
- **vh1.com**
|
||||||
|
- **Vice**
|
||||||
|
- **Viddler**
|
||||||
|
- **video.google:search**: Google Video search
|
||||||
|
- **video.mit.edu**
|
||||||
|
- **VideoBam**
|
||||||
|
- **VideoDetective**
|
||||||
|
- **videofy.me**
|
||||||
|
- **videolectures.net**
|
||||||
|
- **VideoMega**
|
||||||
|
- **VideoPremium**
|
||||||
|
- **VideoTt**: video.tt - Your True Tube
|
||||||
|
- **videoweed**: VideoWeed
|
||||||
|
- **Vidme**
|
||||||
|
- **Vidzi**
|
||||||
|
- **viki**
|
||||||
|
- **vimeo**
|
||||||
|
- **vimeo:album**
|
||||||
|
- **vimeo:channel**
|
||||||
|
- **vimeo:group**
|
||||||
|
- **vimeo:likes**: Vimeo user likes
|
||||||
|
- **vimeo:review**: Review pages on vimeo
|
||||||
|
- **vimeo:user**
|
||||||
|
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
|
||||||
|
- **Vimple**: Vimple.ru
|
||||||
|
- **Vine**
|
||||||
|
- **vine:user**
|
||||||
|
- **vk.com**
|
||||||
|
- **vk.com:user-videos**: vk.com:All of a user's videos
|
||||||
|
- **Vodlocker**
|
||||||
|
- **Vporn**
|
||||||
|
- **VRT**
|
||||||
|
- **vube**: Vube.com
|
||||||
|
- **VuClip**
|
||||||
|
- **vulture.com**
|
||||||
|
- **Walla**
|
||||||
|
- **WashingtonPost**
|
||||||
|
- **wat.tv**
|
||||||
|
- **WayOfTheMaster**
|
||||||
|
- **WDR**
|
||||||
|
- **wdr:mobile**
|
||||||
|
- **WDRMaus**: Sendung mit der Maus
|
||||||
|
- **Weibo**
|
||||||
|
- **Wimp**
|
||||||
|
- **Wistia**
|
||||||
|
- **WorldStarHipHop**
|
||||||
|
- **wrzuta.pl**
|
||||||
|
- **XBef**
|
||||||
|
- **XboxClips**
|
||||||
|
- **XHamster**
|
||||||
|
- **XMinus**
|
||||||
|
- **XNXX**
|
||||||
|
- **XTube**
|
||||||
|
- **XTubeUser**: XTube user profile
|
||||||
|
- **XVideos**
|
||||||
|
- **Yahoo**: Yahoo screen and movies
|
||||||
|
- **YesJapan**
|
||||||
|
- **Ynet**
|
||||||
|
- **YouJizz**
|
||||||
|
- **Youku**
|
||||||
|
- **YouPorn**
|
||||||
|
- **YourUpload**
|
||||||
|
- **youtube**: YouTube.com
|
||||||
|
- **youtube:channel**: YouTube.com channels
|
||||||
|
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
|
||||||
|
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
||||||
|
- **youtube:playlist**: YouTube.com playlists
|
||||||
|
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
||||||
|
- **youtube:search**: YouTube.com searches
|
||||||
|
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||||
|
- **youtube:search_url**: YouTube.com search URLs
|
||||||
|
- **youtube:show**: YouTube.com (multi-season) shows
|
||||||
|
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||||
|
- **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
|
||||||
|
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||||
|
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||||
|
- **ZDF**
|
||||||
|
- **ZDFChannel**
|
||||||
|
- **zingmp3:album**: mp3.zing.vn albums
|
||||||
|
- **zingmp3:song**: mp3.zing.vn songs
|
@@ -1,2 +1,6 @@
|
|||||||
[wheel]
|
[wheel]
|
||||||
universal = True
|
universal = True
|
||||||
|
|
||||||
|
[flake8]
|
||||||
|
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build
|
||||||
|
ignore = E501
|
||||||
|
@@ -82,24 +82,14 @@ class FakeYDL(YoutubeDL):
|
|||||||
|
|
||||||
def gettestcases(include_onlymatching=False):
|
def gettestcases(include_onlymatching=False):
|
||||||
for ie in youtube_dl.extractor.gen_extractors():
|
for ie in youtube_dl.extractor.gen_extractors():
|
||||||
t = getattr(ie, '_TEST', None)
|
for tc in ie.get_testcases(include_onlymatching):
|
||||||
if t:
|
yield tc
|
||||||
assert not hasattr(ie, '_TESTS'), \
|
|
||||||
'%s has _TEST and _TESTS' % type(ie).__name__
|
|
||||||
tests = [t]
|
|
||||||
else:
|
|
||||||
tests = getattr(ie, '_TESTS', [])
|
|
||||||
for t in tests:
|
|
||||||
if not include_onlymatching and t.get('only_matching', False):
|
|
||||||
continue
|
|
||||||
t['name'] = type(ie).__name__[:-len('IE')]
|
|
||||||
yield t
|
|
||||||
|
|
||||||
|
|
||||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def expect_info_dict(self, expected_dict, got_dict):
|
def expect_info_dict(self, got_dict, expected_dict):
|
||||||
for info_field, expected in expected_dict.items():
|
for info_field, expected in expected_dict.items():
|
||||||
if isinstance(expected, compat_str) and expected.startswith('re:'):
|
if isinstance(expected, compat_str) and expected.startswith('re:'):
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
@@ -120,6 +110,20 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||||||
else:
|
else:
|
||||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||||
got = 'md5:' + md5(got_dict.get(info_field))
|
got = 'md5:' + md5(got_dict.get(info_field))
|
||||||
|
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||||
|
got = got_dict.get(info_field)
|
||||||
|
self.assertTrue(
|
||||||
|
isinstance(got, list),
|
||||||
|
'Expected field %s to be a list, but it is of type %s' % (
|
||||||
|
info_field, type(got).__name__))
|
||||||
|
expected_num = int(expected.partition(':')[2])
|
||||||
|
assertGreaterEqual(
|
||||||
|
self, len(got), expected_num,
|
||||||
|
'Expected %d items in field %s, but only got %d' % (
|
||||||
|
expected_num, info_field, len(got)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
self.assertEqual(expected, got,
|
self.assertEqual(expected, got,
|
||||||
|
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||||
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||||
|
|
||||||
|
def test_html_search_meta(self):
|
||||||
|
ie = self.ie
|
||||||
|
html = '''
|
||||||
|
<meta name="a" content="1" />
|
||||||
|
<meta name='b' content='2'>
|
||||||
|
<meta name="c" content='3'>
|
||||||
|
<meta name=d content='4'>
|
||||||
|
<meta property="e" content='5' >
|
||||||
|
<meta content="6" name="f">
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(ie._html_search_meta('a', html), '1')
|
||||||
|
self.assertEqual(ie._html_search_meta('b', html), '2')
|
||||||
|
self.assertEqual(ie._html_search_meta('c', html), '3')
|
||||||
|
self.assertEqual(ie._html_search_meta('d', html), '4')
|
||||||
|
self.assertEqual(ie._html_search_meta('e', html), '5')
|
||||||
|
self.assertEqual(ie._html_search_meta('f', html), '6')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -8,6 +8,8 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
import copy
|
||||||
|
|
||||||
from test.helper import FakeYDL, assertRegexpMatches
|
from test.helper import FakeYDL, assertRegexpMatches
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
@@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], 'vid-high')
|
self.assertEqual(downloaded['format_id'], 'vid-high')
|
||||||
|
|
||||||
|
def test_format_selection_audio_exts(self):
|
||||||
|
formats = [
|
||||||
|
{'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
|
||||||
|
{'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
|
||||||
|
{'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
|
||||||
|
{'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
|
||||||
|
{'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
|
||||||
|
]
|
||||||
|
|
||||||
|
info_dict = _make_result(formats)
|
||||||
|
ydl = YDL({'format': 'best'})
|
||||||
|
ie = YoutubeIE(ydl)
|
||||||
|
ie._sort_formats(info_dict['formats'])
|
||||||
|
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'aac-64')
|
||||||
|
|
||||||
|
ydl = YDL({'format': 'mp3'})
|
||||||
|
ie = YoutubeIE(ydl)
|
||||||
|
ie._sort_formats(info_dict['formats'])
|
||||||
|
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'mp3-64')
|
||||||
|
|
||||||
|
ydl = YDL({'prefer_free_formats': True})
|
||||||
|
ie = YoutubeIE(ydl)
|
||||||
|
ie._sort_formats(info_dict['formats'])
|
||||||
|
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(downloaded['format_id'], 'ogg-64')
|
||||||
|
|
||||||
def test_format_selection_video(self):
|
def test_format_selection_video(self):
|
||||||
formats = [
|
formats = [
|
||||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
|
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
|
||||||
@@ -218,7 +251,7 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
# 3D
|
# 3D
|
||||||
'85', '84', '102', '83', '101', '82', '100',
|
'85', '84', '102', '83', '101', '82', '100',
|
||||||
# Dash video
|
# Dash video
|
||||||
'138', '137', '248', '136', '247', '135', '246',
|
'137', '248', '136', '247', '135', '246',
|
||||||
'245', '244', '134', '243', '133', '242', '160',
|
'245', '244', '134', '243', '133', '242', '160',
|
||||||
# Dash audio
|
# Dash audio
|
||||||
'141', '172', '140', '171', '139',
|
'141', '172', '140', '171', '139',
|
||||||
|
@@ -45,11 +45,6 @@ class TestAgeRestriction(unittest.TestCase):
|
|||||||
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||||
'505835.mp4', 2, old_age=25)
|
'505835.mp4', 2, old_age=25)
|
||||||
|
|
||||||
def test_pornotube(self):
|
|
||||||
self._assert_restricted(
|
|
||||||
'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
|
|
||||||
'1689755.flv', 13)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -155,7 +155,7 @@ def generator(test_case):
|
|||||||
if is_playlist:
|
if is_playlist:
|
||||||
self.assertEqual(res_dict['_type'], 'playlist')
|
self.assertEqual(res_dict['_type'], 'playlist')
|
||||||
self.assertTrue('entries' in res_dict)
|
self.assertTrue('entries' in res_dict)
|
||||||
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
|
||||||
|
|
||||||
if 'playlist_mincount' in test_case:
|
if 'playlist_mincount' in test_case:
|
||||||
assertGreaterEqual(
|
assertGreaterEqual(
|
||||||
@@ -204,7 +204,7 @@ def generator(test_case):
|
|||||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
|
|
||||||
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
|
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
|
||||||
finally:
|
finally:
|
||||||
try_rm_tcs_files()
|
try_rm_tcs_files()
|
||||||
if is_playlist and res_dict is not None and res_dict.get('entries'):
|
if is_playlist and res_dict is not None and res_dict.get('entries'):
|
||||||
|
@@ -17,6 +17,7 @@ from youtube_dl.extractor import (
|
|||||||
TEDIE,
|
TEDIE,
|
||||||
VimeoIE,
|
VimeoIE,
|
||||||
WallaIE,
|
WallaIE,
|
||||||
|
CeskaTelevizeIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -88,6 +89,14 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
|||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertTrue(subtitles['it'] is not None)
|
self.assertTrue(subtitles['it'] is not None)
|
||||||
|
|
||||||
|
def test_youtube_translated_subtitles(self):
|
||||||
|
# This video has a subtitles track, which can be translated
|
||||||
|
self.url = 'Ky9eprVWzlI'
|
||||||
|
self.DL.params['writeautomaticsub'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['it']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertTrue(subtitles['it'] is not None)
|
||||||
|
|
||||||
def test_youtube_nosubtitles(self):
|
def test_youtube_nosubtitles(self):
|
||||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||||
self.url = 'n5BB19UTcdA'
|
self.url = 'n5BB19UTcdA'
|
||||||
@@ -309,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles):
|
|||||||
self.assertEqual(len(subtitles), 0)
|
self.assertEqual(len(subtitles), 0)
|
||||||
|
|
||||||
|
|
||||||
|
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||||
|
IE = CeskaTelevizeIE
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['cs']))
|
||||||
|
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
|
||||||
|
|
||||||
|
def test_nosubtitles(self):
|
||||||
|
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||||
|
self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles), 0)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -7,9 +7,7 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import io
|
import io
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
import unittest
|
|
||||||
|
|
||||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
@@ -16,6 +16,7 @@ import json
|
|||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
|
age_restricted,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
clean_html,
|
clean_html,
|
||||||
DateRange,
|
DateRange,
|
||||||
@@ -402,5 +403,12 @@ Trying to open render node...
|
|||||||
Success at /dev/dri/renderD128.
|
Success at /dev/dri/renderD128.
|
||||||
ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||||
|
|
||||||
|
def test_age_restricted(self):
|
||||||
|
self.assertFalse(age_restricted(None, 10)) # unrestricted content
|
||||||
|
self.assertFalse(age_restricted(1, None)) # unrestricted policy
|
||||||
|
self.assertFalse(age_restricted(8, 10))
|
||||||
|
self.assertTrue(age_restricted(18, 14))
|
||||||
|
self.assertFalse(age_restricted(18, 18))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -63,6 +63,7 @@ from .utils import (
|
|||||||
YoutubeDLHandler,
|
YoutubeDLHandler,
|
||||||
prepend_extension,
|
prepend_extension,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
|
age_restricted,
|
||||||
)
|
)
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .extractor import get_info_extractor, gen_extractors
|
from .extractor import get_info_extractor, gen_extractors
|
||||||
@@ -550,13 +551,8 @@ class YoutubeDL(object):
|
|||||||
max_views = self.params.get('max_views')
|
max_views = self.params.get('max_views')
|
||||||
if max_views is not None and view_count > max_views:
|
if max_views is not None and view_count > max_views:
|
||||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||||
age_limit = self.params.get('age_limit')
|
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
||||||
if age_limit is not None:
|
return 'Skipping "%s" because it is age restricted' % title
|
||||||
actual_age_limit = info_dict.get('age_limit')
|
|
||||||
if actual_age_limit is None:
|
|
||||||
actual_age_limit = 0
|
|
||||||
if age_limit < actual_age_limit:
|
|
||||||
return 'Skipping "' + title + '" because it is age restricted'
|
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return '%s has already been recorded in archive' % video_title
|
return '%s has already been recorded in archive' % video_title
|
||||||
return None
|
return None
|
||||||
@@ -790,7 +786,7 @@ class YoutubeDL(object):
|
|||||||
if video_formats:
|
if video_formats:
|
||||||
return video_formats[0]
|
return video_formats[0]
|
||||||
else:
|
else:
|
||||||
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
|
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
|
||||||
if format_spec in extensions:
|
if format_spec in extensions:
|
||||||
filter_f = lambda f: f['ext'] == format_spec
|
filter_f = lambda f: f['ext'] == format_spec
|
||||||
else:
|
else:
|
||||||
@@ -917,6 +913,14 @@ class YoutubeDL(object):
|
|||||||
'requested_formats': formats_info,
|
'requested_formats': formats_info,
|
||||||
'format': rf,
|
'format': rf,
|
||||||
'ext': formats_info[0]['ext'],
|
'ext': formats_info[0]['ext'],
|
||||||
|
'width': formats_info[0].get('width'),
|
||||||
|
'height': formats_info[0].get('height'),
|
||||||
|
'resolution': formats_info[0].get('resolution'),
|
||||||
|
'fps': formats_info[0].get('fps'),
|
||||||
|
'vcodec': formats_info[0].get('vcodec'),
|
||||||
|
'vbr': formats_info[0].get('vbr'),
|
||||||
|
'acodec': formats_info[1].get('acodec'),
|
||||||
|
'abr': formats_info[1].get('abr'),
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
selected_format = None
|
selected_format = None
|
||||||
@@ -1016,13 +1020,13 @@ class YoutubeDL(object):
|
|||||||
descfn = filename + '.description'
|
descfn = filename + '.description'
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
|
||||||
self.to_screen('[info] Video description is already present')
|
self.to_screen('[info] Video description is already present')
|
||||||
|
elif info_dict.get('description') is None:
|
||||||
|
self.report_warning('There\'s no description to write.')
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.to_screen('[info] Writing video description to: ' + descfn)
|
self.to_screen('[info] Writing video description to: ' + descfn)
|
||||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||||
descfile.write(info_dict['description'])
|
descfile.write(info_dict['description'])
|
||||||
except (KeyError, TypeError):
|
|
||||||
self.report_warning('There\'s no description to write.')
|
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error('Cannot write description file ' + descfn)
|
self.report_error('Cannot write description file ' + descfn)
|
||||||
return
|
return
|
||||||
@@ -1149,8 +1153,7 @@ class YoutubeDL(object):
|
|||||||
except (PostProcessingError) as err:
|
except (PostProcessingError) as err:
|
||||||
self.report_error('postprocessing: %s' % str(err))
|
self.report_error('postprocessing: %s' % str(err))
|
||||||
return
|
return
|
||||||
|
self.record_download_archive(info_dict)
|
||||||
self.record_download_archive(info_dict)
|
|
||||||
|
|
||||||
def download(self, url_list):
|
def download(self, url_list):
|
||||||
"""Download a given list of URLs."""
|
"""Download a given list of URLs."""
|
||||||
@@ -1334,7 +1337,9 @@ class YoutubeDL(object):
|
|||||||
formats = info_dict.get('formats', [info_dict])
|
formats = info_dict.get('formats', [info_dict])
|
||||||
idlen = max(len('format code'),
|
idlen = max(len('format code'),
|
||||||
max(len(f['format_id']) for f in formats))
|
max(len(f['format_id']) for f in formats))
|
||||||
formats_s = [line(f, idlen) for f in formats]
|
formats_s = [
|
||||||
|
line(f, idlen) for f in formats
|
||||||
|
if f.get('preference') is None or f['preference'] >= -1000]
|
||||||
if len(formats) > 1:
|
if len(formats) > 1:
|
||||||
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
|
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
|
||||||
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
||||||
|
@@ -38,7 +38,7 @@ from .update import update_self
|
|||||||
from .downloader import (
|
from .downloader import (
|
||||||
FileDownloader,
|
FileDownloader,
|
||||||
)
|
)
|
||||||
from .extractor import gen_extractors
|
from .extractor import gen_extractors, list_extractors
|
||||||
from .YoutubeDL import YoutubeDL
|
from .YoutubeDL import YoutubeDL
|
||||||
|
|
||||||
|
|
||||||
@@ -95,24 +95,22 @@ def _real_main(argv=None):
|
|||||||
_enc = preferredencoding()
|
_enc = preferredencoding()
|
||||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||||
|
|
||||||
extractors = gen_extractors()
|
|
||||||
|
|
||||||
if opts.list_extractors:
|
if opts.list_extractors:
|
||||||
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
|
for ie in list_extractors(opts.age_limit):
|
||||||
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
||||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||||
for mu in matchedUrls:
|
for mu in matchedUrls:
|
||||||
compat_print(' ' + mu)
|
compat_print(' ' + mu)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
if opts.list_extractor_descriptions:
|
if opts.list_extractor_descriptions:
|
||||||
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
|
for ie in list_extractors(opts.age_limit):
|
||||||
if not ie._WORKING:
|
if not ie._WORKING:
|
||||||
continue
|
continue
|
||||||
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
||||||
if desc is False:
|
if desc is False:
|
||||||
continue
|
continue
|
||||||
if hasattr(ie, 'SEARCH_KEY'):
|
if hasattr(ie, 'SEARCH_KEY'):
|
||||||
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny')
|
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
||||||
_COUNTS = ('', '5', '10', 'all')
|
_COUNTS = ('', '5', '10', 'all')
|
||||||
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||||
compat_print(desc)
|
compat_print(desc)
|
||||||
@@ -199,7 +197,8 @@ def _real_main(argv=None):
|
|||||||
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||||
' template'.format(outtmpl))
|
' template'.format(outtmpl))
|
||||||
|
|
||||||
any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||||
|
any_printing = opts.print_json
|
||||||
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
||||||
|
|
||||||
# PostProcessors
|
# PostProcessors
|
||||||
@@ -245,7 +244,7 @@ def _real_main(argv=None):
|
|||||||
'password': opts.password,
|
'password': opts.password,
|
||||||
'twofactor': opts.twofactor,
|
'twofactor': opts.twofactor,
|
||||||
'videopassword': opts.videopassword,
|
'videopassword': opts.videopassword,
|
||||||
'quiet': (opts.quiet or any_printing),
|
'quiet': (opts.quiet or any_getting or any_printing),
|
||||||
'no_warnings': opts.no_warnings,
|
'no_warnings': opts.no_warnings,
|
||||||
'forceurl': opts.geturl,
|
'forceurl': opts.geturl,
|
||||||
'forcetitle': opts.gettitle,
|
'forcetitle': opts.gettitle,
|
||||||
@@ -255,9 +254,9 @@ def _real_main(argv=None):
|
|||||||
'forceduration': opts.getduration,
|
'forceduration': opts.getduration,
|
||||||
'forcefilename': opts.getfilename,
|
'forcefilename': opts.getfilename,
|
||||||
'forceformat': opts.getformat,
|
'forceformat': opts.getformat,
|
||||||
'forcejson': opts.dumpjson,
|
'forcejson': opts.dumpjson or opts.print_json,
|
||||||
'dump_single_json': opts.dump_single_json,
|
'dump_single_json': opts.dump_single_json,
|
||||||
'simulate': opts.simulate or any_printing,
|
'simulate': opts.simulate or any_getting,
|
||||||
'skip_download': opts.skip_download,
|
'skip_download': opts.skip_download,
|
||||||
'format': opts.format,
|
'format': opts.format,
|
||||||
'format_limit': opts.format_limit,
|
'format_limit': opts.format_limit,
|
||||||
@@ -365,3 +364,5 @@ def main(argv=None):
|
|||||||
sys.exit('ERROR: fixed output name but more than one file to download')
|
sys.exit('ERROR: fixed output name but more than one file to download')
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
sys.exit('\nERROR: Interrupted by user')
|
sys.exit('\nERROR: Interrupted by user')
|
||||||
|
|
||||||
|
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
|
||||||
|
@@ -285,7 +285,7 @@ class FileDownloader(object):
|
|||||||
Return True on success and False otherwise
|
Return True on success and False otherwise
|
||||||
"""
|
"""
|
||||||
# Check file already present
|
# Check file already present
|
||||||
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
if filename != '-' and self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
||||||
self.report_file_already_downloaded(filename)
|
self.report_file_already_downloaded(filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
|
@@ -187,24 +187,34 @@ def build_fragments_list(boot_info):
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
def write_flv_header(stream, metadata):
|
def write_unsigned_int(stream, val):
|
||||||
"""Writes the FLV header and the metadata to stream"""
|
stream.write(struct_pack('!I', val))
|
||||||
|
|
||||||
|
|
||||||
|
def write_unsigned_int_24(stream, val):
|
||||||
|
stream.write(struct_pack('!I', val)[1:])
|
||||||
|
|
||||||
|
|
||||||
|
def write_flv_header(stream):
|
||||||
|
"""Writes the FLV header to stream"""
|
||||||
# FLV header
|
# FLV header
|
||||||
stream.write(b'FLV\x01')
|
stream.write(b'FLV\x01')
|
||||||
stream.write(b'\x05')
|
stream.write(b'\x05')
|
||||||
stream.write(b'\x00\x00\x00\x09')
|
stream.write(b'\x00\x00\x00\x09')
|
||||||
# FLV File body
|
|
||||||
stream.write(b'\x00\x00\x00\x00')
|
stream.write(b'\x00\x00\x00\x00')
|
||||||
# FLVTAG
|
|
||||||
# Script data
|
|
||||||
stream.write(b'\x12')
|
def write_metadata_tag(stream, metadata):
|
||||||
# Size of the metadata with 3 bytes
|
"""Writes optional metadata tag to stream"""
|
||||||
stream.write(struct_pack('!L', len(metadata))[1:])
|
SCRIPT_TAG = b'\x12'
|
||||||
stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
|
FLV_TAG_HEADER_LEN = 11
|
||||||
stream.write(metadata)
|
|
||||||
# Magic numbers extracted from the output files produced by AdobeHDS.php
|
if metadata:
|
||||||
#(https://github.com/K-S-V/Scripts)
|
stream.write(SCRIPT_TAG)
|
||||||
stream.write(b'\x00\x00\x01\x73')
|
write_unsigned_int_24(stream, len(metadata))
|
||||||
|
stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
|
||||||
|
stream.write(metadata)
|
||||||
|
write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
|
||||||
|
|
||||||
|
|
||||||
def _add_ns(prop):
|
def _add_ns(prop):
|
||||||
@@ -256,7 +266,11 @@ class F4mFD(FileDownloader):
|
|||||||
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
||||||
else:
|
else:
|
||||||
bootstrap = base64.b64decode(bootstrap_node.text)
|
bootstrap = base64.b64decode(bootstrap_node.text)
|
||||||
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
|
metadata_node = media.find(_add_ns('metadata'))
|
||||||
|
if metadata_node is not None:
|
||||||
|
metadata = base64.b64decode(metadata_node.text)
|
||||||
|
else:
|
||||||
|
metadata = None
|
||||||
boot_info = read_bootstrap_info(bootstrap)
|
boot_info = read_bootstrap_info(bootstrap)
|
||||||
|
|
||||||
fragments_list = build_fragments_list(boot_info)
|
fragments_list = build_fragments_list(boot_info)
|
||||||
@@ -269,7 +283,8 @@ class F4mFD(FileDownloader):
|
|||||||
|
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||||
write_flv_header(dest_stream, metadata)
|
write_flv_header(dest_stream)
|
||||||
|
write_metadata_tag(dest_stream, metadata)
|
||||||
|
|
||||||
# This dict stores the download progress, it's updated by the progress
|
# This dict stores the download progress, it's updated by the progress
|
||||||
# hook
|
# hook
|
||||||
|
@@ -11,7 +11,6 @@ from ..compat import (
|
|||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
check_executable,
|
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -27,16 +26,13 @@ class HlsFD(FileDownloader):
|
|||||||
'-bsf:a', 'aac_adtstoasc',
|
'-bsf:a', 'aac_adtstoasc',
|
||||||
encodeFilename(tmpfilename, for_subprocess=True)]
|
encodeFilename(tmpfilename, for_subprocess=True)]
|
||||||
|
|
||||||
for program in ['avconv', 'ffmpeg']:
|
ffpp = FFmpegPostProcessor(downloader=self)
|
||||||
if check_executable(program, ['-version']):
|
program = ffpp._executable
|
||||||
break
|
if program is None:
|
||||||
else:
|
|
||||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||||
return False
|
return False
|
||||||
cmd = [program] + args
|
|
||||||
|
|
||||||
ffpp = FFmpegPostProcessor(downloader=self)
|
|
||||||
ffpp.check_version()
|
ffpp.check_version()
|
||||||
|
cmd = [program] + args
|
||||||
|
|
||||||
retval = subprocess.call(cmd)
|
retval = subprocess.call(cmd)
|
||||||
if retval == 0:
|
if retval == 0:
|
||||||
|
@@ -4,8 +4,8 @@ import os
|
|||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..compat import compat_subprocess_get_DEVNULL
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
check_executable,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -20,11 +20,7 @@ class MplayerFD(FileDownloader):
|
|||||||
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
|
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
|
||||||
'-dumpstream', '-dumpfile', tmpfilename, url]
|
'-dumpstream', '-dumpfile', tmpfilename, url]
|
||||||
# Check for mplayer first
|
# Check for mplayer first
|
||||||
try:
|
if not check_executable('mplayer', ['-h']):
|
||||||
subprocess.call(
|
|
||||||
['mplayer', '-h'],
|
|
||||||
stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
|
|
||||||
except (OSError, IOError):
|
|
||||||
self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
|
self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@@ -185,7 +185,7 @@ class RtmpFD(FileDownloader):
|
|||||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
if prevsize == cursize and retval == RD_FAILED:
|
if prevsize == cursize and retval == RD_FAILED:
|
||||||
break
|
break
|
||||||
# Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
|
# Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
|
||||||
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
|
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
|
||||||
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||||
retval = RD_SUCCESS
|
retval = RD_SUCCESS
|
||||||
|
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
|
|||||||
from .abc import ABCIE
|
from .abc import ABCIE
|
||||||
from .academicearth import AcademicEarthCourseIE
|
from .academicearth import AcademicEarthCourseIE
|
||||||
from .addanime import AddAnimeIE
|
from .addanime import AddAnimeIE
|
||||||
|
from .adobetv import AdobeTVIE
|
||||||
from .adultswim import AdultSwimIE
|
from .adultswim import AdultSwimIE
|
||||||
from .aftonbladet import AftonbladetIE
|
from .aftonbladet import AftonbladetIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
|
from .alphaporno import AlphaPornoIE
|
||||||
from .anitube import AnitubeIE
|
from .anitube import AnitubeIE
|
||||||
from .anysex import AnySexIE
|
from .anysex import AnySexIE
|
||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
@@ -23,6 +25,7 @@ from .arte import (
|
|||||||
ArteTVDDCIE,
|
ArteTVDDCIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
)
|
)
|
||||||
|
from .atresplayer import AtresPlayerIE
|
||||||
from .audiomack import AudiomackIE
|
from .audiomack import AudiomackIE
|
||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .azubu import AzubuIE
|
from .azubu import AzubuIE
|
||||||
@@ -63,10 +66,12 @@ from .cnet import CNETIE
|
|||||||
from .cnn import (
|
from .cnn import (
|
||||||
CNNIE,
|
CNNIE,
|
||||||
CNNBlogsIE,
|
CNNBlogsIE,
|
||||||
|
CNNArticleIE,
|
||||||
)
|
)
|
||||||
from .collegehumor import CollegeHumorIE
|
from .collegehumor import CollegeHumorIE
|
||||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||||
from .comcarcoff import ComCarCoffIE
|
from .comcarcoff import ComCarCoffIE
|
||||||
|
from .commonmistakes import CommonMistakesIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
@@ -88,12 +93,14 @@ from .dotsub import DotsubIE
|
|||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
from .drtv import DRTVIE
|
from .drtv import DRTVIE
|
||||||
|
from .dvtv import DVTVIE
|
||||||
from .dump import DumpIE
|
from .dump import DumpIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
from .discovery import DiscoveryIE
|
from .discovery import DiscoveryIE
|
||||||
from .divxstage import DivxStageIE
|
from .divxstage import DivxStageIE
|
||||||
from .dropbox import DropboxIE
|
from .dropbox import DropboxIE
|
||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
|
from .echomsk import EchoMskIE
|
||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .einthusan import EinthusanIE
|
from .einthusan import EinthusanIE
|
||||||
@@ -106,6 +113,7 @@ from .elpais import ElPaisIE
|
|||||||
from .empflix import EMPFlixIE
|
from .empflix import EMPFlixIE
|
||||||
from .engadget import EngadgetIE
|
from .engadget import EngadgetIE
|
||||||
from .eporner import EpornerIE
|
from .eporner import EpornerIE
|
||||||
|
from .eroprofile import EroProfileIE
|
||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
from .everyonesmixtape import EveryonesMixtapeIE
|
from .everyonesmixtape import EveryonesMixtapeIE
|
||||||
from .exfm import ExfmIE
|
from .exfm import ExfmIE
|
||||||
@@ -151,6 +159,7 @@ from .gametrailers import GametrailersIE
|
|||||||
from .gdcvault import GDCVaultIE
|
from .gdcvault import GDCVaultIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
from .giantbomb import GiantBombIE
|
from .giantbomb import GiantBombIE
|
||||||
|
from .giga import GigaIE
|
||||||
from .glide import GlideIE
|
from .glide import GlideIE
|
||||||
from .globo import GloboIE
|
from .globo import GloboIE
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
@@ -164,8 +173,10 @@ from .grooveshark import GroovesharkIE
|
|||||||
from .groupon import GrouponIE
|
from .groupon import GrouponIE
|
||||||
from .hark import HarkIE
|
from .hark import HarkIE
|
||||||
from .heise import HeiseIE
|
from .heise import HeiseIE
|
||||||
|
from .hellporno import HellPornoIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
from .hornbunny import HornBunnyIE
|
from .hornbunny import HornBunnyIE
|
||||||
from .hostingbulk import HostingBulkIE
|
from .hostingbulk import HostingBulkIE
|
||||||
from .hotnewhiphop import HotNewHipHopIE
|
from .hotnewhiphop import HotNewHipHopIE
|
||||||
@@ -263,6 +274,8 @@ from .nbc import (
|
|||||||
)
|
)
|
||||||
from .ndr import NDRIE
|
from .ndr import NDRIE
|
||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
|
from .netzkino import NetzkinoIE
|
||||||
|
from .nerdcubed import NerdCubedFeedIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
@@ -289,6 +302,7 @@ from .nytimes import NYTimesIE
|
|||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .oktoberfesttv import OktoberfestTVIE
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
|
from .openfilm import OpenFilmIE
|
||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
ORFOE1IE,
|
ORFOE1IE,
|
||||||
@@ -313,6 +327,7 @@ from .prosiebensat1 import ProSiebenSat1IE
|
|||||||
from .pyvideo import PyvideoIE
|
from .pyvideo import PyvideoIE
|
||||||
from .quickvid import QuickVidIE
|
from .quickvid import QuickVidIE
|
||||||
from .radiode import RadioDeIE
|
from .radiode import RadioDeIE
|
||||||
|
from .radiobremen import RadioBremenIE
|
||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
from .rai import RaiIE
|
from .rai import RaiIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
@@ -333,6 +348,7 @@ from .ruhd import RUHDIE
|
|||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
|
RutubeEmbedIE,
|
||||||
RutubeMovieIE,
|
RutubeMovieIE,
|
||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
)
|
)
|
||||||
@@ -342,6 +358,7 @@ from .savefrom import SaveFromIE
|
|||||||
from .sbs import SBSIE
|
from .sbs import SBSIE
|
||||||
from .scivee import SciVeeIE
|
from .scivee import SciVeeIE
|
||||||
from .screencast import ScreencastIE
|
from .screencast import ScreencastIE
|
||||||
|
from .screencastomatic import ScreencastOMaticIE
|
||||||
from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
|
from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
from .sexu import SexuIE
|
from .sexu import SexuIE
|
||||||
@@ -402,6 +419,7 @@ from .ted import TEDIE
|
|||||||
from .telebruxelles import TeleBruxellesIE
|
from .telebruxelles import TeleBruxellesIE
|
||||||
from .telecinco import TelecincoIE
|
from .telecinco import TelecincoIE
|
||||||
from .telemb import TeleMBIE
|
from .telemb import TeleMBIE
|
||||||
|
from .teletask import TeleTaskIE
|
||||||
from .tenplay import TenPlayIE
|
from .tenplay import TenPlayIE
|
||||||
from .testurl import TestURLIE
|
from .testurl import TestURLIE
|
||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
@@ -460,6 +478,7 @@ from .videott import VideoTtIE
|
|||||||
from .videoweed import VideoWeedIE
|
from .videoweed import VideoWeedIE
|
||||||
from .vidme import VidmeIE
|
from .vidme import VidmeIE
|
||||||
from .vidzi import VidziIE
|
from .vidzi import VidziIE
|
||||||
|
from .vier import VierIE, VierVideosIE
|
||||||
from .vimeo import (
|
from .vimeo import (
|
||||||
VimeoIE,
|
VimeoIE,
|
||||||
VimeoAlbumIE,
|
VimeoAlbumIE,
|
||||||
@@ -495,6 +514,7 @@ from .wdr import (
|
|||||||
WDRMobileIE,
|
WDRMobileIE,
|
||||||
WDRMausIE,
|
WDRMausIE,
|
||||||
)
|
)
|
||||||
|
from .webofstories import WebOfStoriesIE
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
@@ -507,6 +527,7 @@ from .xminus import XMinusIE
|
|||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
from .xvideos import XVideosIE
|
from .xvideos import XVideosIE
|
||||||
from .xtube import XTubeUserIE, XTubeIE
|
from .xtube import XTubeUserIE, XTubeIE
|
||||||
|
from .xxxymovies import XXXYMoviesIE
|
||||||
from .yahoo import (
|
from .yahoo import (
|
||||||
YahooIE,
|
YahooIE,
|
||||||
YahooSearchIE,
|
YahooSearchIE,
|
||||||
@@ -529,7 +550,7 @@ from .youtube import (
|
|||||||
YoutubeSearchURLIE,
|
YoutubeSearchURLIE,
|
||||||
YoutubeShowIE,
|
YoutubeShowIE,
|
||||||
YoutubeSubscriptionsIE,
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTopListIE,
|
YoutubeTruncatedIDIE,
|
||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
YoutubeUserIE,
|
YoutubeUserIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
@@ -555,6 +576,17 @@ def gen_extractors():
|
|||||||
return [klass() for klass in _ALL_CLASSES]
|
return [klass() for klass in _ALL_CLASSES]
|
||||||
|
|
||||||
|
|
||||||
|
def list_extractors(age_limit):
|
||||||
|
"""
|
||||||
|
Return a list of extractors that are suitable for the given age,
|
||||||
|
sorted by extractor ID.
|
||||||
|
"""
|
||||||
|
|
||||||
|
return sorted(
|
||||||
|
filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
|
||||||
|
key=lambda ie: ie.IE_NAME.lower())
|
||||||
|
|
||||||
|
|
||||||
def get_info_extractor(ie_name):
|
def get_info_extractor(ie_name):
|
||||||
"""Returns the info extractor class with the given ie_name"""
|
"""Returns the info extractor class with the given ie_name"""
|
||||||
return globals()[ie_name + 'IE']
|
return globals()[ie_name + 'IE']
|
||||||
|
70
youtube_dl/extractor/adobetv.py
Normal file
70
youtube_dl/extractor/adobetv.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_duration,
|
||||||
|
unified_strdate,
|
||||||
|
str_to_int,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AdobeTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
|
||||||
|
'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
|
||||||
|
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
|
||||||
|
'thumbnail': 're:https?://.*\.jpg$',
|
||||||
|
'upload_date': '20110914',
|
||||||
|
'duration': 60,
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
player = self._parse_json(
|
||||||
|
self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
title = player.get('title') or self._search_regex(
|
||||||
|
r'data-title="([^"]+)"', webpage, 'title')
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
upload_date = unified_strdate(
|
||||||
|
self._html_search_meta('datepublished', webpage, 'upload date'))
|
||||||
|
|
||||||
|
duration = parse_duration(
|
||||||
|
self._html_search_meta('duration', webpage, 'duration')
|
||||||
|
or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
|
||||||
|
|
||||||
|
view_count = str_to_int(self._search_regex(
|
||||||
|
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
|
||||||
|
webpage, 'view count'))
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': source['src'],
|
||||||
|
'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None,
|
||||||
|
'tbr': source.get('bitrate'),
|
||||||
|
} for source in player['sources']]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
77
youtube_dl/extractor/alphaporno.py
Normal file
77
youtube_dl/extractor/alphaporno.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_iso8601,
|
||||||
|
parse_duration,
|
||||||
|
parse_filesize,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlphaPornoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
|
||||||
|
'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '258807',
|
||||||
|
'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sensual striptease porn with Samantha Alexandra',
|
||||||
|
'thumbnail': 're:https?://.*\.jpg$',
|
||||||
|
'timestamp': 1418694611,
|
||||||
|
'upload_date': '20141216',
|
||||||
|
'duration': 387,
|
||||||
|
'filesize_approx': 54120000,
|
||||||
|
'tbr': 1145,
|
||||||
|
'categories': list,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
|
||||||
|
ext = self._html_search_meta(
|
||||||
|
'encodingFormat', webpage, 'ext', default='.mp4')[1:]
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
[r'<meta content="([^"]+)" itemprop="description">',
|
||||||
|
r'class="title" itemprop="name">([^<]+)<'],
|
||||||
|
webpage, 'title')
|
||||||
|
thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
|
||||||
|
timestamp = parse_iso8601(self._html_search_meta(
|
||||||
|
'uploadDate', webpage, 'upload date'))
|
||||||
|
duration = parse_duration(self._html_search_meta(
|
||||||
|
'duration', webpage, 'duration'))
|
||||||
|
filesize_approx = parse_filesize(self._html_search_meta(
|
||||||
|
'contentSize', webpage, 'file size'))
|
||||||
|
bitrate = int_or_none(self._html_search_meta(
|
||||||
|
'bitrate', webpage, 'bitrate'))
|
||||||
|
categories = self._html_search_meta(
|
||||||
|
'keywords', webpage, 'categories', default='').split(',')
|
||||||
|
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': ext,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
|
'filesize_approx': filesize_approx,
|
||||||
|
'tbr': bitrate,
|
||||||
|
'categories': categories,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
}
|
@@ -1,42 +1,48 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import unified_strdate
|
||||||
unified_strdate,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ArchiveOrgIE(InfoExtractor):
|
class ArchiveOrgIE(InfoExtractor):
|
||||||
IE_NAME = 'archive.org'
|
IE_NAME = 'archive.org'
|
||||||
IE_DESC = 'archive.org videos'
|
IE_DESC = 'archive.org videos'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
"url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
|
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
|
|
||||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "1968 Demo - FJCC Conference Presentation Reel #1",
|
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
"description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
|
'ext': 'ogv',
|
||||||
"upload_date": "19681210",
|
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||||
"uploader": "SRI International"
|
'description': 'md5:1780b464abaca9991d8968c877bb53ed',
|
||||||
|
'upload_date': '19681210',
|
||||||
|
'uploader': 'SRI International'
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://archive.org/details/Cops1922',
|
||||||
|
'md5': '18f2a19e6d89af8425671da1cf3d4e04',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Cops1922',
|
||||||
|
'ext': 'ogv',
|
||||||
|
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||||
|
'description': 'md5:70f72ee70882f713d4578725461ffcc3',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
json_url = url + ('?' if '?' in url else '&') + 'output=json'
|
json_url = url + ('?' if '?' in url else '&') + 'output=json'
|
||||||
json_data = self._download_webpage(json_url, video_id)
|
data = self._download_json(json_url, video_id)
|
||||||
data = json.loads(json_data)
|
|
||||||
|
|
||||||
title = data['metadata']['title'][0]
|
def get_optional(data_dict, field):
|
||||||
description = data['metadata']['description'][0]
|
return data_dict['metadata'].get(field, [None])[0]
|
||||||
uploader = data['metadata']['creator'][0]
|
|
||||||
upload_date = unified_strdate(data['metadata']['date'][0])
|
title = get_optional(data, 'title')
|
||||||
|
description = get_optional(data, 'description')
|
||||||
|
uploader = get_optional(data, 'creator')
|
||||||
|
upload_date = unified_strdate(get_optional(data, 'date'))
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
|
@@ -37,7 +37,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
config_xml_url, video_id, note='Downloading configuration')
|
config_xml_url, video_id, note='Downloading configuration')
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'forma_id': q.attrib['quality'],
|
'format_id': q.attrib['quality'],
|
||||||
# The playpath starts at 'mp4:', if we don't manually
|
# The playpath starts at 'mp4:', if we don't manually
|
||||||
# split the url, rtmpdump will incorrectly parse them
|
# split the url, rtmpdump will incorrectly parse them
|
||||||
'url': q.text.split('mp4:', 1)[0],
|
'url': q.text.split('mp4:', 1)[0],
|
||||||
@@ -133,7 +133,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
'width': int_or_none(f.get('width')),
|
'width': int_or_none(f.get('width')),
|
||||||
'height': int_or_none(f.get('height')),
|
'height': int_or_none(f.get('height')),
|
||||||
'tbr': int_or_none(f.get('bitrate')),
|
'tbr': int_or_none(f.get('bitrate')),
|
||||||
'quality': qfunc(f['quality']),
|
'quality': qfunc(f.get('quality')),
|
||||||
'source_preference': source_pref,
|
'source_preference': source_pref,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
114
youtube_dl/extractor/atresplayer.py
Normal file
114
youtube_dl/extractor/atresplayer.py
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import time
|
||||||
|
import hmac
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_request,
|
||||||
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
xpath_text,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AtresPlayerIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
|
||||||
|
'md5': 'efd56753cda1bb64df52a3074f62e38a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'capitulo-10-especial-solidario-nochebuena',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Especial Solidario de Nochebuena',
|
||||||
|
'description': 'md5:e2d52ff12214fa937107d21064075bf1',
|
||||||
|
'duration': 5527.6,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
_USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
|
||||||
|
_MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
|
||||||
|
_TIMESTAMP_SHIFT = 30000
|
||||||
|
|
||||||
|
_TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
|
||||||
|
_URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
|
||||||
|
_PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
|
||||||
|
_EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
episode_id = self._search_regex(
|
||||||
|
r'episode="([^"]+)"', webpage, 'episode id')
|
||||||
|
|
||||||
|
timestamp = int_or_none(self._download_webpage(
|
||||||
|
self._TIME_API_URL,
|
||||||
|
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
|
||||||
|
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
|
||||||
|
token = hmac.new(
|
||||||
|
self._MAGIC.encode('ascii'),
|
||||||
|
(episode_id + timestamp_shifted).encode('utf-8')
|
||||||
|
).hexdigest()
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for fmt in ['windows', 'android_tablet']:
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
|
||||||
|
request.add_header('Youtubedl-user-agent', self._USER_AGENT)
|
||||||
|
|
||||||
|
fmt_json = self._download_json(
|
||||||
|
request, video_id, 'Downloading %s video JSON' % fmt)
|
||||||
|
|
||||||
|
result = fmt_json.get('resultDes')
|
||||||
|
if result.lower() != 'ok':
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
|
||||||
|
|
||||||
|
for _, video_url in fmt_json['resultObject'].items():
|
||||||
|
if video_url.endswith('/Manifest'):
|
||||||
|
formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': 'android',
|
||||||
|
'preference': 1,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
player = self._download_json(
|
||||||
|
self._PLAYER_URL_TEMPLATE % episode_id,
|
||||||
|
episode_id)
|
||||||
|
|
||||||
|
path_data = player.get('pathData')
|
||||||
|
|
||||||
|
episode = self._download_xml(
|
||||||
|
self._EPISODE_URL_TEMPLATE % path_data,
|
||||||
|
video_id, 'Downloading episode XML')
|
||||||
|
|
||||||
|
duration = float_or_none(xpath_text(
|
||||||
|
episode, './media/asset/info/technical/contentDuration', 'duration'))
|
||||||
|
|
||||||
|
art = episode.find('./media/asset/info/art')
|
||||||
|
title = xpath_text(art, './name', 'title')
|
||||||
|
description = xpath_text(art, './description', 'description')
|
||||||
|
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -27,23 +28,18 @@ class AUEngineIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
|
title = self._html_search_regex(
|
||||||
title = title.strip()
|
r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title')
|
||||||
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage)
|
||||||
links = map(compat_urllib_parse.unquote, links)
|
video_url = compat_urllib_parse.unquote(video_urls[0])
|
||||||
|
thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage)
|
||||||
|
thumbnail = compat_urllib_parse.unquote(thumbnails[0])
|
||||||
|
|
||||||
thumbnail = None
|
|
||||||
video_url = None
|
|
||||||
for link in links:
|
|
||||||
if link.endswith('.png'):
|
|
||||||
thumbnail = link
|
|
||||||
elif '/videos/' in link:
|
|
||||||
video_url = link
|
|
||||||
if not video_url:
|
if not video_url:
|
||||||
raise ExtractorError('Could not find video URL')
|
raise ExtractorError('Could not find video URL')
|
||||||
|
|
||||||
ext = '.' + determine_ext(video_url)
|
ext = '.' + determine_ext(video_url)
|
||||||
if ext == title[-len(ext):]:
|
title = remove_end(title, ext)
|
||||||
title = title[:-len(ext)]
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
|
|||||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||||
IE_NAME = 'bbc.co.uk'
|
IE_NAME = 'bbc.co.uk'
|
||||||
IE_DESC = 'BBC iPlayer'
|
IE_DESC = 'BBC iPlayer'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b039d07m',
|
'id': 'b039d07m',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Kaleidoscope: Leonard Cohen',
|
'title': 'Kaleidoscope, Leonard Cohen',
|
||||||
'description': 'md5:db4755d7a665ae72343779f7dacb402c',
|
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||||
'duration': 1740,
|
'duration': 1740,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@@ -71,7 +71,54 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||||
},
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b04v209v',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Pete Tong, The Essential New Tune Special',
|
||||||
|
'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
|
||||||
|
'duration': 10800,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
||||||
|
'note': 'Audio',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p02frcch',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
||||||
|
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
||||||
|
'duration': 3507,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||||
|
'note': 'Video',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p025c103',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||||
|
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||||
|
'duration': 226,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _extract_asx_playlist(self, connection, programme_id):
|
def _extract_asx_playlist(self, connection, programme_id):
|
||||||
@@ -203,6 +250,59 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
|
|
||||||
|
def _download_playlist(self, playlist_id):
|
||||||
|
try:
|
||||||
|
playlist = self._download_json(
|
||||||
|
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
|
||||||
|
playlist_id, 'Downloading playlist JSON')
|
||||||
|
|
||||||
|
version = playlist.get('defaultAvailableVersion')
|
||||||
|
if version:
|
||||||
|
smp_config = version['smpConfig']
|
||||||
|
title = smp_config['title']
|
||||||
|
description = smp_config['summary']
|
||||||
|
for item in smp_config['items']:
|
||||||
|
kind = item['kind']
|
||||||
|
if kind != 'programme' and kind != 'radioProgramme':
|
||||||
|
continue
|
||||||
|
programme_id = item.get('vpid')
|
||||||
|
duration = int(item.get('duration'))
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
return programme_id, title, description, duration, formats, subtitles
|
||||||
|
except ExtractorError as ee:
|
||||||
|
if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||||
|
raise
|
||||||
|
|
||||||
|
# fallback to legacy playlist
|
||||||
|
playlist = self._download_xml(
|
||||||
|
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
|
||||||
|
playlist_id, 'Downloading legacy playlist XML')
|
||||||
|
|
||||||
|
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||||
|
if no_items is not None:
|
||||||
|
reason = no_items.get('reason')
|
||||||
|
if reason == 'preAvailability':
|
||||||
|
msg = 'Episode %s is not yet available' % playlist_id
|
||||||
|
elif reason == 'postAvailability':
|
||||||
|
msg = 'Episode %s is no longer available' % playlist_id
|
||||||
|
elif reason == 'noMedia':
|
||||||
|
msg = 'Episode %s is not currently available' % playlist_id
|
||||||
|
else:
|
||||||
|
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
|
||||||
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
|
for item in self._extract_items(playlist):
|
||||||
|
kind = item.get('kind')
|
||||||
|
if kind != 'programme' and kind != 'radioProgramme':
|
||||||
|
continue
|
||||||
|
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||||
|
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||||
|
programme_id = item.get('identifier')
|
||||||
|
duration = int(item.get('duration'))
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
|
||||||
|
return programme_id, title, description, duration, formats, subtitles
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
group_id = self._match_id(url)
|
group_id = self._match_id(url)
|
||||||
|
|
||||||
@@ -219,32 +319,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||||||
duration = player['duration']
|
duration = player['duration']
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
else:
|
else:
|
||||||
playlist = self._download_xml(
|
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
|
|
||||||
group_id, 'Downloading playlist XML')
|
|
||||||
|
|
||||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
|
||||||
if no_items is not None:
|
|
||||||
reason = no_items.get('reason')
|
|
||||||
if reason == 'preAvailability':
|
|
||||||
msg = 'Episode %s is not yet available' % group_id
|
|
||||||
elif reason == 'postAvailability':
|
|
||||||
msg = 'Episode %s is no longer available' % group_id
|
|
||||||
elif reason == 'noMedia':
|
|
||||||
msg = 'Episode %s is not currently available' % group_id
|
|
||||||
else:
|
|
||||||
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
|
||||||
raise ExtractorError(msg, expected=True)
|
|
||||||
|
|
||||||
for item in self._extract_items(playlist):
|
|
||||||
kind = item.get('kind')
|
|
||||||
if kind != 'programme' and kind != 'radioProgramme':
|
|
||||||
continue
|
|
||||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
|
||||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
|
||||||
programme_id = item.get('identifier')
|
|
||||||
duration = int(item.get('duration'))
|
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
|
||||||
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
self._list_available_subtitles(programme_id, subtitles)
|
self._list_available_subtitles(programme_id, subtitles)
|
||||||
|
@@ -16,7 +16,7 @@ class BetIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
|
'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
|
||||||
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'BET News Presents: A Conversation With President Obama',
|
'title': 'BET News Presents: A Conversation With President Obama',
|
||||||
@@ -35,7 +35,7 @@ class BetIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
|
'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
|
||||||
'display_id': 'justice-for-ferguson-a-community-reacts',
|
'display_id': 'justice-for-ferguson-a-community-reacts',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Justice for Ferguson: A Community Reacts',
|
'title': 'Justice for Ferguson: A Community Reacts',
|
||||||
@@ -55,7 +55,6 @@ class BetIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
media_url = compat_urllib_parse.unquote(self._search_regex(
|
media_url = compat_urllib_parse.unquote(self._search_regex(
|
||||||
|
@@ -4,9 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_parse_qs
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
@@ -54,45 +52,38 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
thumbnail = self._html_search_meta(
|
thumbnail = self._html_search_meta(
|
||||||
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
|
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
player_params = compat_parse_qs(self._html_search_regex(
|
cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
|
||||||
r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
|
|
||||||
webpage, 'player params'))
|
|
||||||
|
|
||||||
if 'cid' in player_params:
|
lq_doc = self._download_xml(
|
||||||
cid = player_params['cid'][0]
|
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
|
||||||
|
video_id,
|
||||||
|
note='Downloading LQ video info'
|
||||||
|
)
|
||||||
|
lq_durl = lq_doc.find('./durl')
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'lq',
|
||||||
|
'quality': 1,
|
||||||
|
'url': lq_durl.find('./url').text,
|
||||||
|
'filesize': int_or_none(
|
||||||
|
lq_durl.find('./size'), get_attr='text'),
|
||||||
|
}]
|
||||||
|
|
||||||
lq_doc = self._download_xml(
|
hq_doc = self._download_xml(
|
||||||
'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
|
'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
|
||||||
video_id,
|
video_id,
|
||||||
note='Downloading LQ video info'
|
note='Downloading HQ video info',
|
||||||
)
|
fatal=False,
|
||||||
lq_durl = lq_doc.find('.//durl')
|
)
|
||||||
formats = [{
|
if hq_doc is not False:
|
||||||
'format_id': 'lq',
|
hq_durl = hq_doc.find('./durl')
|
||||||
'quality': 1,
|
formats.append({
|
||||||
'url': lq_durl.find('./url').text,
|
'format_id': 'hq',
|
||||||
|
'quality': 2,
|
||||||
|
'ext': 'flv',
|
||||||
|
'url': hq_durl.find('./url').text,
|
||||||
'filesize': int_or_none(
|
'filesize': int_or_none(
|
||||||
lq_durl.find('./size'), get_attr='text'),
|
hq_durl.find('./size'), get_attr='text'),
|
||||||
}]
|
})
|
||||||
|
|
||||||
hq_doc = self._download_xml(
|
|
||||||
'http://interface.bilibili.cn/playurl?cid=%s' % cid,
|
|
||||||
video_id,
|
|
||||||
note='Downloading HQ video info',
|
|
||||||
fatal=False,
|
|
||||||
)
|
|
||||||
if hq_doc is not False:
|
|
||||||
hq_durl = hq_doc.find('.//durl')
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'hq',
|
|
||||||
'quality': 2,
|
|
||||||
'ext': 'flv',
|
|
||||||
'url': hq_durl.find('./url').text,
|
|
||||||
'filesize': int_or_none(
|
|
||||||
hq_durl.find('./size'), get_attr='text'),
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return {
|
return {
|
||||||
|
@@ -33,7 +33,7 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
'skip_download': True, # Got enough YouTube download tests
|
'skip_download': True, # Got enough YouTube download tests
|
||||||
},
|
},
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'description': 'Munchkin the Teddy Bear is back !',
|
'description': 're:Munchkin the Teddy Bear is back ?!',
|
||||||
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
@@ -42,9 +42,9 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20141124',
|
'upload_date': '20141124',
|
||||||
'uploader_id': 'CindysMunchkin',
|
'uploader_id': 'CindysMunchkin',
|
||||||
'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
|
'description': 're:© 2014 Munchkin the Shih Tzu',
|
||||||
'uploader': 'Munchkin the Shih Tzu',
|
'uploader': 'Munchkin the Shih Tzu',
|
||||||
'title': 'Munchkin the Teddy Bear gets her exercise',
|
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
}]
|
}]
|
||||||
|
@@ -5,6 +5,8 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
HEADRequest,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_basename,
|
url_basename,
|
||||||
qualities,
|
qualities,
|
||||||
@@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor):
|
|||||||
|
|
||||||
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])
|
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])
|
||||||
|
|
||||||
|
fmt_url = next(iter(media.find('VIDEOS'))).text
|
||||||
|
if '/geo' in fmt_url.lower():
|
||||||
|
response = self._request_webpage(
|
||||||
|
HEADRequest(fmt_url), video_id,
|
||||||
|
'Checking if the video is georestricted')
|
||||||
|
if '/blocage' in response.geturl():
|
||||||
|
raise ExtractorError(
|
||||||
|
'The video is not available in your country',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for fmt in media.find('VIDEOS'):
|
for fmt in media.find('VIDEOS'):
|
||||||
format_url = fmt.text
|
format_url = fmt.text
|
||||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
@@ -11,49 +11,42 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CeskaTelevizeIE(InfoExtractor):
|
class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '213512120230004',
|
'id': '214411058091220',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'První republika: Španělská chřipka',
|
'title': 'Hyde Park Civilizace',
|
||||||
'duration': 3107.4,
|
'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'duration': 3350,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires rtmpdump
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Works only from Czech Republic.',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '20138143440',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Tsatsiki, maminka a policajt',
|
|
||||||
'duration': 6754.1,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True, # requires rtmpdump
|
|
||||||
},
|
|
||||||
'skip': 'Works only from Czech Republic.',
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '14716',
|
'id': '14716',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
||||||
'duration': 90,
|
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'duration': 88.4,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires rtmpdump
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@@ -80,8 +73,9 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
'requestSource': 'iVysilani',
|
'requestSource': 'iVysilani',
|
||||||
}
|
}
|
||||||
|
|
||||||
req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
|
req = compat_urllib_request.Request(
|
||||||
data=compat_urllib_parse.urlencode(data))
|
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||||
|
data=compat_urllib_parse.urlencode(data))
|
||||||
|
|
||||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
req.add_header('x-addr', '127.0.0.1')
|
req.add_header('x-addr', '127.0.0.1')
|
||||||
@@ -90,39 +84,72 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
|
|
||||||
playlistpage = self._download_json(req, video_id)
|
playlistpage = self._download_json(req, video_id)
|
||||||
|
|
||||||
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
|
playlist_url = playlistpage['url']
|
||||||
|
if playlist_url == 'error_region':
|
||||||
|
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
playlist = self._download_xml(req, video_id)
|
playlist = self._download_json(req, video_id)
|
||||||
|
|
||||||
|
item = playlist['playlist'][0]
|
||||||
formats = []
|
formats = []
|
||||||
for i in playlist.find('smilRoot/body'):
|
for format_id, stream_url in item['streamUrls'].items():
|
||||||
if 'AD' not in i.attrib['id']:
|
formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
|
||||||
base_url = i.attrib['base']
|
|
||||||
parsedurl = compat_urllib_parse_urlparse(base_url)
|
|
||||||
duration = i.attrib['duration']
|
|
||||||
|
|
||||||
for video in i.findall('video'):
|
|
||||||
if video.attrib['label'] != 'AD':
|
|
||||||
format_id = video.attrib['label']
|
|
||||||
play_path = video.attrib['src']
|
|
||||||
vbr = int(video.attrib['system-bitrate'])
|
|
||||||
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': base_url,
|
|
||||||
'vbr': vbr,
|
|
||||||
'play_path': play_path,
|
|
||||||
'app': parsedurl.path[1:] + '?' + parsedurl.query,
|
|
||||||
'rtmp_live': True,
|
|
||||||
'ext': 'flv',
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
duration = float_or_none(item.get('duration'))
|
||||||
|
thumbnail = item.get('previewImageUrl')
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
subs = item.get('subtitles')
|
||||||
|
if subs:
|
||||||
|
subtitles['cs'] = subs[0]['url']
|
||||||
|
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
|
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': episode_id,
|
'id': episode_id,
|
||||||
'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
|
'title': title,
|
||||||
'duration': float(duration),
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _fix_subtitles(subtitles):
|
||||||
|
""" Convert millisecond-based subtitles to SRT """
|
||||||
|
if subtitles is None:
|
||||||
|
return subtitles # subtitles not requested
|
||||||
|
|
||||||
|
def _msectotimecode(msec):
|
||||||
|
""" Helper utility to convert milliseconds to timecode """
|
||||||
|
components = []
|
||||||
|
for divider in [1000, 60, 60, 100]:
|
||||||
|
components.append(msec % divider)
|
||||||
|
msec //= divider
|
||||||
|
return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)
|
||||||
|
|
||||||
|
def _fix_subtitle(subtitle):
|
||||||
|
for line in subtitle.splitlines():
|
||||||
|
m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
|
||||||
|
if m:
|
||||||
|
yield m.group(1)
|
||||||
|
start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
|
||||||
|
yield "{0} --> {1}".format(start, stop)
|
||||||
|
else:
|
||||||
|
yield line
|
||||||
|
|
||||||
|
fixed_subtitles = {}
|
||||||
|
for k, v in subtitles.items():
|
||||||
|
fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
|
||||||
|
return fixed_subtitles
|
||||||
|
@@ -11,14 +11,14 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CNNIE(InfoExtractor):
|
class CNNIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
|
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
||||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''
|
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||||
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'sports_2013_06_09_nadal-1-on-1.cnn',
|
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Nadal wins 8th French Open title',
|
'title': 'Nadal wins 8th French Open title',
|
||||||
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||||
@@ -35,6 +35,16 @@ class CNNIE(InfoExtractor):
|
|||||||
"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||||
"upload_date": "20130821",
|
"upload_date": "20130821",
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||||
|
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||||
|
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||||
|
'upload_date': '20141222',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -127,3 +137,28 @@ class CNNBlogsIE(InfoExtractor):
|
|||||||
'url': cnn_url,
|
'url': cnn_url,
|
||||||
'ie_key': CNNIE.ie_key(),
|
'ie_key': CNNIE.ie_key(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CNNArticleIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||||
|
'md5': '275b326f85d80dff7592a9820f5dc887',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'bestoftv/2014/12/21/sotu-crowley-president-obama-north-korea-not-going-to-be-intimidated.cnn',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Obama: We\'re not going to be intimidated',
|
||||||
|
'description': 'md5:e735586f3dc936075fa654a4d91b21f9',
|
||||||
|
'upload_date': '20141220',
|
||||||
|
},
|
||||||
|
'add_ie': ['CNN'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
webpage = self._download_webpage(url, url_basename(url))
|
||||||
|
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': 'http://cnn.com/video/?/video/' + cnn_url,
|
||||||
|
'ie_key': CNNIE.ie_key(),
|
||||||
|
}
|
||||||
|
@@ -50,7 +50,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
)|
|
)|
|
||||||
(?P<interview>
|
(?P<interview>
|
||||||
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
||||||
(?:[?#].*|$)'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||||
@@ -83,6 +83,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@@ -21,6 +21,7 @@ from ..compat import (
|
|||||||
compat_str,
|
compat_str,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
age_restricted,
|
||||||
clean_html,
|
clean_html,
|
||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@@ -40,7 +41,7 @@ class InfoExtractor(object):
|
|||||||
information about the video (or videos) the URL refers to. This
|
information about the video (or videos) the URL refers to. This
|
||||||
information includes the real video URL, the video title, author and
|
information includes the real video URL, the video title, author and
|
||||||
others. The information is stored in a dictionary which is then
|
others. The information is stored in a dictionary which is then
|
||||||
passed to the FileDownloader. The FileDownloader processes this
|
passed to the YoutubeDL. The YoutubeDL processes this
|
||||||
information possibly downloading the video to the file system, among
|
information possibly downloading the video to the file system, among
|
||||||
other possible outcomes.
|
other possible outcomes.
|
||||||
|
|
||||||
@@ -92,6 +93,8 @@ class InfoExtractor(object):
|
|||||||
by this field, regardless of all other values.
|
by this field, regardless of all other values.
|
||||||
-1 for default (order by other properties),
|
-1 for default (order by other properties),
|
||||||
-2 or smaller for less than default.
|
-2 or smaller for less than default.
|
||||||
|
< -1000 to hide the format (if there is
|
||||||
|
another one which is strictly better)
|
||||||
* language_preference Is this in the correct requested
|
* language_preference Is this in the correct requested
|
||||||
language?
|
language?
|
||||||
10 if it's what the URL is about,
|
10 if it's what the URL is about,
|
||||||
@@ -144,6 +147,17 @@ class InfoExtractor(object):
|
|||||||
like_count: Number of positive ratings of the video
|
like_count: Number of positive ratings of the video
|
||||||
dislike_count: Number of negative ratings of the video
|
dislike_count: Number of negative ratings of the video
|
||||||
comment_count: Number of comments on the video
|
comment_count: Number of comments on the video
|
||||||
|
comments: A list of comments, each with one or more of the following
|
||||||
|
properties (all but one of text or html optional):
|
||||||
|
* "author" - human-readable name of the comment author
|
||||||
|
* "author_id" - user ID of the comment author
|
||||||
|
* "id" - Comment ID
|
||||||
|
* "html" - Comment as HTML
|
||||||
|
* "text" - Plain text of the comment
|
||||||
|
* "timestamp" - UNIX timestamp of comment
|
||||||
|
* "parent" - ID of the comment this one is replying to.
|
||||||
|
Set to "root" to indicate that this is a
|
||||||
|
comment to the original video.
|
||||||
age_limit: Age restriction for the video, as an integer (years)
|
age_limit: Age restriction for the video, as an integer (years)
|
||||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||||
should allow to get the same result again. (It will be set
|
should allow to get the same result again. (It will be set
|
||||||
@@ -589,9 +603,9 @@ class InfoExtractor(object):
|
|||||||
if display_name is None:
|
if display_name is None:
|
||||||
display_name = name
|
display_name = name
|
||||||
return self._html_search_regex(
|
return self._html_search_regex(
|
||||||
r'''(?ix)<meta
|
r'''(?isx)<meta
|
||||||
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
||||||
[^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
|
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
|
||||||
html, display_name, fatal=fatal, group='content', **kwargs)
|
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||||
|
|
||||||
def _dc_search_uploader(self, html):
|
def _dc_search_uploader(self, html):
|
||||||
@@ -875,6 +889,35 @@ class InfoExtractor(object):
|
|||||||
None, '/', True, False, expire_time, '', None, None, None)
|
None, '/', True, False, expire_time, '', None, None, None)
|
||||||
self._downloader.cookiejar.set_cookie(cookie)
|
self._downloader.cookiejar.set_cookie(cookie)
|
||||||
|
|
||||||
|
def get_testcases(self, include_onlymatching=False):
|
||||||
|
t = getattr(self, '_TEST', None)
|
||||||
|
if t:
|
||||||
|
assert not hasattr(self, '_TESTS'), \
|
||||||
|
'%s has _TEST and _TESTS' % type(self).__name__
|
||||||
|
tests = [t]
|
||||||
|
else:
|
||||||
|
tests = getattr(self, '_TESTS', [])
|
||||||
|
for t in tests:
|
||||||
|
if not include_onlymatching and t.get('only_matching', False):
|
||||||
|
continue
|
||||||
|
t['name'] = type(self).__name__[:-len('IE')]
|
||||||
|
yield t
|
||||||
|
|
||||||
|
def is_suitable(self, age_limit):
|
||||||
|
""" Test whether the extractor is generally suitable for the given
|
||||||
|
age limit (i.e. pornographic sites are not, all others usually are) """
|
||||||
|
|
||||||
|
any_restricted = False
|
||||||
|
for tc in self.get_testcases(include_onlymatching=False):
|
||||||
|
if 'playlist' in tc:
|
||||||
|
tc = tc['playlist'][0]
|
||||||
|
is_restricted = age_restricted(
|
||||||
|
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||||
|
if not is_restricted:
|
||||||
|
return True
|
||||||
|
any_restricted = any_restricted or is_restricted
|
||||||
|
return not any_restricted
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
29
youtube_dl/extractor/commonmistakes.py
Normal file
29
youtube_dl/extractor/commonmistakes.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class CommonMistakesIE(InfoExtractor):
|
||||||
|
IE_DESC = False # Do not list
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:url|URL)
|
||||||
|
'''
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'url',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'URL',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
msg = (
|
||||||
|
'You\'ve asked youtube-dl to download the URL "%s". '
|
||||||
|
'That doesn\'t make any sense. '
|
||||||
|
'Simply remove the parameter in your command or configuration.'
|
||||||
|
) % url
|
||||||
|
if self._downloader.params.get('verbose'):
|
||||||
|
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
|
||||||
|
raise ExtractorError(msg, expected=True)
|
@@ -29,10 +29,9 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(SubtitlesInfoExtractor):
|
class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '645513',
|
'id': '645513',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
@@ -47,7 +46,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
|
|||||||
# rtmp
|
# rtmp
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_FORMAT_IDS = {
|
_FORMAT_IDS = {
|
||||||
'360': ('60', '106'),
|
'360': ('60', '106'),
|
||||||
@@ -226,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
|
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
|
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
||||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||||
video_format = fmt + 'p'
|
video_format = fmt + 'p'
|
||||||
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
|
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
|
||||||
|
@@ -27,7 +27,6 @@ class CSpanIE(InfoExtractor):
|
|||||||
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
|
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
|
||||||
# For whatever reason, the served video alternates between
|
# For whatever reason, the served video alternates between
|
||||||
# two different ones
|
# two different ones
|
||||||
#'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '340723',
|
'id': '340723',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@@ -38,7 +38,7 @@ class DaumIE(InfoExtractor):
|
|||||||
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
||||||
webpage = self._download_webpage(canonical_url, video_id)
|
webpage = self._download_webpage(canonical_url, video_id)
|
||||||
full_id = self._search_regex(
|
full_id = self._search_regex(
|
||||||
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
|
r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
|
||||||
webpage, 'full id')
|
webpage, 'full id')
|
||||||
query = compat_urllib_parse.urlencode({'vid': full_id})
|
query = compat_urllib_parse.urlencode({'vid': full_id})
|
||||||
info = self._download_xml(
|
info = self._download_xml(
|
||||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -61,7 +62,7 @@ class DBTVIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video['id'],
|
'id': compat_str(video['id']),
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': video['title'],
|
'title': video['title'],
|
||||||
'description': clean_html(video['desc']),
|
'description': clean_html(video['desc']),
|
||||||
|
@@ -1,47 +1,45 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_iso8601,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryIE(InfoExtractor):
|
class DiscoveryIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||||
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
|
'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '614784',
|
'id': 'mission-impossible-outtakes',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'MythBusters: Mission Impossible Outtakes',
|
'title': 'Mission Impossible Outtakes',
|
||||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||||
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
|
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
|
||||||
' back.'),
|
' back.'),
|
||||||
'duration': 156,
|
'duration': 156,
|
||||||
|
'timestamp': 1303099200,
|
||||||
|
'upload_date': '20110418',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
|
info = self._parse_json(self._search_regex(
|
||||||
webpage, 'video list', flags=re.DOTALL)
|
r'(?s)<script type="application/ld\+json">(.*?)</script>',
|
||||||
video_list = json.loads(video_list_json)
|
webpage, 'video info'), video_id)
|
||||||
info = video_list['clips'][0]
|
|
||||||
formats = []
|
|
||||||
for f in info['mp4']:
|
|
||||||
formats.append(
|
|
||||||
{'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': info['contentId'],
|
'id': video_id,
|
||||||
'title': video_list['name'],
|
'title': info['name'],
|
||||||
'formats': formats,
|
'url': info['contentURL'],
|
||||||
'description': info['videoCaption'],
|
'description': info.get('description'),
|
||||||
'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'),
|
'thumbnail': info.get('thumbnailUrl'),
|
||||||
'duration': info['duration'],
|
'timestamp': parse_iso8601(info.get('uploadDate')),
|
||||||
|
'duration': int_or_none(info.get('duration')),
|
||||||
}
|
}
|
||||||
|
125
youtube_dl/extractor/dvtv.py
Normal file
125
youtube_dl/extractor/dvtv.py
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
unescapeHTML,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DVTVIE(InfoExtractor):
|
||||||
|
IE_NAME = 'dvtv'
|
||||||
|
IE_DESC = 'http://video.aktualne.cz/'
|
||||||
|
|
||||||
|
_VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
|
||||||
|
'md5': '67cb83e4a955d36e1b5d31993134a0c2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dc0768de855511e49e4b0025900fea04',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
|
||||||
|
'md5': '6388f1941b48537dbd28791f712af8bf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '72c02230849211e49f60002590604f2e',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
|
||||||
|
'id': '973eb3bc854e11e498be002590604f2e',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': 'da7ca6be4935532241fa9520b3ad91e4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b0b40906854d11e4bdad0025900fea04',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'md5': '5f7652a08b05009c1292317b449ffea2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '420ad9ec854a11e4bdad0025900fea04',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'md5': '498eb9dfa97169f409126c617e2a3d64',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '95d35580846a11e4b6d20025900fea04',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'md5': 'b8dc6b744844032dab6ba3781a7274b9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6fe14d66853511e4833a0025900fea04',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
|
||||||
|
}
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
|
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _parse_video_metadata(self, js, video_id):
|
||||||
|
metadata = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for video in metadata['sources']:
|
||||||
|
ext = video['type'][6:]
|
||||||
|
formats.append({
|
||||||
|
'url': video['file'],
|
||||||
|
'ext': ext,
|
||||||
|
'format_id': '%s-%s' % (ext, video['label']),
|
||||||
|
'height': int(video['label'].rstrip('p')),
|
||||||
|
'fps': 25,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': metadata['mediaid'],
|
||||||
|
'title': unescapeHTML(metadata['title']),
|
||||||
|
'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
|
||||||
|
'formats': formats
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
# single video
|
||||||
|
item = self._search_regex(
|
||||||
|
r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
|
||||||
|
webpage, 'video', default=None, fatal=False)
|
||||||
|
|
||||||
|
if item:
|
||||||
|
return self._parse_video_metadata(item, video_id)
|
||||||
|
|
||||||
|
# playlist
|
||||||
|
items = re.findall(
|
||||||
|
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
|
||||||
|
webpage)
|
||||||
|
|
||||||
|
if items:
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'entries': [self._parse_video_metadata(i, video_id) for i in items]
|
||||||
|
}
|
||||||
|
|
||||||
|
raise ExtractorError('Could not find neither video nor playlist')
|
46
youtube_dl/extractor/echomsk.py
Normal file
46
youtube_dl/extractor/echomsk.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class EchoMskIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.echo.msk.ru/sounds/1464134.html',
|
||||||
|
'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1464134',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Особое мнение - 29 декабря 2014, 19:08',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
audio_url = self._search_regex(
|
||||||
|
r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL')
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
|
||||||
|
webpage, 'title')
|
||||||
|
|
||||||
|
air_date = self._html_search_regex(
|
||||||
|
r'(?s)<div class="date">(.+?)</div>',
|
||||||
|
webpage, 'date', fatal=False, default=None)
|
||||||
|
|
||||||
|
if air_date:
|
||||||
|
air_date = re.sub(r'(\s)\1+', r'\1', air_date)
|
||||||
|
if air_date:
|
||||||
|
title = '%s - %s' % (title, air_date)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': audio_url,
|
||||||
|
'title': title,
|
||||||
|
}
|
@@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -12,32 +11,49 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class EllenTVIE(InfoExtractor):
|
class EllenTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
|
'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
|
||||||
'md5': 'e4af06f3bf0d5f471921a18db5764642',
|
'md5': 'e4af06f3bf0d5f471921a18db5764642',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0-7jqrsr18',
|
'id': '0-7jqrsr18',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'What\'s Wrong with These Photos? A Whole Lot',
|
'title': 'What\'s Wrong with These Photos? A Whole Lot',
|
||||||
|
'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
|
||||||
'timestamp': 1406876400,
|
'timestamp': 1406876400,
|
||||||
'upload_date': '20140801',
|
'upload_date': '20140801',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://ellentube.com/videos/0-dvzmabd5/',
|
||||||
|
'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0-dvzmabd5',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '1 year old twin sister makes her brother laugh',
|
||||||
|
'description': '1 year old twin sister makes her brother laugh',
|
||||||
|
'timestamp': 1419542075,
|
||||||
|
'upload_date': '20141225',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_url = self._html_search_meta('VideoURL', webpage, 'url')
|
||||||
|
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
|
r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
|
||||||
|
description = self._html_search_meta(
|
||||||
|
'description', webpage, 'description') or self._og_search_description(webpage)
|
||||||
timestamp = parse_iso8601(self._search_regex(
|
timestamp = parse_iso8601(self._search_regex(
|
||||||
r'<span class="publish-date"><time datetime="([^"]+)">',
|
r'<span class="publish-date"><time datetime="([^"]+)">',
|
||||||
webpage, 'timestamp'))
|
webpage, 'timestamp'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._og_search_title(webpage),
|
'url': video_url,
|
||||||
'url': self._html_search_meta('VideoURL', webpage, 'url'),
|
'title': title,
|
||||||
|
'description': description,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
playlist_id = self._match_id(url)
|
||||||
playlist_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
playlist = self._extract_playlist(webpage)
|
playlist = self._extract_playlist(webpage)
|
||||||
|
@@ -1,8 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
@@ -24,9 +22,7 @@ class ElPaisIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
prefix = self._html_search_regex(
|
prefix = self._html_search_regex(
|
||||||
|
45
youtube_dl/extractor/eroprofile.py
Normal file
45
youtube_dl/extractor/eroprofile.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class EroProfileIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
|
||||||
|
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3733775',
|
||||||
|
'display_id': 'sexy-babe-softcore',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'sexy babe softcore',
|
||||||
|
'thumbnail': 're:https?://.*\.jpg',
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||||
|
webpage, 'video id', default=None)
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'<source src="([^"]+)', webpage, 'video url')
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'Title:</th><td>([^<]+)</td>', webpage, 'title')
|
||||||
|
thumbnail = self._search_regex(
|
||||||
|
r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
|
||||||
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
@@ -13,7 +13,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class FKTVIE(InfoExtractor):
|
class FKTVIE(InfoExtractor):
|
||||||
IE_NAME = 'fernsehkritik.tv'
|
IE_NAME = 'fernsehkritik.tv'
|
||||||
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
|
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://fernsehkritik.tv/folge-1',
|
'url': 'http://fernsehkritik.tv/folge-1',
|
||||||
@@ -26,29 +26,32 @@ class FKTVIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
episode = int(self._match_id(url))
|
||||||
episode = int(mobj.group('ep'))
|
|
||||||
|
|
||||||
server = random.randint(2, 4)
|
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
|
||||||
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
|
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
|
||||||
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
|
|
||||||
episode)
|
episode)
|
||||||
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
|
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
|
||||||
'playlist', flags=re.DOTALL)
|
'playlist', flags=re.DOTALL)
|
||||||
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
|
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
|
||||||
# TODO: return a single multipart video
|
|
||||||
videos = []
|
videos = []
|
||||||
for i, _ in enumerate(files, 1):
|
for i, _ in enumerate(files, 1):
|
||||||
video_id = '%04d%d' % (episode, i)
|
video_id = '%04d%d' % (episode, i)
|
||||||
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
|
video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
|
||||||
videos.append({
|
videos.append({
|
||||||
|
'ext': 'flv',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
|
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
|
||||||
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
|
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
|
||||||
'thumbnail': video_thumbnail
|
'thumbnail': video_thumbnail
|
||||||
})
|
})
|
||||||
return videos
|
return {
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'entries': videos,
|
||||||
|
'id': 'folge-%s' % episode,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class FKTVPosteckeIE(InfoExtractor):
|
class FKTVPosteckeIE(InfoExtractor):
|
||||||
|
@@ -6,7 +6,9 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
parse_iso8601
|
parse_iso8601,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
NAMESPACE_MAP = {
|
NAMESPACE_MAP = {
|
||||||
@@ -21,25 +23,41 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
|
|||||||
|
|
||||||
class GameOneIE(InfoExtractor):
|
class GameOneIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
'url': 'http://www.gameone.de/tv/288',
|
{
|
||||||
'md5': '136656b7fb4c9cb4a8e2d500651c499b',
|
'url': 'http://www.gameone.de/tv/288',
|
||||||
'info_dict': {
|
'md5': '136656b7fb4c9cb4a8e2d500651c499b',
|
||||||
'id': '288',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': '288',
|
||||||
'title': 'Game One - Folge 288',
|
'ext': 'mp4',
|
||||||
'duration': 1238,
|
'title': 'Game One - Folge 288',
|
||||||
'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
|
'duration': 1238,
|
||||||
'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
|
'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
|
||||||
'age_limit': 16,
|
'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
|
||||||
'upload_date': '20140513',
|
'age_limit': 16,
|
||||||
'timestamp': 1399980122,
|
'upload_date': '20140513',
|
||||||
|
'timestamp': 1399980122,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://gameone.de/tv/220',
|
||||||
|
'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '220',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20120918',
|
||||||
|
'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
|
||||||
|
'timestamp': 1347971451,
|
||||||
|
'title': 'Game One - Folge 220',
|
||||||
|
'duration': 896.62,
|
||||||
|
'age_limit': 16,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
og_video = self._og_search_video_url(webpage, secure=False)
|
og_video = self._og_search_video_url(webpage, secure=False)
|
||||||
@@ -66,13 +84,13 @@ class GameOneIE(InfoExtractor):
|
|||||||
video_id,
|
video_id,
|
||||||
'Downloading media:content')
|
'Downloading media:content')
|
||||||
rendition_items = content.findall('.//rendition')
|
rendition_items = content.findall('.//rendition')
|
||||||
duration = int(rendition_items[0].get('duration'))
|
duration = float_or_none(rendition_items[0].get('duration'))
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
|
'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
|
||||||
'width': int(r.get('width')),
|
'width': int_or_none(r.get('width')),
|
||||||
'height': int(r.get('height')),
|
'height': int_or_none(r.get('height')),
|
||||||
'tbr': int(r.get('bitrate')),
|
'tbr': int_or_none(r.get('bitrate')),
|
||||||
}
|
}
|
||||||
for r in rendition_items
|
for r in rendition_items
|
||||||
]
|
]
|
||||||
@@ -105,7 +123,8 @@ class GameOnePlaylistIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
|
webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
|
||||||
max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
|
max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne')
|
self.url_result('http://www.gameone.de/tv/%d' %
|
||||||
|
video_id, 'GameOne')
|
||||||
for video_id in range(max_id, 0, -1)]
|
for video_id in range(max_id, 0, -1)]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -39,7 +39,8 @@ class GDCVaultIE(InfoExtractor):
|
|||||||
'id': '1015301',
|
'id': '1015301',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
|
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
|
||||||
}
|
},
|
||||||
|
'skip': 'Requires login',
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -23,6 +23,7 @@ from ..utils import (
|
|||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
UnsupportedError,
|
||||||
url_basename,
|
url_basename,
|
||||||
)
|
)
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
@@ -130,12 +131,13 @@ class GenericIE(InfoExtractor):
|
|||||||
# ooyala video
|
# ooyala video
|
||||||
{
|
{
|
||||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||||
'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
|
'md5': '166dd577b433b4d4ebfee10b0824d8ff',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
|
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '2cc213299525360.mov', # that's what we get
|
'title': '2cc213299525360.mov', # that's what we get
|
||||||
},
|
},
|
||||||
|
'add_ie': ['Ooyala'],
|
||||||
},
|
},
|
||||||
# google redirect
|
# google redirect
|
||||||
{
|
{
|
||||||
@@ -145,7 +147,7 @@ class GenericIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20130224',
|
'upload_date': '20130224',
|
||||||
'uploader_id': 'TheVerge',
|
'uploader_id': 'TheVerge',
|
||||||
'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
|
'description': 're:^Chris Ziegler takes a look at the\.*',
|
||||||
'uploader': 'The Verge',
|
'uploader': 'The Verge',
|
||||||
'title': 'First Firefox OS phones side-by-side',
|
'title': 'First Firefox OS phones side-by-side',
|
||||||
},
|
},
|
||||||
@@ -180,6 +182,14 @@ class GenericIE(InfoExtractor):
|
|||||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# BBC iPlayer embeds
|
||||||
|
{
|
||||||
|
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'BBC - Blogs - Adam Curtis - BUGGER',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 18,
|
||||||
|
},
|
||||||
# RUTV embed
|
# RUTV embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
||||||
@@ -698,9 +708,9 @@ class GenericIE(InfoExtractor):
|
|||||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||||
|
|
||||||
# Helper method
|
# Helper method
|
||||||
def _playlist_from_matches(matches, getter, ie=None):
|
def _playlist_from_matches(matches, getter=None, ie=None):
|
||||||
urlrs = orderedSet(
|
urlrs = orderedSet(
|
||||||
self.url_result(self._proto_relative_url(getter(m)), ie)
|
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
|
||||||
for m in matches)
|
for m in matches)
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
@@ -904,6 +914,11 @@ class GenericIE(InfoExtractor):
|
|||||||
return _playlist_from_matches(
|
return _playlist_from_matches(
|
||||||
matches, getter=unescapeHTML, ie='FunnyOrDie')
|
matches, getter=unescapeHTML, ie='FunnyOrDie')
|
||||||
|
|
||||||
|
# Look for BBC iPlayer embed
|
||||||
|
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
||||||
|
if matches:
|
||||||
|
return _playlist_from_matches(matches, ie='BBCCoUk')
|
||||||
|
|
||||||
# Look for embedded RUTV player
|
# Look for embedded RUTV player
|
||||||
rutv_url = RUTVIE._extract_url(webpage)
|
rutv_url = RUTVIE._extract_url(webpage)
|
||||||
if rutv_url:
|
if rutv_url:
|
||||||
@@ -911,7 +926,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for embedded TED player
|
# Look for embedded TED player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'TED')
|
return self.url_result(mobj.group('url'), 'TED')
|
||||||
|
|
||||||
@@ -1057,7 +1072,7 @@ class GenericIE(InfoExtractor):
|
|||||||
'url': new_url,
|
'url': new_url,
|
||||||
}
|
}
|
||||||
if not found:
|
if not found:
|
||||||
raise ExtractorError('Unsupported URL: %s' % url)
|
raise UnsupportedError(url)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for video_url in found:
|
for video_url in found:
|
||||||
|
101
youtube_dl/extractor/giga.py
Normal file
101
youtube_dl/extractor/giga.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
qualities,
|
||||||
|
compat_str,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
str_to_int,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GigaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
|
||||||
|
'md5': '6bc5535e945e724640664632055a584f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2622086',
|
||||||
|
'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
|
||||||
|
'description': 'md5:afdf5862241aded4718a30dff6a57baf',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 578,
|
||||||
|
'timestamp': 1414749706,
|
||||||
|
'upload_date': '20141031',
|
||||||
|
'uploader': 'Robin Schweiger',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
[r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
|
||||||
|
webpage, 'video id')
|
||||||
|
|
||||||
|
playlist = self._download_json(
|
||||||
|
'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
|
||||||
|
% video_id, video_id)[0]
|
||||||
|
|
||||||
|
quality = qualities(['normal', 'hd720'])
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id in itertools.count(0):
|
||||||
|
fmt = playlist.get(compat_str(format_id))
|
||||||
|
if not fmt:
|
||||||
|
break
|
||||||
|
formats.append({
|
||||||
|
'url': fmt['src'],
|
||||||
|
'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
|
||||||
|
'quality': quality(fmt['quality']),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
'title', webpage, 'title', fatal=True)
|
||||||
|
description = self._html_search_meta(
|
||||||
|
'description', webpage, 'description')
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
duration = parse_duration(self._search_regex(
|
||||||
|
r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
|
timestamp = parse_iso8601(self._search_regex(
|
||||||
|
r'datetime="([^"]+)"', webpage, 'upload date', fatal=False))
|
||||||
|
uploader = self._search_regex(
|
||||||
|
r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
|
view_count = str_to_int(self._search_regex(
|
||||||
|
r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'uploader': uploader,
|
||||||
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
71
youtube_dl/extractor/hellporno.py
Normal file
71
youtube_dl/extractor/hellporno.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HellPornoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
|
||||||
|
'md5': '1fee339c610d2049699ef2aa699439f1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '149116',
|
||||||
|
'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Dixie is posing with naked ass very erotic',
|
||||||
|
'thumbnail': 're:https?://.*\.jpg$',
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = remove_end(self._html_search_regex(
|
||||||
|
r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
|
||||||
|
|
||||||
|
flashvars = self._parse_json(self._search_regex(
|
||||||
|
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
|
||||||
|
display_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
video_id = flashvars.get('video_id')
|
||||||
|
thumbnail = flashvars.get('preview_url')
|
||||||
|
ext = flashvars.get('postfix', '.mp4')[1:]
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for video_url_key in ['video_url', 'video_alt_url']:
|
||||||
|
video_url = flashvars.get(video_url_key)
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
video_text = flashvars.get('%s_text' % video_url_key)
|
||||||
|
fmt = {
|
||||||
|
'url': video_url,
|
||||||
|
'ext': ext,
|
||||||
|
'format_id': video_text,
|
||||||
|
}
|
||||||
|
m = re.search(r'^(?P<height>\d+)[pP]', video_text)
|
||||||
|
if m:
|
||||||
|
fmt['height'] = int(m.group('height'))
|
||||||
|
formats.append(fmt)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
categories = self._html_search_meta(
|
||||||
|
'keywords', webpage, 'categories', default='').split(',')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'categories': categories,
|
||||||
|
'age_limit': 18,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
166
youtube_dl/extractor/hitbox.py
Normal file
166
youtube_dl/extractor/hitbox.py
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
parse_iso8601,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HitboxIE(InfoExtractor):
|
||||||
|
IE_NAME = 'hitbox'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.hitbox.tv/video/203213',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '203213',
|
||||||
|
'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
|
||||||
|
'alt_title': 'hitboxlive - Aug 9th #6',
|
||||||
|
'description': '',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 215.1666,
|
||||||
|
'resolution': 'HD 720p',
|
||||||
|
'uploader': 'hitboxlive',
|
||||||
|
'view_count': int,
|
||||||
|
'timestamp': 1407576133,
|
||||||
|
'upload_date': '20140809',
|
||||||
|
'categories': ['Live Show'],
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_metadata(self, url, video_id):
|
||||||
|
thumb_base = 'https://edge.sf.hitbox.tv'
|
||||||
|
metadata = self._download_json(
|
||||||
|
'%s/%s' % (url, video_id), video_id)
|
||||||
|
|
||||||
|
date = 'media_live_since'
|
||||||
|
media_type = 'livestream'
|
||||||
|
if metadata.get('media_type') == 'video':
|
||||||
|
media_type = 'video'
|
||||||
|
date = 'media_date_added'
|
||||||
|
|
||||||
|
video_meta = metadata.get(media_type, [])[0]
|
||||||
|
title = video_meta.get('media_status')
|
||||||
|
alt_title = video_meta.get('media_title')
|
||||||
|
description = clean_html(
|
||||||
|
video_meta.get('media_description') or
|
||||||
|
video_meta.get('media_description_md'))
|
||||||
|
duration = float_or_none(video_meta.get('media_duration'))
|
||||||
|
uploader = video_meta.get('media_user_name')
|
||||||
|
views = int_or_none(video_meta.get('media_views'))
|
||||||
|
timestamp = parse_iso8601(video_meta.get(date), ' ')
|
||||||
|
categories = [video_meta.get('category_name')]
|
||||||
|
thumbs = [
|
||||||
|
{'url': thumb_base + video_meta.get('media_thumbnail'),
|
||||||
|
'width': 320,
|
||||||
|
'height': 180},
|
||||||
|
{'url': thumb_base + video_meta.get('media_thumbnail_large'),
|
||||||
|
'width': 768,
|
||||||
|
'height': 432},
|
||||||
|
]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'alt_title': alt_title,
|
||||||
|
'description': description,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnails': thumbs,
|
||||||
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
|
'view_count': views,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'categories': categories,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
metadata = self._extract_metadata(
|
||||||
|
'https://www.hitbox.tv/api/media/video',
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
player_config = self._download_json(
|
||||||
|
'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
clip = player_config.get('clip')
|
||||||
|
video_url = clip.get('url')
|
||||||
|
res = clip.get('bitrates', [])[0].get('label')
|
||||||
|
|
||||||
|
metadata['resolution'] = res
|
||||||
|
metadata['url'] = video_url
|
||||||
|
metadata['protocol'] = 'm3u8'
|
||||||
|
|
||||||
|
return metadata
|
||||||
|
|
||||||
|
|
||||||
|
class HitboxLiveIE(HitboxIE):
|
||||||
|
IE_NAME = 'hitbox:live'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.hitbox.tv/dimak',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dimak',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
|
||||||
|
'timestamp': int,
|
||||||
|
'upload_date': compat_str,
|
||||||
|
'title': compat_str,
|
||||||
|
'uploader': 'Dimak',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# live
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
metadata = self._extract_metadata(
|
||||||
|
'https://www.hitbox.tv/api/media/live',
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
player_config = self._download_json(
|
||||||
|
'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
cdns = player_config.get('cdns')
|
||||||
|
servers = []
|
||||||
|
for cdn in cdns:
|
||||||
|
base_url = cdn.get('netConnectionUrl')
|
||||||
|
host = re.search('.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1)
|
||||||
|
if base_url not in servers:
|
||||||
|
servers.append(base_url)
|
||||||
|
for stream in cdn.get('bitrates'):
|
||||||
|
label = stream.get('label')
|
||||||
|
if label != 'Auto':
|
||||||
|
formats.append({
|
||||||
|
'url': '%s/%s' % (base_url, stream.get('url')),
|
||||||
|
'ext': 'mp4',
|
||||||
|
'vbr': stream.get('bitrate'),
|
||||||
|
'resolution': label,
|
||||||
|
'rtmp_live': True,
|
||||||
|
'format_note': host,
|
||||||
|
'page_url': url,
|
||||||
|
'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
metadata['formats'] = formats
|
||||||
|
metadata['is_live'] = True
|
||||||
|
metadata['title'] = self._live_title(metadata.get('title'))
|
||||||
|
return metadata
|
@@ -39,8 +39,9 @@ class HuffPostIE(InfoExtractor):
|
|||||||
data = self._download_json(api_url, video_id)['data']
|
data = self._download_json(api_url, video_id)['data']
|
||||||
|
|
||||||
video_title = data['title']
|
video_title = data['title']
|
||||||
duration = parse_duration(data['running_time'])
|
duration = parse_duration(data.get('running_time'))
|
||||||
upload_date = unified_strdate(data['schedule']['starts_at'])
|
upload_date = unified_strdate(
|
||||||
|
data.get('schedule', {}).get('starts_at') or data.get('segment_start_date_time'))
|
||||||
description = data.get('description')
|
description = data.get('description')
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
@@ -59,16 +60,11 @@ class HuffPostIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'url': url,
|
'url': url,
|
||||||
'vcodec': 'none' if key.startswith('audio/') else None,
|
'vcodec': 'none' if key.startswith('audio/') else None,
|
||||||
} for key, url in data['sources']['live'].items()]
|
} for key, url in data.get('sources', {}).get('live', {}).items()]
|
||||||
if data.get('fivemin_id'):
|
|
||||||
fid = data['fivemin_id']
|
if not formats and data.get('fivemin_id'):
|
||||||
fcat = str(int(fid) // 100 + 1)
|
return self.url_result('5min:%s' % data['fivemin_id'])
|
||||||
furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
|
|
||||||
formats.append({
|
|
||||||
'format': 'fivemin',
|
|
||||||
'url': furl,
|
|
||||||
'preference': 1,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -16,7 +16,6 @@ class ImdbIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||||
'md5': '9f34fa777ade3a6e57a054fdbcb3a068',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2524815897',
|
'id': '2524815897',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@@ -22,7 +22,7 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'SKYFALL',
|
'title': 'SKYFALL',
|
||||||
'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
|
'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
|
||||||
'duration': 149,
|
'duration': 152,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor):
|
|||||||
'description': 'The perfect cipher',
|
'description': 'The perfect cipher',
|
||||||
'duration': 176,
|
'duration': 176,
|
||||||
'uploader': 'Brit Cruise',
|
'uploader': 'Brit Cruise',
|
||||||
|
'uploader_id': 'khanacademy',
|
||||||
'upload_date': '20120411',
|
'upload_date': '20120411',
|
||||||
}
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
|
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@@ -10,13 +10,14 @@ from ..utils import int_or_none
|
|||||||
class KontrTubeIE(InfoExtractor):
|
class KontrTubeIE(InfoExtractor):
|
||||||
IE_NAME = 'kontrtube'
|
IE_NAME = 'kontrtube'
|
||||||
IE_DESC = 'KontrTube.ru - Труба зовёт'
|
IE_DESC = 'KontrTube.ru - Труба зовёт'
|
||||||
_VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
|
_VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
|
'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
|
||||||
'md5': '975a991a4926c9a85f383a736a2e6b80',
|
'md5': '975a991a4926c9a85f383a736a2e6b80',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2678',
|
'id': '2678',
|
||||||
|
'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
|
'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
|
||||||
'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
|
'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
|
||||||
@@ -28,21 +29,28 @@ class KontrTubeIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
webpage = self._download_webpage(
|
||||||
|
url, display_id, 'Downloading page')
|
||||||
|
|
||||||
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
|
video_url = self._html_search_regex(
|
||||||
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>(.+?)</title>', webpage, 'video title')
|
r'<title>(.+?)</title>', webpage, 'video title')
|
||||||
description = self._html_search_meta('description', webpage, 'video description')
|
description = self._html_search_meta(
|
||||||
|
'description', webpage, 'video description')
|
||||||
|
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
|
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
|
||||||
|
webpage)
|
||||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||||
|
|
||||||
view_count = self._html_search_regex(
|
view_count = self._html_search_regex(
|
||||||
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
|
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
|
||||||
|
webpage, 'view count', fatal=False)
|
||||||
|
|
||||||
comment_count = None
|
comment_count = None
|
||||||
comment_str = self._html_search_regex(
|
comment_str = self._html_search_regex(
|
||||||
@@ -56,6 +64,7 @@ class KontrTubeIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@@ -2,7 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -28,7 +27,6 @@ class LRTIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # HLS download
|
'skip_download': True, # HLS download
|
||||||
},
|
},
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -44,7 +42,9 @@ class LRTIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage):
|
for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage):
|
||||||
data = json.loads(js_to_json(js))
|
data = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||||
|
if 'provider' not in data:
|
||||||
|
continue
|
||||||
if data['provider'] == 'rtmp':
|
if data['provider'] == 'rtmp':
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'rtmp',
|
'format_id': 'rtmp',
|
||||||
|
@@ -17,7 +17,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class TechTVMITIE(InfoExtractor):
|
class TechTVMITIE(InfoExtractor):
|
||||||
IE_NAME = 'techtv.mit.edu'
|
IE_NAME = 'techtv.mit.edu'
|
||||||
_VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
|
_VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
|
'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
|
||||||
@@ -31,8 +31,7 @@ class TechTVMITIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
raw_page = self._download_webpage(
|
raw_page = self._download_webpage(
|
||||||
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
|
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
|
||||||
clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
|
clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
|
||||||
@@ -106,7 +105,10 @@ class OCWMITIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
|
'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
|
||||||
'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
|
'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
|
||||||
#'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
|
'upload_date': '20121109',
|
||||||
|
'uploader_id': 'MIT',
|
||||||
|
'uploader': 'MIT OpenCourseWare',
|
||||||
|
# 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -115,8 +117,11 @@ class OCWMITIE(InfoExtractor):
|
|||||||
'id': '7K1sB05pE0A',
|
'id': '7K1sB05pE0A',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Session 1: Introduction to Derivatives',
|
'title': 'Session 1: Introduction to Derivatives',
|
||||||
|
'upload_date': '20090818',
|
||||||
|
'uploader_id': 'MIT',
|
||||||
|
'uploader': 'MIT OpenCourseWare',
|
||||||
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
|
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
|
||||||
#'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
|
# 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@@ -52,7 +52,8 @@ class MoeVideoIE(InfoExtractor):
|
|||||||
'height': 296,
|
'height': 296,
|
||||||
'duration': 6027,
|
'duration': 6027,
|
||||||
'filesize': 588257923,
|
'filesize': 588257923,
|
||||||
}
|
},
|
||||||
|
'skip': 'Video has been removed',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -1,63 +1,49 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_urlparse,
|
||||||
compat_str,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MotorsportIE(InfoExtractor):
|
class MotorsportIE(InfoExtractor):
|
||||||
IE_DESC = 'motorsport.com'
|
IE_DESC = 'motorsport.com'
|
||||||
_VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
|
_VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
|
'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
|
||||||
'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '7063',
|
'id': '2-T3WuR-KMM',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Red Bull Racing: 2014 Rules Explained',
|
'title': 'Red Bull Racing: 2014 Rules Explained',
|
||||||
'duration': 207,
|
'duration': 208,
|
||||||
'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
|
'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
|
||||||
'uploader': 'rainiere',
|
'uploader': 'mcomstaff',
|
||||||
'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
|
'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
|
||||||
}
|
'upload_date': '20140903',
|
||||||
|
'thumbnail': r're:^https?://.+\.jpg$'
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
flashvars_code = self._html_search_regex(
|
iframe_path = self._html_search_regex(
|
||||||
r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
|
r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
|
||||||
flashvars = compat_parse_qs(flashvars_code)
|
'iframe path')
|
||||||
params = json.loads(flashvars['parameters'][0])
|
iframe = self._download_webpage(
|
||||||
|
compat_urlparse.urljoin(url, iframe_path), display_id,
|
||||||
e = compat_str(int(time.time()) + 24 * 60 * 60)
|
'Downloading iframe')
|
||||||
base_video_url = params['location'] + '?e=' + e
|
youtube_id = self._search_regex(
|
||||||
s = 'h3hg713fh32'
|
r'www.youtube.com/embed/(.{11})', iframe, 'youtube id')
|
||||||
h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
|
|
||||||
video_url = base_video_url + '&h=' + h
|
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
|
|
||||||
'uploader', fatal=False)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': params['video_id'],
|
'_type': 'url_transparent',
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': params['title'],
|
'url': 'https://youtube.com/watch?v=%s' % youtube_id,
|
||||||
'url': video_url,
|
|
||||||
'description': params.get('description'),
|
|
||||||
'thumbnail': params.get('main_thumb'),
|
|
||||||
'duration': int_or_none(params.get('duration')),
|
|
||||||
'uploader': uploader,
|
|
||||||
}
|
}
|
||||||
|
35
youtube_dl/extractor/nerdcubed.py
Normal file
35
youtube_dl/extractor/nerdcubed.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class NerdCubedFeedIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.nerdcubed.co.uk/feed.json',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'nerdcubed.co.uk feed',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1300,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
feed = self._download_json(url, url, "Downloading NerdCubed JSON feed")
|
||||||
|
|
||||||
|
entries = [{
|
||||||
|
'_type': 'url',
|
||||||
|
'title': feed_entry['title'],
|
||||||
|
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
|
||||||
|
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
|
||||||
|
'url': "http://www.youtube.com/watch?v=" + feed_entry['youtube_id'],
|
||||||
|
} for feed_entry in feed]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'title': 'nerdcubed.co.uk feed',
|
||||||
|
'id': 'nerdcubed-feed',
|
||||||
|
'entries': entries,
|
||||||
|
}
|
86
youtube_dl/extractor/netzkino.py
Normal file
86
youtube_dl/extractor/netzkino.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NetzkinoIE(InfoExtractor):
    """Extractor for netzkino.de hash-bang URLs (``/#!/<category>/<slug>``)."""

    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
        'info_dict': {
            'id': 'rakete-zum-mond',
            'ext': 'mp4',
            'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
            'comments': 'mincount:3',
            'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
            'upload_date': '20120813',
            'thumbnail': 're:https?://.*\.jpg$',
            'timestamp': 1344858571,
            'age_limit': 12,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the film addressed by *url*.

        The category listing is fetched from the JSON API and the post whose
        slug equals the video id is selected; ``next()`` raises
        ``StopIteration`` if no such post exists.  Stream URL templates are
        read from the site's player JavaScript.
        """
        mobj = re.match(self._VALID_URL, url)
        category_id = mobj.group('category')
        video_id = mobj.group('id')

        api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
        api_info = self._download_json(api_url, video_id)
        info = next(
            p for p in api_info['posts'] if p['slug'] == video_id)
        custom_fields = info['custom_fields']

        production_js = self._download_webpage(
            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
            note='Downloading player code')
        avo_js = self._search_regex(
            r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
            production_js, 'URL templates')
        templates = self._parse_json(
            avo_js, video_id, transform_source=js_to_json)

        # File-name suffix appended to the filled-in template, per template key.
        suffix = {
            'hds': '.mp4/manifest.f4m',
            'hls': '.mp4/master.m3u8',
            'pmd': '.mp4',
        }
        film_fn = custom_fields['Streaming'][0]
        formats = [{
            'format_id': key,
            'ext': 'mp4',
            'url': tpl.replace('{}', film_fn) + suffix[key],
        } for key, tpl in templates.items()]
        self._sort_formats(formats)

        comments = [{
            'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
            'id': c['id'],
            'author': c['name'],
            'html': c['content'],
            # A parent of 0 (or no parent at all) marks a top-level comment.
            'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
        } for c in info.get('comments', [])]

        # 'FSK' is looked up with .get(), i.e. treated as optional, so guard
        # the indexing: a missing or empty value must not raise TypeError /
        # IndexError.  int_or_none is expected to pass None through as None.
        fsk = custom_fields.get('FSK') or [None]

        return {
            'id': video_id,
            'formats': formats,
            'comments': comments,
            'title': info['title'],
            'age_limit': int_or_none(fsk[0]),
            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
            'description': clean_html(info.get('content')),
            'thumbnail': info.get('thumbnail'),
            'playlist_title': api_info.get('title'),
            'playlist_id': category_id,
        }
|
@@ -54,7 +54,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
class NHLIE(NHLBaseInfoExtractor):
|
class NHLIE(NHLBaseInfoExtractor):
|
||||||
IE_NAME = 'nhl.com'
|
IE_NAME = 'nhl.com'
|
||||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9a-z-]+)'
|
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||||
|
@@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor):
|
|||||||
'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
|
'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
|
||||||
'uploader': 'JonTron',
|
'uploader': 'JonTron',
|
||||||
'upload_date': '20140125',
|
'upload_date': '20140125',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -9,6 +9,7 @@ from ..utils import (
|
|||||||
qualities,
|
qualities,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
fix_xml_ampersands,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -51,7 +52,21 @@ class NPOIE(InfoExtractor):
|
|||||||
'upload_date': '20130225',
|
'upload_date': '20130225',
|
||||||
'duration': 3000,
|
'duration': 3000,
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'WO_VPRO_043706',
|
||||||
|
'ext': 'wmv',
|
||||||
|
'title': 'De nieuwe mens - Deel 1',
|
||||||
|
'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
|
||||||
|
'duration': 4680,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# mplayer mms download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -74,31 +89,58 @@ class NPOIE(InfoExtractor):
|
|||||||
token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
|
token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
|
|
||||||
for format_id in metadata['pubopties']:
|
pubopties = metadata.get('pubopties')
|
||||||
format_info = self._download_json(
|
if pubopties:
|
||||||
'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
|
quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
|
||||||
video_id, 'Downloading %s JSON' % format_id)
|
for format_id in pubopties:
|
||||||
if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
|
format_info = self._download_json(
|
||||||
continue
|
'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s'
|
||||||
streams = format_info.get('streams')
|
% (video_id, format_id, token),
|
||||||
if streams:
|
video_id, 'Downloading %s JSON' % format_id)
|
||||||
video_info = self._download_json(
|
if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
|
||||||
streams[0] + '&type=json',
|
continue
|
||||||
video_id, 'Downloading %s stream JSON' % format_id)
|
streams = format_info.get('streams')
|
||||||
else:
|
if streams:
|
||||||
video_info = format_info
|
video_info = self._download_json(
|
||||||
video_url = video_info.get('url')
|
streams[0] + '&type=json',
|
||||||
if not video_url:
|
video_id, 'Downloading %s stream JSON' % format_id)
|
||||||
continue
|
else:
|
||||||
if format_id == 'adaptive':
|
video_info = format_info
|
||||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
video_url = video_info.get('url')
|
||||||
else:
|
if not video_url:
|
||||||
|
continue
|
||||||
|
if format_id == 'adaptive':
|
||||||
|
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'quality': quality(format_id),
|
||||||
|
})
|
||||||
|
|
||||||
|
streams = metadata.get('streams')
|
||||||
|
if streams:
|
||||||
|
for i, stream in enumerate(streams):
|
||||||
|
stream_url = stream.get('url')
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
|
asx = self._download_xml(
|
||||||
|
stream_url, video_id,
|
||||||
|
'Downloading stream %d ASX playlist' % i,
|
||||||
|
transform_source=fix_xml_ampersands)
|
||||||
|
ref = asx.find('./ENTRY/Ref')
|
||||||
|
if ref is None:
|
||||||
|
continue
|
||||||
|
video_url = ref.get('href')
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'ext': stream.get('formaat', 'asf'),
|
||||||
'quality': quality(format_id),
|
'quality': stream.get('kwaliteit'),
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -72,12 +72,12 @@ class NRKIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NRKTVIE(InfoExtractor):
|
class NRKTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'
|
_VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||||
'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
|
'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'MUHH48000314',
|
'id': 'MUHH48000314',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
@@ -85,11 +85,11 @@ class NRKTVIE(InfoExtractor):
|
|||||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||||
'upload_date': '20140523',
|
'upload_date': '20140523',
|
||||||
'duration': 1741.52,
|
'duration': 1741.52,
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://tv.nrk.no/program/mdfp15000514',
|
'url': 'http://tv.nrk.no/program/mdfp15000514',
|
||||||
'md5': 'af01795a31f1cf7265c8657534d8077b',
|
'md5': '383650ece2b25ecec996ad7b5bb2a384',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdfp15000514',
|
'id': 'mdfp15000514',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
@@ -97,39 +97,119 @@ class NRKTVIE(InfoExtractor):
|
|||||||
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
||||||
'upload_date': '20140524',
|
'upload_date': '20140524',
|
||||||
'duration': 4605.0,
|
'duration': 4605.0,
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# single playlist video
|
||||||
|
'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||||
|
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515-part2',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||||
|
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||||
|
'upload_date': '20150106',
|
||||||
|
},
|
||||||
|
'skip': 'Only works from Norway',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||||
|
'playlist': [
|
||||||
|
{
|
||||||
|
'md5': '9480285eff92d64f06e02a5367970a7a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515-part1',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
|
||||||
|
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||||
|
'upload_date': '20150106',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515-part2',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||||
|
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||||
|
'upload_date': '20150106',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MSPO40010515',
|
||||||
|
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
|
||||||
|
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||||
|
'upload_date': '20150106',
|
||||||
|
'duration': 6947.5199999999995,
|
||||||
|
},
|
||||||
|
'skip': 'Only works from Norway',
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def _extract_f4m(self, manifest_url, video_id):
|
||||||
|
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
part_id = mobj.group('part_id')
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_meta('title', page, 'title')
|
title = self._html_search_meta(
|
||||||
description = self._html_search_meta('description', page, 'description')
|
'title', webpage, 'title')
|
||||||
thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
|
description = self._html_search_meta(
|
||||||
upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
|
'description', webpage, 'description')
|
||||||
duration = float_or_none(
|
|
||||||
self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
|
thumbnail = self._html_search_regex(
|
||||||
|
r'data-posterimage="([^"]+)"',
|
||||||
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
upload_date = unified_strdate(self._html_search_meta(
|
||||||
|
'rightsfrom', webpage, 'upload date', fatal=False))
|
||||||
|
duration = float_or_none(self._html_search_regex(
|
||||||
|
r'data-duration="([^"]+)"',
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
|
# playlist
|
||||||
|
parts = re.findall(
|
||||||
|
r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
|
||||||
|
if parts:
|
||||||
|
entries = []
|
||||||
|
for current_part_id, stream_url, part_title in parts:
|
||||||
|
if part_id and current_part_id != part_id:
|
||||||
|
continue
|
||||||
|
video_part_id = '%s-part%s' % (video_id, current_part_id)
|
||||||
|
formats = self._extract_f4m(stream_url, video_part_id)
|
||||||
|
entries.append({
|
||||||
|
'id': video_part_id,
|
||||||
|
'title': part_title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
if part_id:
|
||||||
|
if entries:
|
||||||
|
return entries[0]
|
||||||
|
else:
|
||||||
|
playlist = self.playlist_result(entries, video_id, title, description)
|
||||||
|
playlist.update({
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'duration': duration,
|
||||||
|
})
|
||||||
|
return playlist
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
f4m_url = re.search(r'data-media="([^"]+)"', page)
|
f4m_url = re.search(r'data-media="([^"]+)"', webpage)
|
||||||
if f4m_url:
|
if f4m_url:
|
||||||
formats.append({
|
formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
|
||||||
'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
|
||||||
'format_id': 'f4m',
|
|
||||||
'ext': 'flv',
|
|
||||||
})
|
|
||||||
|
|
||||||
m3u8_url = re.search(r'data-hls-media="([^"]+)"', page)
|
m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
|
||||||
if m3u8_url:
|
if m3u8_url:
|
||||||
formats.append({
|
formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
|
||||||
'url': m3u8_url.group(1),
|
|
||||||
'format_id': 'm3u8',
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
70
youtube_dl/extractor/openfilm.py
Normal file
70
youtube_dl/extractor/openfilm.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_iso8601,
|
||||||
|
compat_urllib_parse,
|
||||||
|
parse_age_limit,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class OpenFilmIE(InfoExtractor):
    """Extractor for openfilm.com video pages (``/videos/<alias>``)."""

    # The 'www.' prefix is optional, as in the other extractors; the original
    # group lacked the '?' quantifier and rejected bare-domain URLs.
    _VALID_URL = r'http://(?:www\.)?openfilm\.com/videos/(?P<id>.+)'
    _TEST = {
        'url': 'http://www.openfilm.com/videos/human-resources-remastered',
        'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37',
        'info_dict': {
            'id': '32736',
            'display_id': 'human-resources-remastered',
            'ext': 'mp4',
            'title': 'Human Resources (Remastered)',
            'description': 'Social Engineering in the 20th Century.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 7164,
            'timestamp': 1334756988,
            'upload_date': '20120418',
            'uploader_id': '41117',
            'view_count': int,
            'age_limit': 0,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The video metadata is a JSON object carried in the ``p=`` parameter
        of the page's Open Graph video URL.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The og:video URL is percent/plus-encoded; decode it before
        # searching for the embedded JSON.
        player = compat_urllib_parse.unquote_plus(
            self._og_search_video_url(webpage))

        video = json.loads(self._search_regex(
            r'\bp=({.+?})(?:&|$)', player, 'video JSON'))

        video_url = '%s1.mp4' % video['location']
        video_id = video.get('video_id')
        # Prefer the alias reported in the JSON over the one from the URL.
        display_id = video.get('alias') or display_id
        title = video.get('title')
        description = video.get('description')
        thumbnail = video.get('main_thumb')
        duration = int_or_none(video.get('duration'))
        timestamp = parse_iso8601(video.get('dt_published'), ' ')
        uploader_id = video.get('user_id')
        view_count = int_or_none(video.get('views_count'))
        age_limit = parse_age_limit(video.get('age_limit'))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'age_limit': age_limit,
        }
|
@@ -4,6 +4,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
US_RATINGS,
|
US_RATINGS,
|
||||||
)
|
)
|
||||||
@@ -151,6 +152,19 @@ class PBSIE(InfoExtractor):
|
|||||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||||
info = self._download_json(info_url, display_id)
|
info = self._download_json(info_url, display_id)
|
||||||
|
|
||||||
|
redirect_url = info['alternate_encoding']['url']
|
||||||
|
redirect_info = self._download_json(
|
||||||
|
redirect_url + '?format=json', display_id,
|
||||||
|
'Downloading video url info')
|
||||||
|
if redirect_info['status'] == 'error':
|
||||||
|
if redirect_info['http_code'] == 403:
|
||||||
|
message = (
|
||||||
|
'The video is not available in your region due to '
|
||||||
|
'right restrictions')
|
||||||
|
else:
|
||||||
|
message = redirect_info['message']
|
||||||
|
raise ExtractorError(message, expected=True)
|
||||||
|
|
||||||
rating_str = info.get('rating')
|
rating_str = info.get('rating')
|
||||||
if rating_str is not None:
|
if rating_str is not None:
|
||||||
rating_str = rating_str.rpartition('-')[2]
|
rating_str = rating_str.rpartition('-')[2]
|
||||||
@@ -160,7 +174,7 @@ class PBSIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'url': info['alternate_encoding']['url'],
|
'url': redirect_info['url'],
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': info['program'].get('description'),
|
'description': info['program'].get('description'),
|
||||||
'thumbnail': info.get('image_url'),
|
'thumbnail': info.get('image_url'),
|
||||||
|
@@ -26,6 +26,7 @@ class PlayedIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'youtube-dl_test_video.mp4',
|
'title': 'youtube-dl_test_video.mp4',
|
||||||
},
|
},
|
||||||
|
'skip': 'Removed for copyright infringement.', # oh wow
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
63
youtube_dl/extractor/radiobremen.py
Normal file
63
youtube_dl/extractor/radiobremen.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_duration
|
||||||
|
|
||||||
|
|
||||||
|
class RadioBremenIE(InfoExtractor):
    """Extractor for Radio Bremen Mediathek pages (``/mediathek/?id=<n>``)."""

    # 'https?' (optional 's'); the original 'http?' made the final 'p'
    # optional instead and could never match an https URL.
    _VALID_URL = r'https?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
    IE_NAME = 'radiobremen'

    _TEST = {
        'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720',
        'info_dict': {
            'id': '114720',
            'ext': 'mp4',
            'duration': 1685,
            'width': 512,
            'title': 'buten un binnen vom 22. Dezember',
            'thumbnail': 're:https?://.*\.jpg$',
            'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the Mediathek item addressed by *url*.

        Title, description and duration come from a separate metadata
        document; the download URL is assembled from arguments of the
        ``ardformatplayerclassic(...)`` call found in the page itself.
        """
        video_id = self._match_id(url)

        meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id
        meta_doc = self._download_webpage(
            meta_url, video_id, 'Downloading metadata')
        title = self._html_search_regex(
            r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title")
        description = self._html_search_regex(
            r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False)
        duration = parse_duration(self._html_search_regex(
            r"Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>",
            meta_doc, "duration", fatal=False))

        page_doc = self._download_webpage(
            url, video_id, 'Downloading video information')
        # NOTE: re.search returns None when the player call is not present in
        # the page; the group() calls below would then raise AttributeError.
        mobj = re.search(
            r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
            page_doc)
        video_url = (
            "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
            (video_id, video_id, mobj.group("secret"), mobj.group('width')))

        formats = [{
            'url': video_url,
            'ext': 'mp4',
            'width': int(mobj.group("width")),
        }]
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
            'thumbnail': mobj.group('thumbnail'),
        }
|
@@ -8,7 +8,7 @@ from ..utils import parse_duration
|
|||||||
|
|
||||||
class RtlXlIE(InfoExtractor):
|
class RtlXlIE(InfoExtractor):
|
||||||
IE_NAME = 'rtlxl.nl'
|
IE_NAME = 'rtlxl.nl'
|
||||||
_VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
|
_VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||||
|
@@ -12,7 +12,7 @@ class RTPIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
|
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '174042',
|
'id': 'e174042',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Paixões Cruzadas',
|
'title': 'Paixões Cruzadas',
|
||||||
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
||||||
|
@@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RutubeEmbedIE(InfoExtractor):
    """Resolve a Rutube embed URL to the canonical video page."""

    IE_NAME = 'rutube:embed'
    IE_DESC = 'Rutube embedded videos'
    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    # The pattern text itself is unchanged.
    _VALID_URL = r'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
        'info_dict': {
            'id': 'a10e53b86e8f349080f718582ce4c661',
            'ext': 'mp4',
            'upload_date': '20131223',
            'uploader_id': '297833',
            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
            'uploader': 'subziro89 ILya',
            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
        },
        'params': {
            'skip_download': 'Requires ffmpeg',
        },
    }

    def _real_extract(self, url):
        """Fetch the embed page and delegate to the 'Rutube' extractor.

        The target is the href of the page's ``<link rel="canonical">`` tag.
        """
        embed_id = self._match_id(url)
        webpage = self._download_webpage(url, embed_id)

        canonical_url = self._html_search_regex(
            r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
            'Canonical URL')
        return self.url_result(canonical_url, 'Rutube')
|
||||||
|
|
||||||
|
|
||||||
class RutubeChannelIE(InfoExtractor):
|
class RutubeChannelIE(InfoExtractor):
|
||||||
IE_NAME = 'rutube:channel'
|
IE_NAME = 'rutube:channel'
|
||||||
IE_DESC = 'Rutube channels'
|
IE_DESC = 'Rutube channels'
|
||||||
|
49
youtube_dl/extractor/screencastomatic.py
Normal file
49
youtube_dl/extractor/screencastomatic.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
js_to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ScreencastOMaticIE(InfoExtractor):
    """Extractor for screencast-o-matic.com watch pages."""

    _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
    _TEST = {
        'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
        'md5': '483583cb80d92588f15ccbedd90f0c18',
        'info_dict': {
            'id': 'c2lD3BeOPl',
            'ext': 'mp4',
            'title': 'Welcome to 3-4 Philosophy @ DECV!',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the screencast at *url*.

        The media location is read from the jwplayer setup object embedded
        in the page; the first mode of type ``html5`` is used.

        Raises ExtractorError if the setup object lists no html5 mode.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_setup = self._search_regex(
            r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);",
            webpage, 'setup code')
        data = self._parse_json(
            player_setup, video_id, transform_source=js_to_json)

        # Pick the first html5 playback mode; give up if there is none.
        for mode in data['modes']:
            if mode.get('type') == 'html5':
                video_data = mode
                break
        else:
            raise ExtractorError('Could not find any video entries!')

        # The configured file may be relative, so resolve it against the page URL.
        media_path = video_data['config']['file']
        video_url = compat_urlparse.urljoin(url, media_path)
        thumbnail = data.get('image')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'url': video_url,
            'ext': 'mp4',
            'thumbnail': thumbnail,
        }
|
@@ -24,7 +24,7 @@ class SexyKarmaIE(InfoExtractor):
|
|||||||
'title': 'Taking a quick pee.',
|
'title': 'Taking a quick pee.',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'uploader': 'wildginger7',
|
'uploader': 'wildginger7',
|
||||||
'upload_date': '20141007',
|
'upload_date': '20141008',
|
||||||
'duration': 22,
|
'duration': 22,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
@@ -45,6 +45,7 @@ class SexyKarmaIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
|
'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
|
||||||
@@ -61,6 +62,7 @@ class SexyKarmaIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@@ -114,4 +116,5 @@ class SexyKarmaIE(InfoExtractor):
|
|||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
|
|||||||
page_title = mobj.group('title')
|
page_title = mobj.group('title')
|
||||||
webpage = self._download_webpage(url, page_title)
|
webpage = self._download_webpage(url, page_title)
|
||||||
slideshare_obj = self._search_regex(
|
slideshare_obj = self._search_regex(
|
||||||
r'var slideshare_object = ({.*?}); var user_info =',
|
r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
|
||||||
webpage, 'slideshare object')
|
webpage, 'slideshare object')
|
||||||
info = json.loads(slideshare_obj)
|
info = json.loads(slideshare_obj)
|
||||||
if info['slideshow']['type'] != 'video':
|
if info['slideshow']['type'] != 'video':
|
||||||
@@ -41,7 +41,7 @@ class SlideshareIE(InfoExtractor):
|
|||||||
ext = info['jsplayer']['video_extension']
|
ext = info['jsplayer']['video_extension']
|
||||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
|
r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
|
||||||
'description', fatal=False)
|
'description', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -69,6 +69,7 @@ class SmotriIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'videopassword': 'qwerty',
|
'videopassword': 'qwerty',
|
||||||
},
|
},
|
||||||
|
'skip': 'Video is not approved by moderator',
|
||||||
},
|
},
|
||||||
# age limit + video-password
|
# age limit + video-password
|
||||||
{
|
{
|
||||||
@@ -86,7 +87,8 @@ class SmotriIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'videopassword': '333'
|
'videopassword': '333'
|
||||||
}
|
},
|
||||||
|
'skip': 'Video is not approved by moderator',
|
||||||
},
|
},
|
||||||
# swf player
|
# swf player
|
||||||
{
|
{
|
||||||
|
@@ -1,11 +1,10 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from .common import compat_str
|
||||||
|
|
||||||
|
|
||||||
class SohuIE(InfoExtractor):
|
class SohuIE(InfoExtractor):
|
||||||
@@ -29,60 +28,73 @@ class SohuIE(InfoExtractor):
|
|||||||
base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
|
base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
|
||||||
else:
|
else:
|
||||||
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
||||||
data_url = base_data_url + str(vid_id)
|
|
||||||
data_json = self._download_webpage(
|
return self._download_json(
|
||||||
data_url, video_id,
|
base_data_url + vid_id, video_id,
|
||||||
note='Downloading JSON data for ' + str(vid_id))
|
'Downloading JSON data for %s' % vid_id)
|
||||||
return json.loads(data_json)
|
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
mytv = mobj.group('mytv') is not None
|
mytv = mobj.group('mytv') is not None
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
|
raw_title = self._html_search_regex(
|
||||||
webpage, 'video title')
|
r'(?s)<title>(.+?)</title>',
|
||||||
|
webpage, 'video title')
|
||||||
title = raw_title.partition('-')[0].strip()
|
title = raw_title.partition('-')[0].strip()
|
||||||
|
|
||||||
vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
|
vid = self._html_search_regex(
|
||||||
'video path')
|
r'var vid ?= ?["\'](\d+)["\']',
|
||||||
data = _fetch_data(vid, mytv)
|
webpage, 'video path')
|
||||||
|
vid_data = _fetch_data(vid, mytv)
|
||||||
|
|
||||||
QUALITIES = ('ori', 'super', 'high', 'nor')
|
formats_json = {}
|
||||||
vid_ids = [data['data'][q + 'Vid']
|
for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
|
||||||
for q in QUALITIES
|
vid_id = vid_data['data'].get('%sVid' % format_id)
|
||||||
if data['data'][q + 'Vid'] != 0]
|
if not vid_id:
|
||||||
if not vid_ids:
|
continue
|
||||||
raise ExtractorError('No formats available for this video')
|
vid_id = compat_str(vid_id)
|
||||||
|
formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
|
||||||
|
|
||||||
# For now, we just pick the highest available quality
|
part_count = vid_data['data']['totalBlocks']
|
||||||
vid_id = vid_ids[-1]
|
|
||||||
|
|
||||||
format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
|
|
||||||
part_count = format_data['data']['totalBlocks']
|
|
||||||
allot = format_data['allot']
|
|
||||||
prot = format_data['prot']
|
|
||||||
clipsURL = format_data['data']['clipsURL']
|
|
||||||
su = format_data['data']['su']
|
|
||||||
|
|
||||||
playlist = []
|
playlist = []
|
||||||
for i in range(part_count):
|
for i in range(part_count):
|
||||||
part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
|
formats = []
|
||||||
(allot, prot, clipsURL[i], su[i]))
|
for format_id, format_data in formats_json.items():
|
||||||
part_str = self._download_webpage(
|
allot = format_data['allot']
|
||||||
part_url, video_id,
|
prot = format_data['prot']
|
||||||
note='Downloading part %d of %d' % (i + 1, part_count))
|
|
||||||
|
|
||||||
part_info = part_str.split('|')
|
data = format_data['data']
|
||||||
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
|
clips_url = data['clipsURL']
|
||||||
|
su = data['su']
|
||||||
|
|
||||||
video_info = {
|
part_str = self._download_webpage(
|
||||||
'id': '%s_part%02d' % (video_id, i + 1),
|
'http://%s/?prot=%s&file=%s&new=%s' %
|
||||||
|
(allot, prot, clips_url[i], su[i]),
|
||||||
|
video_id,
|
||||||
|
'Downloading %s video URL part %d of %d'
|
||||||
|
% (format_id, i + 1, part_count))
|
||||||
|
|
||||||
|
part_info = part_str.split('|')
|
||||||
|
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'filesize': data['clipsBytes'][i],
|
||||||
|
'width': data['width'],
|
||||||
|
'height': data['height'],
|
||||||
|
'fps': data['fps'],
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
playlist.append({
|
||||||
|
'id': '%s_part%d' % (video_id, i + 1),
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': video_url,
|
'duration': vid_data['data']['clipsDuration'][i],
|
||||||
'ext': 'mp4',
|
'formats': formats,
|
||||||
}
|
})
|
||||||
playlist.append(video_info)
|
|
||||||
|
|
||||||
if len(playlist) == 1:
|
if len(playlist) == 1:
|
||||||
info = playlist[0]
|
info = playlist[0]
|
||||||
|
80
youtube_dl/extractor/soulanime.py
Normal file
80
youtube_dl/extractor/soulanime.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
HEADRequest,
|
||||||
|
urlhandle_detect_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SoulAnimeWatchingIE(InfoExtractor):
|
||||||
|
IE_NAME = "soulanime:watching"
|
||||||
|
IE_DESC = "SoulAnime video"
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
|
||||||
|
'md5': '05fae04abf72298098b528e98abf4298',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'seirei-tsukai-no-blade-dance-episode-9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'seirei-tsukai-no-blade-dance-episode-9',
|
||||||
|
'description': 'seirei-tsukai-no-blade-dance-episode-9'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
domain = mobj.group('domain')
|
||||||
|
|
||||||
|
page = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_url_encoded = self._html_search_regex(
|
||||||
|
r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
|
||||||
|
video_url = "http://www.soul-anime." + domain + video_url_encoded
|
||||||
|
|
||||||
|
ext_req = HEADRequest(video_url)
|
||||||
|
ext_handle = self._request_webpage(
|
||||||
|
ext_req, video_id, note='Determining extension')
|
||||||
|
ext = urlhandle_detect_ext(ext_handle)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': ext,
|
||||||
|
'title': video_id,
|
||||||
|
'description': video_id
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SoulAnimeSeriesIE(InfoExtractor):
|
||||||
|
IE_NAME = "soulanime:series"
|
||||||
|
IE_DESC = "SoulAnime Series"
|
||||||
|
|
||||||
|
_VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
|
||||||
|
|
||||||
|
_EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'black-rock-shooter-tv'
|
||||||
|
},
|
||||||
|
'playlist_count': 8
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
series_id = mobj.group('id')
|
||||||
|
domain = mobj.group('domain')
|
||||||
|
|
||||||
|
pattern = re.compile(self._EPISODE_REGEX)
|
||||||
|
|
||||||
|
page = self._download_webpage(url, series_id, "Downloading series page")
|
||||||
|
mobj = pattern.findall(page)
|
||||||
|
|
||||||
|
entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
|
||||||
|
|
||||||
|
return self.playlist_result(entries, series_id)
|
@@ -60,9 +60,10 @@ class SportDeutschlandIE(InfoExtractor):
|
|||||||
|
|
||||||
categories = list(data.get('section', {}).get('tags', {}).values())
|
categories = list(data.get('section', {}).get('tags', {}).values())
|
||||||
asset = data['asset']
|
asset = data['asset']
|
||||||
|
assets_info = self._download_json(asset['url'], video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
smil_url = asset['video']
|
smil_url = assets_info['video']
|
||||||
if '.smil' in smil_url:
|
if '.smil' in smil_url:
|
||||||
m3u8_url = smil_url.replace('.smil', '.m3u8')
|
m3u8_url = smil_url.replace('.smil', '.m3u8')
|
||||||
formats.extend(
|
formats.extend(
|
||||||
|
@@ -28,23 +28,27 @@ class SunPornoIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
title = self._html_search_regex(
|
||||||
description = self._html_search_meta('description', webpage, 'description')
|
r'<title>([^<]+)</title>', webpage, 'title')
|
||||||
|
description = self._html_search_meta(
|
||||||
|
'description', webpage, 'description')
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
duration = parse_duration(self._search_regex(
|
duration = parse_duration(self._search_regex(
|
||||||
r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False))
|
r'itemprop="duration">\s*(\d+:\d+)\s*<',
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = int_or_none(self._html_search_regex(
|
||||||
r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False))
|
r'class="views">\s*(\d+)\s*<',
|
||||||
|
webpage, 'view count', fatal=False))
|
||||||
comment_count = int_or_none(self._html_search_regex(
|
comment_count = int_or_none(self._html_search_regex(
|
||||||
r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
|
r'(\d+)</b> Comments?',
|
||||||
|
webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
quality = qualities(['mp4', 'flv'])
|
quality = qualities(['mp4', 'flv'])
|
||||||
|
@@ -57,9 +57,7 @@ class TeacherTubeIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_meta('title', webpage, 'title', fatal=True)
|
title = self._html_search_meta('title', webpage, 'title', fatal=True)
|
||||||
|
@@ -13,7 +13,7 @@ from ..compat import (
|
|||||||
class TEDIE(SubtitlesInfoExtractor):
|
class TEDIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?P<proto>https?://)
|
(?P<proto>https?://)
|
||||||
(?P<type>www|embed)(?P<urlmain>\.ted\.com/
|
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
|
||||||
(
|
(
|
||||||
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||||
|
|
|
|
||||||
@@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url, re.VERBOSE)
|
m = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
if m.group('type') == 'embed':
|
if m.group('type').startswith('embed'):
|
||||||
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
|
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
|
||||||
return self.url_result(desktop_url, 'TED')
|
return self.url_result(desktop_url, 'TED')
|
||||||
name = m.group('name')
|
name = m.group('name')
|
||||||
|
@@ -6,7 +6,7 @@ from .mitele import MiTeleIE
|
|||||||
|
|
||||||
class TelecincoIE(MiTeleIE):
|
class TelecincoIE(MiTeleIE):
|
||||||
IE_NAME = 'telecinco.es'
|
IE_NAME = 'telecinco.es'
|
||||||
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html'
|
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||||
|
53
youtube_dl/extractor/teletask.py
Normal file
53
youtube_dl/extractor/teletask.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
|
class TeleTaskIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.tele-task.de/archive/video/html5/26168/',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Duplicate Detection',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': '290ef69fb2792e481169c3958dbfbd57',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '26168-speaker',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Duplicate Detection',
|
||||||
|
'upload_date': '20141218',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '26168-slides',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Duplicate Detection',
|
||||||
|
'upload_date': '20141218',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
lecture_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, lecture_id)
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'itemprop="name">([^<]+)</a>', webpage, 'title')
|
||||||
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
|
r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False))
|
||||||
|
|
||||||
|
entries = [{
|
||||||
|
'id': '%s-%s' % (lecture_id, format_id),
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
} for format_id, video_url in re.findall(
|
||||||
|
r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)]
|
||||||
|
|
||||||
|
return self.playlist_result(entries, lecture_id, title)
|
@@ -8,7 +8,6 @@ class TenPlayIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
|
_VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
|
'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
|
||||||
#'md5': 'd68703d9f73dc8fccf3320ab34202590',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2695695426001',
|
'id': '2695695426001',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
|
@@ -1,15 +1,13 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class TF1IE(InfoExtractor):
|
class TF1IE(InfoExtractor):
|
||||||
"""TF1 uses the wat.tv player."""
|
"""TF1 uses the wat.tv player."""
|
||||||
_VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
|
_VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
|
||||||
_TEST = {
|
_TESTS = {
|
||||||
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10635995',
|
'id': '10635995',
|
||||||
@@ -21,14 +19,26 @@ class TF1IE(InfoExtractor):
|
|||||||
# Sometimes wat serves the whole file with the --test option
|
# Sometimes wat serves the whole file with the --test option
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12043945',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Le grand Mystérioso - Chuggington',
|
||||||
|
'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
|
||||||
|
'upload_date': '20150103',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Sometimes wat serves the whole file with the --test option
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
embed_url = self._html_search_regex(
|
embed_url = self._html_search_regex(
|
||||||
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
|
r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
|
||||||
embed_page = self._download_webpage(embed_url, video_id,
|
embed_page = self._download_webpage(embed_url, video_id,
|
||||||
'Downloading embed player page')
|
'Downloading embed player page')
|
||||||
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
||||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
)
|
)
|
||||||
@@ -16,7 +16,7 @@ from ..utils import (
|
|||||||
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
||||||
|
|
||||||
|
|
||||||
class ThePlatformIE(InfoExtractor):
|
class ThePlatformIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
|
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
|
||||||
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|
||||||
@@ -66,6 +66,20 @@ class ThePlatformIE(InfoExtractor):
|
|||||||
info_json = self._download_webpage(info_url, video_id)
|
info_json = self._download_webpage(info_url, video_id)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
captions = info.get('captions')
|
||||||
|
if isinstance(captions, list):
|
||||||
|
for caption in captions:
|
||||||
|
lang, src = caption.get('lang'), caption.get('src')
|
||||||
|
if lang and src:
|
||||||
|
subtitles[lang] = src
|
||||||
|
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
|
subtitles = self.extract_subtitles(video_id, subtitles)
|
||||||
|
|
||||||
head = meta.find(_x('smil:head'))
|
head = meta.find(_x('smil:head'))
|
||||||
body = meta.find(_x('smil:body'))
|
body = meta.find(_x('smil:body'))
|
||||||
|
|
||||||
@@ -117,6 +131,7 @@ class ThePlatformIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': info['description'],
|
'description': info['description'],
|
||||||
'thumbnail': info['defaultThumbnailUrl'],
|
'thumbnail': info['defaultThumbnailUrl'],
|
||||||
|
@@ -15,7 +15,7 @@ class TMZIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
|
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
|
||||||
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
|
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
|
||||||
'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640',
|
'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
class TNAFlixIE(InfoExtractor):
|
class TNAFlixIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
|
||||||
|
|
||||||
_TITLE_REGEX = None
|
_TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
|
||||||
_DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
|
_DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
|
||||||
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
|
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
|
||||||
|
|
||||||
@@ -49,8 +49,8 @@ class TNAFlixIE(InfoExtractor):
|
|||||||
if duration:
|
if duration:
|
||||||
duration = parse_duration(duration[1:])
|
duration = parse_duration(duration[1:])
|
||||||
|
|
||||||
cfg_url = self._html_search_regex(
|
cfg_url = self._proto_relative_url(self._html_search_regex(
|
||||||
self._CONFIG_REGEX, webpage, 'flashvars.config')
|
self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
|
||||||
|
|
||||||
cfg_xml = self._download_xml(
|
cfg_xml = self._download_xml(
|
||||||
cfg_url, display_id, note='Downloading metadata',
|
cfg_url, display_id, note='Downloading metadata',
|
||||||
|
@@ -9,7 +9,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class TudouIE(InfoExtractor):
|
class TudouIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
|
_VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
||||||
'md5': '140a49ed444bd22f93330985d8475fcb',
|
'md5': '140a49ed444bd22f93330985d8475fcb',
|
||||||
@@ -27,13 +27,6 @@ class TudouIE(InfoExtractor):
|
|||||||
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
|
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}, {
|
|
||||||
'url': 'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'todo.mp4',
|
|
||||||
},
|
|
||||||
'add_ie': ['Youku'],
|
|
||||||
'skip': 'Only works from China'
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _url_for_id(self, id, quality=None):
|
def _url_for_id(self, id, quality=None):
|
||||||
@@ -45,8 +38,7 @@ class TudouIE(InfoExtractor):
|
|||||||
return final_url
|
return final_url
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(2)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
|
m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
|
||||||
@@ -87,4 +79,9 @@ class TudouIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
result.append(part_info)
|
result.append(part_info)
|
||||||
|
|
||||||
return result
|
return {
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'entries': result,
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
}
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user