Compare commits
312 Commits
2014.09.14
...
2014.10.23
Author | SHA1 | Date | |
---|---|---|---|
4661e243f8 | |||
f3cd403c2b | |||
ad5f53ac72 | |||
75da98e9e1 | |||
281d3f1d68 | |||
6283c10b1c | |||
85d7b76586 | |||
2399535fd1 | |||
52cffcb186 | |||
8f3b5397a7 | |||
9bbec55255 | |||
6b445558ff | |||
6bf6962062 | |||
40bca5f927 | |||
74214d35c5 | |||
d24a2b20b4 | |||
cc98a3f096 | |||
16efb3695f | |||
4510d14f0a | |||
0f175a932f | |||
849b269273 | |||
95fa5fb569 | |||
77c3c5c5ed | |||
159444a668 | |||
f9befee1f5 | |||
9471c44405 | |||
013bfdd84c | |||
46fd0dd5a5 | |||
4698f0d858 | |||
355d074ff9 | |||
7da224c907 | |||
1723edb1a5 | |||
4740864508 | |||
09a42738fc | |||
df928d500f | |||
a72cbfacf0 | |||
62a164e713 | |||
5f58165def | |||
bd4e40df1a | |||
1419fafd36 | |||
9b36dcbd65 | |||
2aefb886fa | |||
72961c2a8a | |||
4c1ce987b0 | |||
8a2300a597 | |||
1cc887cbf0 | |||
203fb43f36 | |||
4d7b03f1f2 | |||
72ebb5e4b4 | |||
8450c15c25 | |||
b88b45e46c | |||
2417dc1715 | |||
23d83ad4d5 | |||
772ece3571 | |||
2c9f31188b | |||
d18be55533 | |||
ac20fc047a | |||
b4c3c8c172 | |||
3357110a4c | |||
e29fdedb45 | |||
4828703f14 | |||
afe08e0d4a | |||
071420e136 | |||
f4cf848d1d | |||
b7b2ca6e2b | |||
1409704afa | |||
c8e390c2b0 | |||
823f1e015a | |||
3c06d3715e | |||
762958d5af | |||
53d9009bdb | |||
1b725173a5 | |||
0ca41c3d9c | |||
fc6861b175 | |||
b097b5f246 | |||
385009fc44 | |||
ced659bb4d | |||
842cca7d56 | |||
b3826f6c8d | |||
7bc8780c57 | |||
c59c3c84ed | |||
24f7fb5e1e | |||
3b700f8d43 | |||
31d06400ec | |||
642b76ac15 | |||
4c4de296d4 | |||
b10609d98c | |||
3ae165aa10 | |||
e4b85e35d0 | |||
bb0c206f59 | |||
b81f484b60 | |||
5e69192ef7 | |||
e9be9a6acd | |||
f47754f061 | |||
d838b1bd4a | |||
fe506288bd | |||
d397c0b3dd | |||
146c80e256 | |||
f78c01f68b | |||
8489578df4 | |||
10606050bc | |||
d9bf465200 | |||
01384d6e4b | |||
08d5230945 | |||
852f8641e8 | |||
18937a50a4 | |||
e4d6cca0c1 | |||
d5feab9aaa | |||
9e77c60c9a | |||
1414df5ce2 | |||
e80f40e5ca | |||
d3c9af84fc | |||
59d206ca2d | |||
e7b6d12254 | |||
410f3e73ab | |||
07e764439a | |||
f8fb4a7ca8 | |||
e497a7f2ca | |||
a3b6be104d | |||
b7bb0df21e | |||
4dc19c0982 | |||
58ea7ec81e | |||
c0f64ac689 | |||
7a08ad7d59 | |||
2d29ac4f23 | |||
a7a747d687 | |||
fdb4d278bf | |||
59c03a9bfb | |||
e7db973328 | |||
99b67fecc5 | |||
89294b5f50 | |||
72d53356f6 | |||
9e1e67fc15 | |||
1e60e5546e | |||
457749a703 | |||
937f935db0 | |||
80bcefcd77 | |||
8c23945c72 | |||
989b4b2b86 | |||
2a7b4681c6 | |||
8157ae3904 | |||
e50e2fcd4d | |||
6be451f422 | |||
5e4f06197f | |||
761e1645e0 | |||
8ff14175e2 | |||
dbe3043cd6 | |||
a8eb5a8e61 | |||
6043f1df4e | |||
12548cd933 | |||
2593039522 | |||
35d3e63d24 | |||
27aede9074 | |||
f5b7e6a842 | |||
a1f934b171 | |||
a43ee88c6f | |||
e2dce53781 | |||
1770ed9e86 | |||
457ac58cc7 | |||
9c44d2429b | |||
d2e32f7df5 | |||
67077b182b | |||
5f4c318844 | |||
dfee83234b | |||
7f5c0c4a19 | |||
4bc77c8417 | |||
22dd3fad86 | |||
d6e6a42256 | |||
76e7d1e74b | |||
38c4d41b74 | |||
f0b8e3607d | |||
51ee08c4bb | |||
c841789772 | |||
c121a75b36 | |||
5a8b77551d | |||
0217aee154 | |||
b14f3a4c1d | |||
92f7963f6e | |||
88fbe4c2cc | |||
394599f422 | |||
ed9266db90 | |||
f4b1c7adb8 | |||
c95eeb7b80 | |||
5e43e3803c | |||
a89435a7a8 | |||
a0a90b3ba1 | |||
c664182323 | |||
6be1cd4ddb | |||
ee0d90707a | |||
f776d8f608 | |||
b3ac3a51ac | |||
0b75c2a88b | |||
7b7518124e | |||
68b0973046 | |||
3a203b8bfa | |||
70752ccefd | |||
0155549d6c | |||
b66745288e | |||
2a1325fdde | |||
2f9e8776df | |||
497339fa0e | |||
8e6f8051f0 | |||
11b3ce8509 | |||
6a5af6acb9 | |||
9a0d98bb40 | |||
fbd3162e49 | |||
54e9a4af95 | |||
8a32b82e46 | |||
fec02bcc90 | |||
c6e90caaa6 | |||
4bbf157794 | |||
6b08cdf626 | |||
b686fc18da | |||
0b97f3a936 | |||
eb73f2649f | |||
f0b5d6af74 | |||
2f771f6c99 | |||
3b2f933b01 | |||
cc746841e7 | |||
ac7553d031 | |||
cdc628a498 | |||
69ea8ca42c | |||
4bc3a23ec5 | |||
bd5650ac64 | |||
86916dae4b | |||
f7d159cf95 | |||
632e5684ce | |||
094d42fe44 | |||
63cddb6477 | |||
273dea4248 | |||
f90d95edeb | |||
45c85d7ba1 | |||
d0df92928b | |||
df8f53f752 | |||
e35cb78c40 | |||
3ef7d11acd | |||
224ce0d872 | |||
dd41e8c82b | |||
b509a4b176 | |||
b28c8403b2 | |||
7bd4b4229a | |||
72e450c555 | |||
522c55b7f2 | |||
58e7071a2c | |||
516812df41 | |||
752297631f | |||
34e14a9beb | |||
ffb5b05db1 | |||
3e8fcd9fa1 | |||
746c67d72f | |||
5aa38e75b2 | |||
532f5bff70 | |||
f566d9f1d5 | |||
7267bd536f | |||
589d3d7c7a | |||
46f74bcf5c | |||
37bfe8ace4 | |||
0529eef5a4 | |||
fd78a4d3e6 | |||
1de33fafd9 | |||
e2e5dae64d | |||
09b23c902b | |||
109a540e7a | |||
2914e5f00f | |||
2f834e9381 | |||
9296738f20 | |||
0e59b9fffb | |||
67abbe9527 | |||
944a3de278 | |||
5a13fe9ed2 | |||
6b6096d0b7 | |||
d0246d07f1 | |||
727a98c3ee | |||
997987d568 | |||
c001f939e4 | |||
e825c38082 | |||
a04aa7a9e6 | |||
7cdd5339b3 | |||
38349518f1 | |||
64892c0b79 | |||
dc9f356846 | |||
ed86ee3b4a | |||
7bb5df1cda | |||
37a81dff04 | |||
fc96eb4e21 | |||
ae369738b0 | |||
e2037b3f7d | |||
5419033935 | |||
2eebf060af | |||
acd9db5902 | |||
d0e8b3d59b | |||
c15dd15388 | |||
0003a5c416 | |||
21f2927f70 | |||
e5a79071a5 | |||
ca0e7a2b17 | |||
b523bb71ab | |||
a020a0dc20 | |||
6d1f2431bd | |||
fdea3abdf8 | |||
59d284c316 | |||
98703c7fbf | |||
b04c8f7358 | |||
56d1912f1d | |||
eb3bd7ba8d | |||
2bca84e345 | |||
984e8e14ea | |||
d05cfe0600 | |||
37419b4f99 | |||
a8aa99442f | |||
94b539d155 | |||
b8874d4d4e |
2
.gitignore
vendored
2
.gitignore
vendored
@ -11,6 +11,7 @@ MANIFEST
|
|||||||
README.txt
|
README.txt
|
||||||
youtube-dl.1
|
youtube-dl.1
|
||||||
youtube-dl.bash-completion
|
youtube-dl.bash-completion
|
||||||
|
youtube-dl.fish
|
||||||
youtube-dl
|
youtube-dl
|
||||||
youtube-dl.exe
|
youtube-dl.exe
|
||||||
youtube-dl.tar.gz
|
youtube-dl.tar.gz
|
||||||
@ -29,3 +30,4 @@ updates_key.pem
|
|||||||
*.swp
|
*.swp
|
||||||
test/testdata
|
test/testdata
|
||||||
.tox
|
.tox
|
||||||
|
youtube-dl.zsh
|
||||||
|
@ -2,5 +2,6 @@ include README.md
|
|||||||
include test/*.py
|
include test/*.py
|
||||||
include test/*.json
|
include test/*.json
|
||||||
include youtube-dl.bash-completion
|
include youtube-dl.bash-completion
|
||||||
|
include youtube-dl.fish
|
||||||
include youtube-dl.1
|
include youtube-dl.1
|
||||||
recursive-include docs Makefile conf.py *.rst
|
recursive-include docs Makefile conf.py *.rst
|
||||||
|
30
Makefile
30
Makefile
@ -1,7 +1,7 @@
|
|||||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish
|
||||||
|
|
||||||
cleanall: clean
|
cleanall: clean
|
||||||
rm -f youtube-dl youtube-dl.exe
|
rm -f youtube-dl youtube-dl.exe
|
||||||
@ -9,6 +9,7 @@ cleanall: clean
|
|||||||
PREFIX ?= /usr/local
|
PREFIX ?= /usr/local
|
||||||
BINDIR ?= $(PREFIX)/bin
|
BINDIR ?= $(PREFIX)/bin
|
||||||
MANDIR ?= $(PREFIX)/man
|
MANDIR ?= $(PREFIX)/man
|
||||||
|
SHAREDIR ?= $(PREFIX)/share
|
||||||
PYTHON ?= /usr/bin/env python
|
PYTHON ?= /usr/bin/env python
|
||||||
|
|
||||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||||
@ -22,13 +23,17 @@ else
|
|||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||||
install -d $(DESTDIR)$(BINDIR)
|
install -d $(DESTDIR)$(BINDIR)
|
||||||
install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
|
install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
|
||||||
install -d $(DESTDIR)$(MANDIR)/man1
|
install -d $(DESTDIR)$(MANDIR)/man1
|
||||||
install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
|
install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
|
||||||
install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
|
install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
|
||||||
install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
|
install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
|
||||||
|
install -d $(DESTDIR)$(SHAREDIR)/zsh/site-functions
|
||||||
|
install -m 644 youtube-dl.zsh $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_youtube-dl
|
||||||
|
install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
|
||||||
|
install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
|
||||||
|
|
||||||
test:
|
test:
|
||||||
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
|
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
|
||||||
@ -36,9 +41,9 @@ test:
|
|||||||
|
|
||||||
tar: youtube-dl.tar.gz
|
tar: youtube-dl.tar.gz
|
||||||
|
|
||||||
.PHONY: all clean install test tar bash-completion pypi-files
|
.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion
|
||||||
|
|
||||||
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
|
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
|
||||||
|
|
||||||
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||||
zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
|
zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
|
||||||
@ -64,7 +69,17 @@ youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-co
|
|||||||
|
|
||||||
bash-completion: youtube-dl.bash-completion
|
bash-completion: youtube-dl.bash-completion
|
||||||
|
|
||||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
youtube-dl.zsh: youtube_dl/*.py youtube_dl/*/*.py devscripts/zsh-completion.in
|
||||||
|
python devscripts/zsh-completion.py
|
||||||
|
|
||||||
|
zsh-completion: youtube-dl.zsh
|
||||||
|
|
||||||
|
youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
|
||||||
|
python devscripts/fish-completion.py
|
||||||
|
|
||||||
|
fish-completion: youtube-dl.fish
|
||||||
|
|
||||||
|
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||||
--exclude '*.DS_Store' \
|
--exclude '*.DS_Store' \
|
||||||
--exclude '*.kate-swp' \
|
--exclude '*.kate-swp' \
|
||||||
@ -78,5 +93,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
|||||||
-- \
|
-- \
|
||||||
bin devscripts test youtube_dl docs \
|
bin devscripts test youtube_dl docs \
|
||||||
LICENSE README.md README.txt \
|
LICENSE README.md README.txt \
|
||||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
|
||||||
|
youtube-dl.zsh youtube-dl.fish setup.py \
|
||||||
youtube-dl
|
youtube-dl
|
||||||
|
58
README.md
58
README.md
@ -99,8 +99,6 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
downloaded videos in it.
|
downloaded videos in it.
|
||||||
--include-ads Download advertisements as well
|
--include-ads Download advertisements as well
|
||||||
(experimental)
|
(experimental)
|
||||||
--youtube-include-dash-manifest Try to download the DASH manifest on
|
|
||||||
YouTube videos (experimental)
|
|
||||||
|
|
||||||
## Download Options:
|
## Download Options:
|
||||||
-r, --rate-limit LIMIT maximum download rate in bytes per second
|
-r, --rate-limit LIMIT maximum download rate in bytes per second
|
||||||
@ -158,7 +156,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
downloads if possible.
|
downloads if possible.
|
||||||
--no-continue do not resume partially downloaded files
|
--no-continue do not resume partially downloaded files
|
||||||
(restart from beginning)
|
(restart from beginning)
|
||||||
--no-part do not use .part files
|
--no-part do not use .part files - write directly
|
||||||
|
into output file
|
||||||
--no-mtime do not use the Last-modified header to set
|
--no-mtime do not use the Last-modified header to set
|
||||||
the file modification time
|
the file modification time
|
||||||
--write-description write video description to a .description
|
--write-description write video description to a .description
|
||||||
@ -216,7 +215,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
information about the video. (Currently
|
information about the video. (Currently
|
||||||
supported only for YouTube)
|
supported only for YouTube)
|
||||||
--user-agent UA specify a custom user agent
|
--user-agent UA specify a custom user agent
|
||||||
--referer REF specify a custom referer, use if the video
|
--referer URL specify a custom referer, use if the video
|
||||||
access is restricted to one domain
|
access is restricted to one domain
|
||||||
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
||||||
separated by a colon ':'. You can use this
|
separated by a colon ':'. You can use this
|
||||||
@ -227,17 +226,22 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
## Video Format Options:
|
## Video Format Options:
|
||||||
-f, --format FORMAT video format code, specify the order of
|
-f, --format FORMAT video format code, specify the order of
|
||||||
preference using slashes: "-f 22/17/18".
|
preference using slashes: -f 22/17/18 . -f
|
||||||
"-f mp4" and "-f flv" are also supported.
|
mp4 , -f m4a and -f flv are also
|
||||||
You can also use the special names "best",
|
supported. You can also use the special
|
||||||
"bestvideo", "bestaudio", "worst",
|
names "best", "bestvideo", "bestaudio",
|
||||||
"worstvideo" and "worstaudio". By default,
|
"worst", "worstvideo" and "worstaudio". By
|
||||||
youtube-dl will pick the best quality.
|
default, youtube-dl will pick the best
|
||||||
|
quality. Use commas to download multiple
|
||||||
|
audio formats, such as -f
|
||||||
|
136/137/mp4/bestvideo,140/m4a/bestaudio
|
||||||
--all-formats download all available video formats
|
--all-formats download all available video formats
|
||||||
--prefer-free-formats prefer free video formats unless a specific
|
--prefer-free-formats prefer free video formats unless a specific
|
||||||
one is requested
|
one is requested
|
||||||
--max-quality FORMAT highest quality format to download
|
--max-quality FORMAT highest quality format to download
|
||||||
-F, --list-formats list all available formats
|
-F, --list-formats list all available formats
|
||||||
|
--youtube-skip-dash-manifest Do not download the DASH manifest on
|
||||||
|
YouTube videos
|
||||||
|
|
||||||
## Subtitle Options:
|
## Subtitle Options:
|
||||||
--write-sub write subtitle file
|
--write-sub write subtitle file
|
||||||
@ -253,7 +257,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
language tags like 'en,pt'
|
language tags like 'en,pt'
|
||||||
|
|
||||||
## Authentication Options:
|
## Authentication Options:
|
||||||
-u, --username USERNAME account username
|
-u, --username USERNAME login with this account ID
|
||||||
-p, --password PASSWORD account password
|
-p, --password PASSWORD account password
|
||||||
-2, --twofactor TWOFACTOR two-factor auth code
|
-2, --twofactor TWOFACTOR two-factor auth code
|
||||||
-n, --netrc use .netrc authentication data
|
-n, --netrc use .netrc authentication data
|
||||||
@ -264,7 +268,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
(requires ffmpeg or avconv and ffprobe or
|
(requires ffmpeg or avconv and ffprobe or
|
||||||
avprobe)
|
avprobe)
|
||||||
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a",
|
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a",
|
||||||
"opus", or "wav"; best by default
|
"opus", or "wav"; "best" by default
|
||||||
--audio-quality QUALITY ffmpeg/avconv audio quality specification,
|
--audio-quality QUALITY ffmpeg/avconv audio quality specification,
|
||||||
insert a value between 0 (better) and 9
|
insert a value between 0 (better) and 9
|
||||||
(worse) for VBR or a specific bitrate like
|
(worse) for VBR or a specific bitrate like
|
||||||
@ -345,21 +349,34 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
|
|||||||
|
|
||||||
# FAQ
|
# FAQ
|
||||||
|
|
||||||
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
### How do I update youtube-dl?
|
||||||
|
|
||||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
|
||||||
|
|
||||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version.
|
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
|
||||||
|
|
||||||
Alternatively, uninstall the youtube-dl package and follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html). In a pinch, this should do if you used `apt-get` before to install youtube-dl:
|
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
|
||||||
|
|
||||||
|
As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
|
||||||
|
|
||||||
|
sudo apt-get remove -y youtube-dl
|
||||||
|
|
||||||
|
Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
|
||||||
|
|
||||||
```
|
```
|
||||||
sudo apt-get remove -y youtube-dl
|
|
||||||
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
sudo chmod a+x /usr/local/bin/youtube-dl
|
||||||
hash -r
|
hash -r
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
|
||||||
|
|
||||||
|
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
||||||
|
|
||||||
|
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||||
|
|
||||||
|
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
||||||
|
|
||||||
### Do I always have to pass in `--max-quality FORMAT`, or `-citw`?
|
### Do I always have to pass in `--max-quality FORMAT`, or `-citw`?
|
||||||
|
|
||||||
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`.
|
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`.
|
||||||
@ -439,8 +456,6 @@ If you want to add support for a new site, you can follow this quick list (assum
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -448,7 +463,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
|||||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://yourextractor.com/watch/42',
|
'url': 'http://yourextractor.com/watch/42',
|
||||||
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
|
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '42',
|
'id': '42',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -463,8 +478,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
# TODO more code goes here, for example ...
|
# TODO more code goes here, for example ...
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
5
devscripts/fish-completion.in
Normal file
5
devscripts/fish-completion.in
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
|
||||||
|
{{commands}}
|
||||||
|
|
||||||
|
|
||||||
|
complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
|
48
devscripts/fish-completion.py
Executable file
48
devscripts/fish-completion.py
Executable file
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import optparse
|
||||||
|
import os
|
||||||
|
from os.path import dirname as dirn
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||||
|
import youtube_dl
|
||||||
|
from youtube_dl.utils import shell_quote
|
||||||
|
|
||||||
|
FISH_COMPLETION_FILE = 'youtube-dl.fish'
|
||||||
|
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
|
||||||
|
|
||||||
|
EXTRA_ARGS = {
|
||||||
|
'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],
|
||||||
|
|
||||||
|
# Options that need a file parameter
|
||||||
|
'download-archive': ['--require-parameter'],
|
||||||
|
'cookies': ['--require-parameter'],
|
||||||
|
'load-info': ['--require-parameter'],
|
||||||
|
'batch-file': ['--require-parameter'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def build_completion(opt_parser):
|
||||||
|
commands = []
|
||||||
|
|
||||||
|
for group in opt_parser.option_groups:
|
||||||
|
for option in group.option_list:
|
||||||
|
long_option = option.get_opt_string().strip('-')
|
||||||
|
help_msg = shell_quote([option.help])
|
||||||
|
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
|
||||||
|
if option._short_opts:
|
||||||
|
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
|
||||||
|
if option.help != optparse.SUPPRESS_HELP:
|
||||||
|
complete_cmd += ['--description', option.help]
|
||||||
|
complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
|
||||||
|
commands.append(shell_quote(complete_cmd))
|
||||||
|
|
||||||
|
with open(FISH_COMPLETION_TEMPLATE) as f:
|
||||||
|
template = f.read()
|
||||||
|
filled_template = template.replace('{{commands}}', '\n'.join(commands))
|
||||||
|
with open(FISH_COMPLETION_FILE, 'w') as f:
|
||||||
|
f.write(filled_template)
|
||||||
|
|
||||||
|
parser = youtube_dl.parseOpts()[0]
|
||||||
|
build_completion(parser)
|
28
devscripts/zsh-completion.in
Normal file
28
devscripts/zsh-completion.in
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#compdef youtube-dl
|
||||||
|
|
||||||
|
__youtube_dl() {
|
||||||
|
local curcontext="$curcontext" fileopts diropts cur prev
|
||||||
|
typeset -A opt_args
|
||||||
|
fileopts="{{fileopts}}"
|
||||||
|
diropts="{{diropts}}"
|
||||||
|
cur=$words[CURRENT]
|
||||||
|
case $cur in
|
||||||
|
:)
|
||||||
|
_arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)'
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
prev=$words[CURRENT-1]
|
||||||
|
if [[ ${prev} =~ ${fileopts} ]]; then
|
||||||
|
_path_files
|
||||||
|
elif [[ ${prev} =~ ${diropts} ]]; then
|
||||||
|
_path_files -/
|
||||||
|
elif [[ ${prev} == "--recode-video" ]]; then
|
||||||
|
_arguments '*: :(mp4 flv ogg webm mkv)'
|
||||||
|
else
|
||||||
|
_arguments '*: :({{flags}})'
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
|
__youtube_dl
|
46
devscripts/zsh-completion.py
Executable file
46
devscripts/zsh-completion.py
Executable file
@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
import os
|
||||||
|
from os.path import dirname as dirn
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||||
|
import youtube_dl
|
||||||
|
|
||||||
|
ZSH_COMPLETION_FILE = "youtube-dl.zsh"
|
||||||
|
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
|
||||||
|
|
||||||
|
|
||||||
|
def build_completion(opt_parser):
|
||||||
|
opts = [opt for group in opt_parser.option_groups
|
||||||
|
for opt in group.option_list]
|
||||||
|
opts_file = [opt for opt in opts if opt.metavar == "FILE"]
|
||||||
|
opts_dir = [opt for opt in opts if opt.metavar == "DIR"]
|
||||||
|
|
||||||
|
fileopts = []
|
||||||
|
for opt in opts_file:
|
||||||
|
if opt._short_opts:
|
||||||
|
fileopts.extend(opt._short_opts)
|
||||||
|
if opt._long_opts:
|
||||||
|
fileopts.extend(opt._long_opts)
|
||||||
|
|
||||||
|
diropts = []
|
||||||
|
for opt in opts_dir:
|
||||||
|
if opt._short_opts:
|
||||||
|
diropts.extend(opt._short_opts)
|
||||||
|
if opt._long_opts:
|
||||||
|
diropts.extend(opt._long_opts)
|
||||||
|
|
||||||
|
flags = [opt.get_opt_string() for opt in opts]
|
||||||
|
|
||||||
|
with open(ZSH_COMPLETION_TEMPLATE) as f:
|
||||||
|
template = f.read()
|
||||||
|
|
||||||
|
template = template.replace("{{fileopts}}", "|".join(fileopts))
|
||||||
|
template = template.replace("{{diropts}}", "|".join(diropts))
|
||||||
|
template = template.replace("{{flags}}", " ".join(flags))
|
||||||
|
|
||||||
|
with open(ZSH_COMPLETION_FILE, "w") as f:
|
||||||
|
f.write(template)
|
||||||
|
|
||||||
|
parser = youtube_dl.parseOpts()[0]
|
||||||
|
build_completion(parser)
|
1
setup.py
1
setup.py
@ -48,6 +48,7 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
|||||||
else:
|
else:
|
||||||
files_spec = [
|
files_spec = [
|
||||||
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
||||||
|
('etc/fish/completions', ['youtube-dl.fish']),
|
||||||
('share/doc/youtube_dl', ['README.txt']),
|
('share/doc/youtube_dl', ['README.txt']),
|
||||||
('share/man/man1', ['youtube-dl.1'])
|
('share/man/man1', ['youtube-dl.1'])
|
||||||
]
|
]
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import errno
|
import errno
|
||||||
import io
|
import io
|
||||||
import hashlib
|
import hashlib
|
||||||
@ -12,6 +14,7 @@ from youtube_dl import YoutubeDL
|
|||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
|
write_string,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -40,10 +43,10 @@ def report_warning(message):
|
|||||||
If stderr is a tty file the 'WARNING:' will be colored
|
If stderr is a tty file the 'WARNING:' will be colored
|
||||||
'''
|
'''
|
||||||
if sys.stderr.isatty() and os.name != 'nt':
|
if sys.stderr.isatty() and os.name != 'nt':
|
||||||
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header = u'WARNING:'
|
_msg_header = 'WARNING:'
|
||||||
output = u'%s %s\n' % (_msg_header, message)
|
output = '%s %s\n' % (_msg_header, message)
|
||||||
if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
|
if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
|
||||||
output = output.encode(preferredencoding())
|
output = output.encode(preferredencoding())
|
||||||
sys.stderr.write(output)
|
sys.stderr.write(output)
|
||||||
@ -103,22 +106,22 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||||||
|
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
isinstance(got, compat_str),
|
isinstance(got, compat_str),
|
||||||
u'Expected a %s object, but got %s for field %s' % (
|
'Expected a %s object, but got %s for field %s' % (
|
||||||
compat_str.__name__, type(got).__name__, info_field))
|
compat_str.__name__, type(got).__name__, info_field))
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
match_rex.match(got),
|
match_rex.match(got),
|
||||||
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||||
elif isinstance(expected, type):
|
elif isinstance(expected, type):
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
self.assertTrue(isinstance(got, expected),
|
self.assertTrue(isinstance(got, expected),
|
||||||
u'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
||||||
else:
|
else:
|
||||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||||
got = 'md5:' + md5(got_dict.get(info_field))
|
got = 'md5:' + md5(got_dict.get(info_field))
|
||||||
else:
|
else:
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
self.assertEqual(expected, got,
|
self.assertEqual(expected, got,
|
||||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||||
|
|
||||||
# Check for the presence of mandatory fields
|
# Check for the presence of mandatory fields
|
||||||
if got_dict.get('_type') != 'playlist':
|
if got_dict.get('_type') != 'playlist':
|
||||||
@ -126,7 +129,7 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||||||
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
||||||
# Check for mandatory fields that are automatically set by YoutubeDL
|
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||||
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
||||||
self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
|
self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
|
||||||
|
|
||||||
# Are checkable fields missing from the test case definition?
|
# Are checkable fields missing from the test case definition?
|
||||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||||
@ -134,7 +137,15 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||||
if missing_keys:
|
if missing_keys:
|
||||||
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
|
def _repr(v):
|
||||||
|
if isinstance(v, compat_str):
|
||||||
|
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'")
|
||||||
|
else:
|
||||||
|
return repr(v)
|
||||||
|
info_dict_str = ''.join(
|
||||||
|
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||||
|
for k, v in test_info_dict.items())
|
||||||
|
write_string('\n"info_dict": {' + info_dict_str + '}\n', out=sys.stderr)
|
||||||
self.assertFalse(
|
self.assertFalse(
|
||||||
missing_keys,
|
missing_keys,
|
||||||
'Missing keys in test definition: %s' % (
|
'Missing keys in test definition: %s' % (
|
||||||
|
@ -14,7 +14,7 @@ from test.helper import gettestcases
|
|||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
FacebookIE,
|
FacebookIE,
|
||||||
gen_extractors,
|
gen_extractors,
|
||||||
JustinTVIE,
|
TwitchIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -72,21 +72,17 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||||
|
|
||||||
def test_justin_tv_channelid_matching(self):
|
def test_twitch_channelid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv'))
|
self.assertTrue(TwitchIE.suitable('twitch.tv/vanillatv'))
|
||||||
self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv'))
|
self.assertTrue(TwitchIE.suitable('www.twitch.tv/vanillatv'))
|
||||||
self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv'))
|
self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv'))
|
||||||
self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv'))
|
self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/'))
|
||||||
self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv'))
|
|
||||||
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv'))
|
|
||||||
self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/'))
|
|
||||||
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/'))
|
|
||||||
|
|
||||||
def test_justintv_videoid_matching(self):
|
def test_twitch_videoid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
|
self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
|
||||||
|
|
||||||
def test_justin_tv_chapterid_matching(self):
|
def test_twitch_chapterid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
|
self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
def test_youtube_extract(self):
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
|
@ -139,7 +139,9 @@ def generator(test_case):
|
|||||||
|
|
||||||
if is_playlist:
|
if is_playlist:
|
||||||
self.assertEqual(res_dict['_type'], 'playlist')
|
self.assertEqual(res_dict['_type'], 'playlist')
|
||||||
|
self.assertTrue('entries' in res_dict)
|
||||||
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
||||||
|
|
||||||
if 'playlist_mincount' in test_case:
|
if 'playlist_mincount' in test_case:
|
||||||
assertGreaterEqual(
|
assertGreaterEqual(
|
||||||
self,
|
self,
|
||||||
@ -188,7 +190,7 @@ def generator(test_case):
|
|||||||
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
|
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
|
||||||
finally:
|
finally:
|
||||||
try_rm_tcs_files()
|
try_rm_tcs_files()
|
||||||
if is_playlist and res_dict is not None:
|
if is_playlist and res_dict is not None and res_dict.get('entries'):
|
||||||
# Remove all other files that may have been extracted if the
|
# Remove all other files that may have been extracted if the
|
||||||
# extractor returns full results even with extract_flat
|
# extractor returns full results even with extract_flat
|
||||||
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
|
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
|
||||||
|
@ -15,6 +15,7 @@ from youtube_dl.extractor import (
|
|||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
TEDIE,
|
TEDIE,
|
||||||
VimeoIE,
|
VimeoIE,
|
||||||
|
WallaIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -279,5 +280,32 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
|||||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
|
class TestWallaSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
||||||
|
IE = WallaIE
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['heb']))
|
||||||
|
self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920')
|
||||||
|
|
||||||
|
def test_nosubtitles(self):
|
||||||
|
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
||||||
|
self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles), 0)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -22,7 +22,8 @@ from youtube_dl.utils import (
|
|||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
get_meta_content,
|
get_meta_content,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
PagedList,
|
OnDemandPagedList,
|
||||||
|
InAdvancePagedList,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
read_batch_urls,
|
read_batch_urls,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
@ -40,6 +41,10 @@ from youtube_dl.utils import (
|
|||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
uppercase_escape,
|
uppercase_escape,
|
||||||
|
limit_length,
|
||||||
|
escape_rfc3986,
|
||||||
|
escape_url,
|
||||||
|
js_to_json,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -134,6 +139,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||||
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||||
|
self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
|
||||||
|
|
||||||
def test_find_xpath_attr(self):
|
def test_find_xpath_attr(self):
|
||||||
testxml = '''<root>
|
testxml = '''<root>
|
||||||
@ -243,10 +249,14 @@ class TestUtil(unittest.TestCase):
|
|||||||
for i in range(firstid, upto):
|
for i in range(firstid, upto):
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
pl = PagedList(get_page, pagesize)
|
pl = OnDemandPagedList(get_page, pagesize)
|
||||||
got = pl.getslice(*sliceargs)
|
got = pl.getslice(*sliceargs)
|
||||||
self.assertEqual(got, expected)
|
self.assertEqual(got, expected)
|
||||||
|
|
||||||
|
iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize)
|
||||||
|
got = iapl.getslice(*sliceargs)
|
||||||
|
self.assertEqual(got, expected)
|
||||||
|
|
||||||
testPL(5, 2, (), [0, 1, 2, 3, 4])
|
testPL(5, 2, (), [0, 1, 2, 3, 4])
|
||||||
testPL(5, 2, (1,), [1, 2, 3, 4])
|
testPL(5, 2, (1,), [1, 2, 3, 4])
|
||||||
testPL(5, 2, (2,), [2, 3, 4])
|
testPL(5, 2, (2,), [2, 3, 4])
|
||||||
@ -286,5 +296,64 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||||
|
|
||||||
|
def test_limit_length(self):
|
||||||
|
self.assertEqual(limit_length(None, 12), None)
|
||||||
|
self.assertEqual(limit_length('foo', 12), 'foo')
|
||||||
|
self.assertTrue(
|
||||||
|
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||||
|
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||||
|
|
||||||
|
def test_escape_rfc3986(self):
|
||||||
|
reserved = "!*'();:@&=+$,/?#[]"
|
||||||
|
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||||
|
self.assertEqual(escape_rfc3986(reserved), reserved)
|
||||||
|
self.assertEqual(escape_rfc3986(unreserved), unreserved)
|
||||||
|
self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82')
|
||||||
|
self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82')
|
||||||
|
self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
|
||||||
|
self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
|
||||||
|
|
||||||
|
def test_escape_url(self):
|
||||||
|
self.assertEqual(
|
||||||
|
escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
|
||||||
|
'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
|
||||||
|
'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
escape_url('http://тест.рф/фрагмент'),
|
||||||
|
'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
escape_url('http://тест.рф/абв?абв=абв#абв'),
|
||||||
|
'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
|
||||||
|
)
|
||||||
|
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||||
|
|
||||||
|
def test_js_to_json_realworld(self):
|
||||||
|
inp = '''{
|
||||||
|
'clip':{'provider':'pseudo'}
|
||||||
|
}'''
|
||||||
|
self.assertEqual(js_to_json(inp), '''{
|
||||||
|
"clip":{"provider":"pseudo"}
|
||||||
|
}''')
|
||||||
|
json.loads(js_to_json(inp))
|
||||||
|
|
||||||
|
inp = '''{
|
||||||
|
'playlist':[{'controls':{'all':null}}]
|
||||||
|
}'''
|
||||||
|
self.assertEqual(js_to_json(inp), '''{
|
||||||
|
"playlist":[{"controls":{"all":null}}]
|
||||||
|
}''')
|
||||||
|
|
||||||
|
def test_js_to_json_edgecases(self):
|
||||||
|
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||||
|
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||||
|
|
||||||
|
on = js_to_json('{"abc": true}')
|
||||||
|
self.assertEqual(json.loads(on), {'abc': True})
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -10,7 +10,6 @@ from test.helper import FakeYDL
|
|||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
YoutubeUserIE,
|
|
||||||
YoutubePlaylistIE,
|
YoutubePlaylistIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||||
|
|
||||||
def test_youtube_channel(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeChannelIE(dl)
|
|
||||||
#test paginated channel
|
|
||||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
|
||||||
self.assertTrue(len(result['entries']) > 90)
|
|
||||||
#test autogenerated channel
|
|
||||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
|
||||||
self.assertTrue(len(result['entries']) >= 18)
|
|
||||||
|
|
||||||
def test_youtube_user(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeUserIE(dl)
|
|
||||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
|
||||||
self.assertTrue(len(result['entries']) >= 320)
|
|
||||||
|
|
||||||
def test_youtube_show(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeShowIE(dl)
|
|
||||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
|
||||||
self.assertTrue(len(result) >= 3)
|
|
||||||
|
|
||||||
def test_youtube_mix(self):
|
def test_youtube_mix(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(len(entries), 100)
|
self.assertEqual(len(entries), 100)
|
||||||
|
|
||||||
def test_youtube_toplist(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeTopListIE(dl)
|
|
||||||
result = ie.extract('yttoplist:music:Trending')
|
|
||||||
entries = result['entries']
|
|
||||||
self.assertTrue(len(entries) >= 5)
|
|
||||||
|
|
||||||
def test_youtube_search_url(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeSearchURLIE(dl)
|
|
||||||
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
|
||||||
entries = result['entries']
|
|
||||||
self.assertIsPlaylist(result)
|
|
||||||
self.assertEqual(result['title'], 'youtube-dl test video')
|
|
||||||
self.assertTrue(len(entries) >= 5)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -47,18 +47,6 @@ _TESTS = [
|
|||||||
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||||
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||||
),
|
),
|
||||||
(
|
|
||||||
'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
|
|
||||||
'swf',
|
|
||||||
86,
|
|
||||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
|
|
||||||
),
|
|
||||||
(
|
|
||||||
'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
|
|
||||||
'swf',
|
|
||||||
'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
|
|
||||||
'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
|
|
||||||
),
|
|
||||||
(
|
(
|
||||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||||
'js',
|
'js',
|
||||||
|
@ -28,6 +28,7 @@ from .utils import (
|
|||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
escape_url,
|
||||||
ContentTooShortError,
|
ContentTooShortError,
|
||||||
date_from_str,
|
date_from_str,
|
||||||
DateRange,
|
DateRange,
|
||||||
@ -227,11 +228,11 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||||
and not params['restrictfilenames']):
|
and not params.get('restrictfilenames', False)):
|
||||||
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Assuming --restrict-filenames since file system encoding '
|
'Assuming --restrict-filenames since file system encoding '
|
||||||
'cannot encode all charactes. '
|
'cannot encode all characters. '
|
||||||
'Set the LC_ALL environment variable to fix this.')
|
'Set the LC_ALL environment variable to fix this.')
|
||||||
self.params['restrictfilenames'] = True
|
self.params['restrictfilenames'] = True
|
||||||
|
|
||||||
@ -707,7 +708,7 @@ class YoutubeDL(object):
|
|||||||
if video_formats:
|
if video_formats:
|
||||||
return video_formats[0]
|
return video_formats[0]
|
||||||
else:
|
else:
|
||||||
extensions = ['mp4', 'flv', 'webm', '3gp']
|
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
|
||||||
if format_spec in extensions:
|
if format_spec in extensions:
|
||||||
filter_f = lambda f: f['ext'] == format_spec
|
filter_f = lambda f: f['ext'] == format_spec
|
||||||
else:
|
else:
|
||||||
@ -808,9 +809,10 @@ class YoutubeDL(object):
|
|||||||
if req_format in ('-1', 'all'):
|
if req_format in ('-1', 'all'):
|
||||||
formats_to_download = formats
|
formats_to_download = formats
|
||||||
else:
|
else:
|
||||||
|
for rfstr in req_format.split(','):
|
||||||
# We can accept formats requested in the format: 34/5/best, we pick
|
# We can accept formats requested in the format: 34/5/best, we pick
|
||||||
# the first that is available, starting from left
|
# the first that is available, starting from left
|
||||||
req_formats = req_format.split('/')
|
req_formats = rfstr.split('/')
|
||||||
for rf in req_formats:
|
for rf in req_formats:
|
||||||
if re.match(r'.+?\+.+?', rf) is not None:
|
if re.match(r'.+?\+.+?', rf) is not None:
|
||||||
# Two formats have been requested like '137+139'
|
# Two formats have been requested like '137+139'
|
||||||
@ -828,7 +830,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
selected_format = self.select_format(rf, formats)
|
selected_format = self.select_format(rf, formats)
|
||||||
if selected_format is not None:
|
if selected_format is not None:
|
||||||
formats_to_download = [selected_format]
|
formats_to_download.append(selected_format)
|
||||||
break
|
break
|
||||||
if not formats_to_download:
|
if not formats_to_download:
|
||||||
raise ExtractorError('requested format not available',
|
raise ExtractorError('requested format not available',
|
||||||
@ -1241,6 +1243,26 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def urlopen(self, req):
|
def urlopen(self, req):
|
||||||
""" Start an HTTP download """
|
""" Start an HTTP download """
|
||||||
|
|
||||||
|
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
|
||||||
|
# always respected by websites, some tend to give out URLs with non percent-encoded
|
||||||
|
# non-ASCII characters (see telemb.py, ard.py [#3412])
|
||||||
|
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||||
|
# To work around aforementioned issue we will replace request's original URL with
|
||||||
|
# percent-encoded one
|
||||||
|
req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
|
||||||
|
url = req if req_is_string else req.get_full_url()
|
||||||
|
url_escaped = escape_url(url)
|
||||||
|
|
||||||
|
# Substitute URL if any change after escaping
|
||||||
|
if url != url_escaped:
|
||||||
|
if req_is_string:
|
||||||
|
req = url_escaped
|
||||||
|
else:
|
||||||
|
req = compat_urllib_request.Request(
|
||||||
|
url_escaped, data=req.data, headers=req.headers,
|
||||||
|
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||||
|
|
||||||
return self._opener.open(req, timeout=self._socket_timeout)
|
return self._opener.open(req, timeout=self._socket_timeout)
|
||||||
|
|
||||||
def print_debug_header(self):
|
def print_debug_header(self):
|
||||||
|
@ -75,6 +75,10 @@ __authors__ = (
|
|||||||
'Ole Ernst',
|
'Ole Ernst',
|
||||||
'Aaron McDaniel (mcd1992)',
|
'Aaron McDaniel (mcd1992)',
|
||||||
'Magnus Kolstad',
|
'Magnus Kolstad',
|
||||||
|
'Hari Padmanaban',
|
||||||
|
'Carlos Ramos',
|
||||||
|
'5moufl',
|
||||||
|
'lenaten',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from .hls import HlsFD
|
from .hls import HlsFD
|
||||||
|
from .hls import NativeHlsFD
|
||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from .mplayer import MplayerFD
|
from .mplayer import MplayerFD
|
||||||
from .rtmp import RtmpFD
|
from .rtmp import RtmpFD
|
||||||
@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict):
|
|||||||
|
|
||||||
if url.startswith('rtmp'):
|
if url.startswith('rtmp'):
|
||||||
return RtmpFD
|
return RtmpFD
|
||||||
|
if protocol == 'm3u8_native':
|
||||||
|
return NativeHlsFD
|
||||||
if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
|
if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
|
||||||
return HlsFD
|
return HlsFD
|
||||||
if url.startswith('mms') or url.startswith('rtsp'):
|
if url.startswith('mms') or url.startswith('rtsp'):
|
||||||
|
@ -42,6 +42,7 @@ class FileDownloader(object):
|
|||||||
Subclasses of this one must re-define the real_download method.
|
Subclasses of this one must re-define the real_download method.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
_TEST_FILE_SIZE = 10241
|
||||||
params = None
|
params = None
|
||||||
|
|
||||||
def __init__(self, ydl, params):
|
def __init__(self, ydl, params):
|
||||||
|
@ -16,6 +16,7 @@ from ..utils import (
|
|||||||
format_bytes,
|
format_bytes,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -251,6 +252,8 @@ class F4mFD(FileDownloader):
|
|||||||
# We only download the first fragment
|
# We only download the first fragment
|
||||||
fragments_list = fragments_list[:1]
|
fragments_list = fragments_list[:1]
|
||||||
total_frags = len(fragments_list)
|
total_frags = len(fragments_list)
|
||||||
|
# For some akamai manifests we'll need to add a query to the fragment url
|
||||||
|
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||||
|
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||||
@ -290,6 +293,8 @@ class F4mFD(FileDownloader):
|
|||||||
for (seg_i, frag_i) in fragments_list:
|
for (seg_i, frag_i) in fragments_list:
|
||||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||||
url = base_url + name
|
url = base_url + name
|
||||||
|
if akamai_pv:
|
||||||
|
url += '?' + akamai_pv.strip(';')
|
||||||
frag_filename = '%s-%s' % (tmpfilename, name)
|
frag_filename = '%s-%s' % (tmpfilename, name)
|
||||||
success = http_dl.download(frag_filename, {'url': url})
|
success = http_dl.download(frag_filename, {'url': url})
|
||||||
if not success:
|
if not success:
|
||||||
|
@ -1,8 +1,13 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
compat_urllib_request,
|
||||||
check_executable,
|
check_executable,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
)
|
)
|
||||||
@ -43,3 +48,57 @@ class HlsFD(FileDownloader):
|
|||||||
self.to_stderr(u"\n")
|
self.to_stderr(u"\n")
|
||||||
self.report_error(u'%s exited with code %d' % (program, retval))
|
self.report_error(u'%s exited with code %d' % (program, retval))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class NativeHlsFD(FileDownloader):
|
||||||
|
""" A more limited implementation that does not require ffmpeg """
|
||||||
|
|
||||||
|
def real_download(self, filename, info_dict):
|
||||||
|
url = info_dict['url']
|
||||||
|
self.report_destination(filename)
|
||||||
|
tmpfilename = self.temp_name(filename)
|
||||||
|
|
||||||
|
self.to_screen(
|
||||||
|
'[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
|
||||||
|
data = self.ydl.urlopen(url).read()
|
||||||
|
s = data.decode('utf-8', 'ignore')
|
||||||
|
segment_urls = []
|
||||||
|
for line in s.splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if line and not line.startswith('#'):
|
||||||
|
segment_url = (
|
||||||
|
line
|
||||||
|
if re.match(r'^https?://', line)
|
||||||
|
else compat_urlparse.urljoin(url, line))
|
||||||
|
segment_urls.append(segment_url)
|
||||||
|
|
||||||
|
is_test = self.params.get('test', False)
|
||||||
|
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
||||||
|
byte_counter = 0
|
||||||
|
with open(tmpfilename, 'wb') as outf:
|
||||||
|
for i, segurl in enumerate(segment_urls):
|
||||||
|
self.to_screen(
|
||||||
|
'[hlsnative] %s: Downloading segment %d / %d' %
|
||||||
|
(info_dict['id'], i + 1, len(segment_urls)))
|
||||||
|
seg_req = compat_urllib_request.Request(segurl)
|
||||||
|
if remaining_bytes is not None:
|
||||||
|
seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
|
||||||
|
|
||||||
|
segment = self.ydl.urlopen(seg_req).read()
|
||||||
|
if remaining_bytes is not None:
|
||||||
|
segment = segment[:remaining_bytes]
|
||||||
|
remaining_bytes -= len(segment)
|
||||||
|
outf.write(segment)
|
||||||
|
byte_counter += len(segment)
|
||||||
|
if remaining_bytes is not None and remaining_bytes <= 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': byte_counter,
|
||||||
|
'total_bytes': byte_counter,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'finished',
|
||||||
|
})
|
||||||
|
self.try_rename(tmpfilename, filename)
|
||||||
|
return True
|
||||||
|
|
||||||
|
@ -14,8 +14,6 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class HttpFD(FileDownloader):
|
class HttpFD(FileDownloader):
|
||||||
_TEST_FILE_SIZE = 10241
|
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
@ -25,6 +25,7 @@ from .bambuser import BambuserIE, BambuserChannelIE
|
|||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
from .bbccouk import BBCCoUkIE
|
from .bbccouk import BBCCoUkIE
|
||||||
from .beeg import BeegIE
|
from .beeg import BeegIE
|
||||||
|
from .behindkink import BehindKinkIE
|
||||||
from .bilibili import BiliBiliIE
|
from .bilibili import BiliBiliIE
|
||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
@ -83,6 +84,7 @@ from .dropbox import DropboxIE
|
|||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
|
from .einthusan import EinthusanIE
|
||||||
from .eitb import EitbIE
|
from .eitb import EitbIE
|
||||||
from .ellentv import (
|
from .ellentv import (
|
||||||
EllenTVIE,
|
EllenTVIE,
|
||||||
@ -132,13 +134,16 @@ from .gamestar import GameStarIE
|
|||||||
from .gametrailers import GametrailersIE
|
from .gametrailers import GametrailersIE
|
||||||
from .gdcvault import GDCVaultIE
|
from .gdcvault import GDCVaultIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
|
from .globo import GloboIE
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
|
from .golem import GolemIE
|
||||||
from .googleplus import GooglePlusIE
|
from .googleplus import GooglePlusIE
|
||||||
from .googlesearch import GoogleSearchIE
|
from .googlesearch import GoogleSearchIE
|
||||||
from .gorillavid import GorillaVidIE
|
from .gorillavid import GorillaVidIE
|
||||||
from .goshgay import GoshgayIE
|
from .goshgay import GoshgayIE
|
||||||
from .grooveshark import GroovesharkIE
|
from .grooveshark import GroovesharkIE
|
||||||
from .hark import HarkIE
|
from .hark import HarkIE
|
||||||
|
from .heise import HeiseIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
from .hornbunny import HornBunnyIE
|
from .hornbunny import HornBunnyIE
|
||||||
@ -168,7 +173,6 @@ from .jadorecettepub import JadoreCettePubIE
|
|||||||
from .jeuxvideo import JeuxVideoIE
|
from .jeuxvideo import JeuxVideoIE
|
||||||
from .jove import JoveIE
|
from .jove import JoveIE
|
||||||
from .jukebox import JukeboxIE
|
from .jukebox import JukeboxIE
|
||||||
from .justintv import JustinTVIE
|
|
||||||
from .jpopsukitv import JpopsukiIE
|
from .jpopsukitv import JpopsukiIE
|
||||||
from .kankan import KankanIE
|
from .kankan import KankanIE
|
||||||
from .keezmovies import KeezMoviesIE
|
from .keezmovies import KeezMoviesIE
|
||||||
@ -186,6 +190,7 @@ from .livestream import (
|
|||||||
LivestreamOriginalIE,
|
LivestreamOriginalIE,
|
||||||
LivestreamShortenerIE,
|
LivestreamShortenerIE,
|
||||||
)
|
)
|
||||||
|
from .lrt import LRTIE
|
||||||
from .lynda import (
|
from .lynda import (
|
||||||
LyndaIE,
|
LyndaIE,
|
||||||
LyndaCourseIE
|
LyndaCourseIE
|
||||||
@ -197,6 +202,7 @@ from .malemotion import MalemotionIE
|
|||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
|
from .mgoon import MgoonIE
|
||||||
from .ministrygrid import MinistryGridIE
|
from .ministrygrid import MinistryGridIE
|
||||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||||
from .mitele import MiTeleIE
|
from .mitele import MiTeleIE
|
||||||
@ -206,6 +212,7 @@ from .mpora import MporaIE
|
|||||||
from .moevideo import MoeVideoIE
|
from .moevideo import MoeVideoIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import MofosexIE
|
||||||
from .mojvideo import MojvideoIE
|
from .mojvideo import MojvideoIE
|
||||||
|
from .moniker import MonikerIE
|
||||||
from .mooshare import MooshareIE
|
from .mooshare import MooshareIE
|
||||||
from .morningstar import MorningstarIE
|
from .morningstar import MorningstarIE
|
||||||
from .motherless import MotherlessIE
|
from .motherless import MotherlessIE
|
||||||
@ -218,6 +225,7 @@ from .mtv import (
|
|||||||
MTVServicesEmbeddedIE,
|
MTVServicesEmbeddedIE,
|
||||||
MTVIggyIE,
|
MTVIggyIE,
|
||||||
)
|
)
|
||||||
|
from .muenchentv import MuenchenTVIE
|
||||||
from .musicplayon import MusicPlayOnIE
|
from .musicplayon import MusicPlayOnIE
|
||||||
from .musicvault import MusicVaultIE
|
from .musicvault import MusicVaultIE
|
||||||
from .muzu import MuzuTVIE
|
from .muzu import MuzuTVIE
|
||||||
@ -235,6 +243,7 @@ from .ndtv import NDTVIE
|
|||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
|
from .nfl import NFLIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
@ -244,7 +253,10 @@ from .nosvideo import NosVideoIE
|
|||||||
from .novamov import NovaMovIE
|
from .novamov import NovaMovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import NownessIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .npo import NPOIE
|
from .npo import (
|
||||||
|
NPOIE,
|
||||||
|
TegenlichtVproIE,
|
||||||
|
)
|
||||||
from .nrk import (
|
from .nrk import (
|
||||||
NRKIE,
|
NRKIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
@ -252,6 +264,7 @@ from .nrk import (
|
|||||||
from .ntv import NTVIE
|
from .ntv import NTVIE
|
||||||
from .nytimes import NYTimesIE
|
from .nytimes import NYTimesIE
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
@ -262,6 +275,8 @@ from .parliamentliveuk import ParliamentLiveUKIE
|
|||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .planetaplay import PlanetaPlayIE
|
||||||
|
from .played import PlayedIE
|
||||||
from .playfm import PlayFMIE
|
from .playfm import PlayFMIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
@ -300,6 +315,7 @@ from .sbs import SBSIE
|
|||||||
from .scivee import SciVeeIE
|
from .scivee import SciVeeIE
|
||||||
from .screencast import ScreencastIE
|
from .screencast import ScreencastIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
|
from .sexykarma import SexyKarmaIE
|
||||||
from .shared import SharedIE
|
from .shared import SharedIE
|
||||||
from .sharesix import ShareSixIE
|
from .sharesix import ShareSixIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
@ -330,6 +346,8 @@ from .spankwire import SpankwireIE
|
|||||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
from .spiegeltv import SpiegeltvIE
|
||||||
from .spike import SpikeIE
|
from .spike import SpikeIE
|
||||||
|
from .sport5 import Sport5IE
|
||||||
|
from .sportbox import SportBoxIE
|
||||||
from .sportdeutschland import SportDeutschlandIE
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
@ -340,6 +358,7 @@ from .swrmediathek import SWRMediathekIE
|
|||||||
from .syfy import SyfyIE
|
from .syfy import SyfyIE
|
||||||
from .sztvhu import SztvHuIE
|
from .sztvhu import SztvHuIE
|
||||||
from .tagesschau import TagesschauIE
|
from .tagesschau import TagesschauIE
|
||||||
|
from .tapely import TapelyIE
|
||||||
from .teachertube import (
|
from .teachertube import (
|
||||||
TeacherTubeIE,
|
TeacherTubeIE,
|
||||||
TeacherTubeUserIE,
|
TeacherTubeUserIE,
|
||||||
@ -348,15 +367,22 @@ from .teachingchannel import TeachingChannelIE
|
|||||||
from .teamcoco import TeamcocoIE
|
from .teamcoco import TeamcocoIE
|
||||||
from .techtalks import TechTalksIE
|
from .techtalks import TechTalksIE
|
||||||
from .ted import TEDIE
|
from .ted import TEDIE
|
||||||
|
from .telecinco import TelecincoIE
|
||||||
from .telemb import TeleMBIE
|
from .telemb import TeleMBIE
|
||||||
from .tenplay import TenPlayIE
|
from .tenplay import TenPlayIE
|
||||||
from .testurl import TestURLIE
|
from .testurl import TestURLIE
|
||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
|
from .theonion import TheOnionIE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
|
from .thesixtyone import TheSixtyOneIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
from .tlc import TlcIE, TlcDeIE
|
from .tlc import TlcIE, TlcDeIE
|
||||||
from .tnaflix import TNAFlixIE
|
from .tnaflix import TNAFlixIE
|
||||||
|
from .thvideo import (
|
||||||
|
THVideoIE,
|
||||||
|
THVideoPlaylistIE
|
||||||
|
)
|
||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
from .toypics import ToypicsUserIE, ToypicsIE
|
from .toypics import ToypicsUserIE, ToypicsIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
@ -365,10 +391,12 @@ from .trutube import TruTubeIE
|
|||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
from .tudou import TudouIE
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
|
from .turbo import TurboIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvp import TvpIE
|
from .tvp import TvpIE
|
||||||
from .tvplay import TVPlayIE
|
from .tvplay import TVPlayIE
|
||||||
|
from .twitch import TwitchIE
|
||||||
from .ubu import UbuIE
|
from .ubu import UbuIE
|
||||||
from .udemy import (
|
from .udemy import (
|
||||||
UdemyIE,
|
UdemyIE,
|
||||||
@ -389,17 +417,19 @@ from .videobam import VideoBamIE
|
|||||||
from .videodetective import VideoDetectiveIE
|
from .videodetective import VideoDetectiveIE
|
||||||
from .videolecturesnet import VideoLecturesNetIE
|
from .videolecturesnet import VideoLecturesNetIE
|
||||||
from .videofyme import VideofyMeIE
|
from .videofyme import VideofyMeIE
|
||||||
|
from .videomega import VideoMegaIE
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
from .videott import VideoTtIE
|
from .videott import VideoTtIE
|
||||||
from .videoweed import VideoWeedIE
|
from .videoweed import VideoWeedIE
|
||||||
from .vidme import VidmeIE
|
from .vidme import VidmeIE
|
||||||
from .vimeo import (
|
from .vimeo import (
|
||||||
VimeoIE,
|
VimeoIE,
|
||||||
VimeoChannelIE,
|
|
||||||
VimeoUserIE,
|
|
||||||
VimeoAlbumIE,
|
VimeoAlbumIE,
|
||||||
|
VimeoChannelIE,
|
||||||
VimeoGroupsIE,
|
VimeoGroupsIE,
|
||||||
|
VimeoLikesIE,
|
||||||
VimeoReviewIE,
|
VimeoReviewIE,
|
||||||
|
VimeoUserIE,
|
||||||
VimeoWatchLaterIE,
|
VimeoWatchLaterIE,
|
||||||
)
|
)
|
||||||
from .vimple import VimpleIE
|
from .vimple import VimpleIE
|
||||||
@ -414,6 +444,7 @@ from .vporn import VpornIE
|
|||||||
from .vube import VubeIE
|
from .vube import VubeIE
|
||||||
from .vuclip import VuClipIE
|
from .vuclip import VuClipIE
|
||||||
from .vulture import VultureIE
|
from .vulture import VultureIE
|
||||||
|
from .walla import WallaIE
|
||||||
from .washingtonpost import WashingtonPostIE
|
from .washingtonpost import WashingtonPostIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .wayofthemaster import WayOfTheMasterIE
|
from .wayofthemaster import WayOfTheMasterIE
|
||||||
@ -435,12 +466,13 @@ from .xvideos import XVideosIE
|
|||||||
from .xtube import XTubeUserIE, XTubeIE
|
from .xtube import XTubeUserIE, XTubeIE
|
||||||
from .yahoo import (
|
from .yahoo import (
|
||||||
YahooIE,
|
YahooIE,
|
||||||
YahooNewsIE,
|
|
||||||
YahooSearchIE,
|
YahooSearchIE,
|
||||||
)
|
)
|
||||||
|
from .ynet import YnetIE
|
||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
from .youku import YoukuIE
|
from .youku import YoukuIE
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
|
from .yourupload import YourUploadIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
|
@ -22,8 +22,7 @@ class ABCIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
urls_info_json = self._search_regex(
|
urls_info_json = self._search_regex(
|
||||||
|
@ -35,7 +35,7 @@ class AnySexIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
|
title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
|
r'<div class="description"[^>]*>([^<]+)</div>', webpage, 'description', fatal=False)
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
|
r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
@ -43,7 +43,7 @@ class AnySexIE(InfoExtractor):
|
|||||||
r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
|
r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
|
||||||
|
|
||||||
duration = parse_duration(self._search_regex(
|
duration = parse_duration(self._search_regex(
|
||||||
r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False))
|
r'<b>Duration:</b> (?:<q itemprop="duration">)?(\d+:\d+)', webpage, 'duration', fatal=False))
|
||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = int_or_none(self._html_search_regex(
|
||||||
r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
|
r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
|
@ -8,8 +8,6 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
qualities,
|
qualities,
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
compat_urllib_parse,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -51,9 +49,6 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
video_id = m.group('video_id')
|
video_id = m.group('video_id')
|
||||||
|
|
||||||
urlp = compat_urllib_parse_urlparse(url)
|
|
||||||
url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||||
|
@ -10,8 +10,8 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
compat_str,
|
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
# There are different sources of video in arte.tv, the extraction process
|
# There are different sources of video in arte.tv, the extraction process
|
||||||
@ -86,15 +86,28 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
info = self._download_json(json_url, video_id)
|
info = self._download_json(json_url, video_id)
|
||||||
player_info = info['videoJsonPlayer']
|
player_info = info['videoJsonPlayer']
|
||||||
|
|
||||||
|
upload_date_str = player_info.get('shootingDate')
|
||||||
|
if not upload_date_str:
|
||||||
|
upload_date_str = player_info.get('VDA', '').split(' ')[0]
|
||||||
|
|
||||||
|
title = player_info['VTI'].strip()
|
||||||
|
subtitle = player_info.get('VSU', '').strip()
|
||||||
|
if subtitle:
|
||||||
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': player_info['VID'],
|
'id': player_info['VID'],
|
||||||
'title': player_info['VTI'],
|
'title': title,
|
||||||
'description': player_info.get('VDE'),
|
'description': player_info.get('VDE'),
|
||||||
'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]),
|
'upload_date': unified_strdate(upload_date_str),
|
||||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||||
}
|
}
|
||||||
|
|
||||||
all_formats = player_info['VSR'].values()
|
all_formats = []
|
||||||
|
for format_id, format_dict in player_info['VSR'].items():
|
||||||
|
fmt = dict(format_dict)
|
||||||
|
fmt['format_id'] = format_id
|
||||||
|
all_formats.append(fmt)
|
||||||
# Some formats use the m3u8 protocol
|
# Some formats use the m3u8 protocol
|
||||||
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
|
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
|
||||||
def _match_lang(f):
|
def _match_lang(f):
|
||||||
@ -145,22 +158,12 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
)
|
)
|
||||||
formats = sorted(formats, key=sort_key)
|
formats = sorted(formats, key=sort_key)
|
||||||
def _format(format_info):
|
def _format(format_info):
|
||||||
quality = ''
|
|
||||||
height = format_info.get('height')
|
|
||||||
if height is not None:
|
|
||||||
quality = compat_str(height)
|
|
||||||
bitrate = format_info.get('bitrate')
|
|
||||||
if bitrate is not None:
|
|
||||||
quality += '-%d' % bitrate
|
|
||||||
if format_info.get('versionCode') is not None:
|
|
||||||
format_id = '%s-%s' % (quality, format_info['versionCode'])
|
|
||||||
else:
|
|
||||||
format_id = quality
|
|
||||||
info = {
|
info = {
|
||||||
'format_id': format_id,
|
'format_id': format_info['format_id'],
|
||||||
'format_note': format_info.get('versionLibelle'),
|
'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
|
||||||
'width': format_info.get('width'),
|
'width': int_or_none(format_info.get('width')),
|
||||||
'height': height,
|
'height': int_or_none(format_info.get('height')),
|
||||||
|
'tbr': int_or_none(format_info.get('bitrate')),
|
||||||
}
|
}
|
||||||
if format_info['mediaType'] == 'rtmp':
|
if format_info['mediaType'] == 'rtmp':
|
||||||
info['url'] = format_info['streamer']
|
info['url'] = format_info['streamer']
|
||||||
|
@ -15,13 +15,23 @@ class BandcampIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||||
'file': '1812978515.mp3',
|
|
||||||
'md5': 'c557841d5e50261777a6585648adf439',
|
'md5': 'c557841d5e50261777a6585648adf439',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
'id': '1812978515',
|
||||||
"duration": 9.8485,
|
'ext': 'mp3',
|
||||||
|
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
||||||
|
'duration': 9.8485,
|
||||||
},
|
},
|
||||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||||
|
}, {
|
||||||
|
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||||
|
'md5': '2b68e5851514c20efdff2afc5603b8b4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2650410135',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Lanius (Battle)',
|
||||||
|
'uploader': 'Ben Prunty Music',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -59,9 +69,9 @@ class BandcampIE(InfoExtractor):
|
|||||||
raise ExtractorError('No free songs found')
|
raise ExtractorError('No free songs found')
|
||||||
|
|
||||||
download_link = m_download.group(1)
|
download_link = m_download.group(1)
|
||||||
video_id = re.search(
|
video_id = self._search_regex(
|
||||||
r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
|
r'var TralbumData = {.*?id: (?P<id>\d+),?$',
|
||||||
webpage, re.MULTILINE | re.DOTALL).group('id')
|
webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
|
||||||
|
|
||||||
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
||||||
# We get the dictionary of the track from some javascript code
|
# We get the dictionary of the track from some javascript code
|
||||||
|
53
youtube_dl/extractor/behindkink.py
Normal file
53
youtube_dl/extractor/behindkink.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import url_basename
|
||||||
|
|
||||||
|
|
||||||
|
class BehindKinkIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
|
||||||
|
'md5': '41ad01222b8442089a55528fec43ec01',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '36370',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
|
||||||
|
'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
|
||||||
|
'upload_date': '20140814',
|
||||||
|
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
year = mobj.group('year')
|
||||||
|
month = mobj.group('month')
|
||||||
|
day = mobj.group('day')
|
||||||
|
upload_date = year + month + day
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r"'file':\s*'([^']+)'",
|
||||||
|
webpage, 'URL base')
|
||||||
|
|
||||||
|
video_id = url_basename(video_url)
|
||||||
|
video_id = video_id.split('_')[0]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'display_id': display_id,
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
@ -26,6 +26,8 @@ class BRIE(InfoExtractor):
|
|||||||
'title': 'Wenn das Traditions-Theater wackelt',
|
'title': 'Wenn das Traditions-Theater wackelt',
|
||||||
'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
|
'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
|
||||||
'duration': 34,
|
'duration': 34,
|
||||||
|
'uploader': 'BR',
|
||||||
|
'upload_date': '20140802',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -66,8 +68,7 @@ class BRIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
display_id = mobj.group('id')
|
|
||||||
page = self._download_webpage(url, display_id)
|
page = self._download_webpage(url, display_id)
|
||||||
xml_url = self._search_regex(
|
xml_url = self._search_regex(
|
||||||
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
|
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
|
||||||
|
@ -4,37 +4,61 @@ import re
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_age_limit,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class BreakIE(InfoExtractor):
|
class BreakIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?break\.com/video/([^/]+)'
|
_VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
||||||
'md5': 'a3513fb1547fba4fb6cfac1bffc6c46b',
|
'md5': '33aa4ff477ecd124d18d7b5d23b87ce5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2468056',
|
'id': '2468056',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'When Girls Act Like D-Bags',
|
'title': 'When Girls Act Like D-Bags',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(1).split("-")[-1]
|
webpage = self._download_webpage(
|
||||||
embed_url = 'http://www.break.com/embed/%s' % video_id
|
'http://www.break.com/embed/%s' % video_id, video_id)
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
info = json.loads(self._search_regex(
|
||||||
info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
|
r'var embedVars = ({.*})\s*?</script>',
|
||||||
webpage, 'info json', flags=re.DOTALL)
|
webpage, 'info json', flags=re.DOTALL))
|
||||||
info = json.loads(info_json)
|
|
||||||
video_url = info['videoUri']
|
|
||||||
youtube_id = info.get('youtubeId')
|
youtube_id = info.get('youtubeId')
|
||||||
if youtube_id:
|
if youtube_id:
|
||||||
return self.url_result(youtube_id, 'Youtube')
|
return self.url_result(youtube_id, 'Youtube')
|
||||||
|
|
||||||
final_url = video_url + '?' + info['AuthToken']
|
formats = [{
|
||||||
|
'url': media['uri'] + '?' + info['AuthToken'],
|
||||||
|
'tbr': media['bitRate'],
|
||||||
|
'width': media['width'],
|
||||||
|
'height': media['height'],
|
||||||
|
} for media in info['media']]
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
formats.append({
|
||||||
|
'url': info['videoUri']
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
duration = int_or_none(info.get('videoLengthInSeconds'))
|
||||||
|
age_limit = parse_age_limit(info.get('audienceRating'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': final_url,
|
|
||||||
'title': info['contentName'],
|
'title': info['contentName'],
|
||||||
'thumbnail': info['thumbUri'],
|
'thumbnail': info['thumbUri'],
|
||||||
|
'duration': duration,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -87,6 +87,15 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
|
'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# playlist test
|
||||||
|
# from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
|
||||||
|
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Sealife',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 7,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -42,7 +42,7 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
||||||
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
raise ExtractorError('Can\'t extract embed url and video id')
|
raise ExtractorError('Can\'t extract embed url and video id')
|
||||||
playerdata_url = mobj.group('embed_url')
|
playerdata_url = mobj.group('embed_url')
|
||||||
@ -53,17 +53,22 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
video_description = self._html_search_regex(
|
video_description = self._html_search_regex(
|
||||||
r'<div class="entry-content">(?P<description>.+?)</div>',
|
r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||||
webpage, 'description', flags=re.DOTALL, fatal=False)
|
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||||
|
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
|
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
|
||||||
video_thumbnail = self._search_regex(
|
|
||||||
r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
|
|
||||||
sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
|
|
||||||
videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
|
|
||||||
|
|
||||||
|
vidurl = self._search_regex(
|
||||||
|
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
|
||||||
|
vidid = self._search_regex(
|
||||||
|
r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid')
|
||||||
|
videoserver = self._html_search_regex(
|
||||||
|
r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver')
|
||||||
|
|
||||||
|
videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
|
||||||
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
|
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
baseurl = sd_url[:sd_url.rfind('/')+1]
|
baseurl = vidurl[:vidurl.rfind('/')+1]
|
||||||
for video in videolist.findall('.//video'):
|
for video in videolist.findall('.//video'):
|
||||||
src = video.get('src')
|
src = video.get('src')
|
||||||
if not src:
|
if not src:
|
||||||
|
@ -35,7 +35,6 @@ class CliphunterIE(InfoExtractor):
|
|||||||
'title': 'Fun Jynx Maze solo',
|
'title': 'Fun Jynx Maze solo',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'duration': 1317,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,14 +85,11 @@ class CliphunterIE(InfoExtractor):
|
|||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r"var\s+mov_thumb\s*=\s*'([^']+)';",
|
r"var\s+mov_thumb\s*=\s*'([^']+)';",
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
duration = int_or_none(self._search_regex(
|
|
||||||
r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': duration,
|
|
||||||
'age_limit': self._rta_search(webpage),
|
'age_limit': self._rta_search(webpage),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,8 @@ from ..utils import (
|
|||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
remove_end,
|
remove_end,
|
||||||
|
HEADRequest,
|
||||||
|
compat_HTTPError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -21,6 +23,7 @@ class CloudyIE(InfoExtractor):
|
|||||||
'''
|
'''
|
||||||
_EMBED_URL = 'http://www.%s/embed.php?id=%s'
|
_EMBED_URL = 'http://www.%s/embed.php?id=%s'
|
||||||
_API_URL = 'http://www.%s/api/player.api.php?%s'
|
_API_URL = 'http://www.%s/api/player.api.php?%s'
|
||||||
|
_MAX_TRIES = 2
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||||
@ -42,24 +45,30 @@ class CloudyIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
video_host = mobj.group('host')
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
url = self._EMBED_URL % (video_host, video_id)
|
if try_num > self._MAX_TRIES - 1:
|
||||||
webpage = self._download_webpage(url, video_id)
|
raise ExtractorError('Unable to extract video URL', expected=True)
|
||||||
|
|
||||||
file_key = self._search_regex(
|
form = {
|
||||||
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
|
|
||||||
data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode({
|
|
||||||
'file': video_id,
|
'file': video_id,
|
||||||
'key': file_key,
|
'key': file_key,
|
||||||
}))
|
}
|
||||||
|
|
||||||
|
if error_url:
|
||||||
|
form.update({
|
||||||
|
'numOfErrors': try_num,
|
||||||
|
'errorCode': '404',
|
||||||
|
'errorUrl': error_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form))
|
||||||
player_data = self._download_webpage(
|
player_data = self._download_webpage(
|
||||||
data_url, video_id, 'Downloading player data')
|
data_url, video_id, 'Downloading player data')
|
||||||
data = compat_parse_qs(player_data)
|
data = compat_parse_qs(player_data)
|
||||||
|
|
||||||
|
try_num += 1
|
||||||
|
|
||||||
if 'error' in data:
|
if 'error' in data:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
|
'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
|
||||||
@ -69,16 +78,31 @@ class CloudyIE(InfoExtractor):
|
|||||||
if title:
|
if title:
|
||||||
title = remove_end(title, '&asdasdas').strip()
|
title = remove_end(title, '&asdasdas').strip()
|
||||||
|
|
||||||
formats = []
|
|
||||||
video_url = data.get('url', [None])[0]
|
video_url = data.get('url', [None])[0]
|
||||||
|
|
||||||
if video_url:
|
if video_url:
|
||||||
formats.append({
|
try:
|
||||||
'format_id': 'sd',
|
self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
|
||||||
'url': video_url,
|
except ExtractorError as e:
|
||||||
})
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
||||||
|
self.report_warning('Invalid video URL, requesting another', video_id)
|
||||||
|
return self._extract_video(video_host, video_id, file_key, video_url, try_num)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_host = mobj.group('host')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
url = self._EMBED_URL % (video_host, video_id)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
file_key = self._search_regex(
|
||||||
|
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
|
||||||
|
|
||||||
|
return self._extract_video(video_host, video_id, file_key)
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class CNNIE(InfoExtractor):
|
class CNNIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
|
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
|
||||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
|
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import netrc
|
import netrc
|
||||||
@ -15,11 +16,13 @@ from ..utils import (
|
|||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urlparse,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
|
||||||
clean_html,
|
clean_html,
|
||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
@ -130,9 +133,13 @@ class InfoExtractor(object):
|
|||||||
by YoutubeDL if it's missing)
|
by YoutubeDL if it's missing)
|
||||||
categories: A list of categories that the video falls in, for example
|
categories: A list of categories that the video falls in, for example
|
||||||
["Sports", "Berlin"]
|
["Sports", "Berlin"]
|
||||||
|
is_live: True, False, or None (=unknown). Whether this video is a
|
||||||
|
live stream that goes on instead of a fixed-length video.
|
||||||
|
|
||||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||||
|
|
||||||
|
Unless mentioned otherwise, None is equivalent to absence of information.
|
||||||
|
|
||||||
Subclasses of this one should re-define the _real_initialize() and
|
Subclasses of this one should re-define the _real_initialize() and
|
||||||
_real_extract() methods and define a _VALID_URL regexp.
|
_real_extract() methods and define a _VALID_URL regexp.
|
||||||
Probably, they should also be added to the list of extractors.
|
Probably, they should also be added to the list of extractors.
|
||||||
@ -161,6 +168,14 @@ class InfoExtractor(object):
|
|||||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||||
return cls._VALID_URL_RE.match(url) is not None
|
return cls._VALID_URL_RE.match(url) is not None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _match_id(cls, url):
|
||||||
|
if '_VALID_URL_RE' not in cls.__dict__:
|
||||||
|
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||||
|
m = cls._VALID_URL_RE.match(url)
|
||||||
|
assert m
|
||||||
|
return m.group('id')
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def working(cls):
|
def working(cls):
|
||||||
"""Getter method for _WORKING."""
|
"""Getter method for _WORKING."""
|
||||||
@ -266,6 +281,12 @@ class InfoExtractor(object):
|
|||||||
raw_filename = basen + '.dump'
|
raw_filename = basen + '.dump'
|
||||||
filename = sanitize_filename(raw_filename, restricted=True)
|
filename = sanitize_filename(raw_filename, restricted=True)
|
||||||
self.to_screen('Saving request to ' + filename)
|
self.to_screen('Saving request to ' + filename)
|
||||||
|
# Working around MAX_PATH limitation on Windows (see
|
||||||
|
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
|
||||||
|
if os.name == 'nt':
|
||||||
|
absfilepath = os.path.abspath(filename)
|
||||||
|
if len(absfilepath) > 259:
|
||||||
|
filename = '\\\\?\\' + absfilepath
|
||||||
with open(filename, 'wb') as outf:
|
with open(filename, 'wb') as outf:
|
||||||
outf.write(webpage_bytes)
|
outf.write(webpage_bytes)
|
||||||
|
|
||||||
@ -321,7 +342,11 @@ class InfoExtractor(object):
|
|||||||
try:
|
try:
|
||||||
return json.loads(json_string)
|
return json.loads(json_string)
|
||||||
except ValueError as ve:
|
except ValueError as ve:
|
||||||
raise ExtractorError('Failed to download JSON', cause=ve)
|
errmsg = '%s: Failed to parse JSON ' % video_id
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(errmsg, cause=ve)
|
||||||
|
else:
|
||||||
|
self.report_warning(errmsg + str(ve))
|
||||||
|
|
||||||
def report_warning(self, msg, video_id=None):
|
def report_warning(self, msg, video_id=None):
|
||||||
idstr = '' if video_id is None else '%s: ' % video_id
|
idstr = '' if video_id is None else '%s: ' % video_id
|
||||||
@ -638,7 +663,9 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
|
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||||
|
entry_protocol='m3u8', preference=None):
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'm3u8-meta',
|
'format_id': 'm3u8-meta',
|
||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
@ -649,6 +676,11 @@ class InfoExtractor(object):
|
|||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
format_url = lambda u: (
|
||||||
|
u
|
||||||
|
if re.match(r'^https?://', u)
|
||||||
|
else compat_urlparse.urljoin(m3u8_url, u))
|
||||||
|
|
||||||
m3u8_doc = self._download_webpage(m3u8_url, video_id)
|
m3u8_doc = self._download_webpage(m3u8_url, video_id)
|
||||||
last_info = None
|
last_info = None
|
||||||
kv_rex = re.compile(
|
kv_rex = re.compile(
|
||||||
@ -665,15 +697,17 @@ class InfoExtractor(object):
|
|||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
if last_info is None:
|
if last_info is None:
|
||||||
formats.append({'url': line})
|
formats.append({'url': format_url(line)})
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||||
|
|
||||||
f = {
|
f = {
|
||||||
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
|
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
|
||||||
'url': line.strip(),
|
'url': format_url(line.strip()),
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
|
'protocol': entry_protocol,
|
||||||
|
'preference': preference,
|
||||||
}
|
}
|
||||||
codecs = last_info.get('CODECS')
|
codecs = last_info.get('CODECS')
|
||||||
if codecs:
|
if codecs:
|
||||||
@ -693,6 +727,34 @@ class InfoExtractor(object):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _live_title(self, name):
|
||||||
|
""" Generate the title for a live video """
|
||||||
|
now = datetime.datetime.now()
|
||||||
|
now_str = now.strftime("%Y-%m-%d %H:%M")
|
||||||
|
return name + ' ' + now_str
|
||||||
|
|
||||||
|
def _int(self, v, name, fatal=False, **kwargs):
|
||||||
|
res = int_or_none(v, **kwargs)
|
||||||
|
if 'get_attr' in kwargs:
|
||||||
|
print(getattr(v, kwargs['get_attr']))
|
||||||
|
if res is None:
|
||||||
|
msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(msg)
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning(msg)
|
||||||
|
return res
|
||||||
|
|
||||||
|
def _float(self, v, name, fatal=False, **kwargs):
|
||||||
|
res = float_or_none(v, **kwargs)
|
||||||
|
if res is None:
|
||||||
|
msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(msg)
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning(msg)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
@ -34,6 +34,8 @@ class CondeNastIE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
_VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||||
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
||||||
|
|
||||||
|
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys())
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
||||||
'md5': '1921f713ed48aabd715691f774c451f7',
|
'md5': '1921f713ed48aabd715691f774c451f7',
|
||||||
|
@ -9,7 +9,7 @@ import xml.etree.ElementTree
|
|||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from math import pow, sqrt, floor
|
from math import pow, sqrt, floor
|
||||||
from .common import InfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
@ -26,7 +26,7 @@ from ..aes import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(InfoExtractor):
|
class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
@ -271,6 +271,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
else:
|
else:
|
||||||
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
||||||
|
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
|
@ -82,11 +82,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract id and simplified title from URL
|
video_id = self._match_id(url)
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
url = 'http://www.dailymotion.com/video/%s' % video_id
|
url = 'http://www.dailymotion.com/video/%s' % video_id
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
@ -147,17 +143,22 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
self._list_available_subtitles(video_id, webpage)
|
self._list_available_subtitles(video_id, webpage)
|
||||||
return
|
return
|
||||||
|
|
||||||
view_count = self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, 'view count', fatal=False)
|
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
||||||
if view_count is not None:
|
webpage, 'view count', fatal=False))
|
||||||
view_count = str_to_int(view_count)
|
|
||||||
|
title = self._og_search_title(webpage, default=None)
|
||||||
|
if title is None:
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
|
||||||
|
'title')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': info['owner.screenname'],
|
'uploader': info['owner.screenname'],
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': self._og_search_title(webpage),
|
'title': title,
|
||||||
'subtitles': video_subtitles,
|
'subtitles': video_subtitles,
|
||||||
'thumbnail': info['thumbnail_url'],
|
'thumbnail': info['thumbnail_url'],
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
@ -11,10 +11,10 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class DaumIE(InfoExtractor):
|
class DaumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P<id>[^?#&]+)'
|
||||||
IE_NAME = 'daum.net'
|
IE_NAME = 'daum.net'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '52554690',
|
'id': '52554690',
|
||||||
@ -24,11 +24,17 @@ class DaumIE(InfoExtractor):
|
|||||||
'upload_date': '20130831',
|
'upload_date': '20130831',
|
||||||
'duration': 3868,
|
'duration': 3868,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group('id')
|
||||||
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
||||||
webpage = self._download_webpage(canonical_url, video_id)
|
webpage = self._download_webpage(canonical_url, video_id)
|
||||||
full_id = self._search_regex(
|
full_id = self._search_regex(
|
||||||
@ -42,7 +48,6 @@ class DaumIE(InfoExtractor):
|
|||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||||
video_id, 'Downloading video formats info')
|
video_id, 'Downloading video formats info')
|
||||||
|
|
||||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_el in urls.findall('result/output_list/output_list'):
|
for format_el in urls.findall('result/output_list/output_list'):
|
||||||
profile = format_el.attrib['profile']
|
profile = format_el.attrib['profile']
|
||||||
@ -52,7 +57,7 @@ class DaumIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
url_doc = self._download_xml(
|
url_doc = self._download_xml(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||||
video_id, note=False)
|
video_id, note='Downloading video data for %s format' % profile)
|
||||||
format_url = url_doc.find('result/url').text
|
format_url = url_doc.find('result/url').text
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
|
@ -7,7 +7,7 @@ class DivxStageIE(NovaMovIE):
|
|||||||
IE_NAME = 'divxstage'
|
IE_NAME = 'divxstage'
|
||||||
IE_DESC = 'DivxStage'
|
IE_DESC = 'DivxStage'
|
||||||
|
|
||||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'}
|
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'}
|
||||||
|
|
||||||
_HOST = 'www.divxstage.eu'
|
_HOST = 'www.divxstage.eu'
|
||||||
|
|
||||||
|
@ -5,28 +5,32 @@ import os.path
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import compat_urllib_parse_unquote
|
from ..utils import compat_urllib_parse_unquote, url_basename
|
||||||
|
|
||||||
|
|
||||||
class DropboxIE(InfoExtractor):
|
class DropboxIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
|
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'nelirfsxnmcfbfh',
|
'id': 'nelirfsxnmcfbfh',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
fn = compat_urllib_parse_unquote(mobj.group('title'))
|
fn = compat_urllib_parse_unquote(url_basename(url))
|
||||||
title = os.path.splitext(fn)[0]
|
title = os.path.splitext(fn)[0]
|
||||||
video_url = (
|
video_url = re.sub(r'[?&]dl=0', '', url)
|
||||||
re.sub(r'[?&]dl=0', '', url) +
|
video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
|
||||||
('?' if '?' in url else '&') + 'dl=1')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -19,7 +19,7 @@ class DrTuberIE(InfoExtractor):
|
|||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'categories': list, # NSFW
|
'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
|
||||||
'thumbnail': 're:https?://.*\.jpg$',
|
'thumbnail': 're:https?://.*\.jpg$',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
@ -52,9 +52,9 @@ class DrTuberIE(InfoExtractor):
|
|||||||
r'<span class="comments_count">([\d,\.]+)</span>',
|
r'<span class="comments_count">([\d,\.]+)</span>',
|
||||||
webpage, 'comment count', fatal=False))
|
webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
cats_str = self._html_search_regex(
|
cats_str = self._search_regex(
|
||||||
r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
|
r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False)
|
||||||
categories = None if cats_str is None else cats_str.split(' ')
|
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
61
youtube_dl/extractor/einthusan.py
Normal file
61
youtube_dl/extractor/einthusan.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class EinthusanIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
||||||
|
'md5': 'af244f4458cd667205e513d75da5b8b1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2447',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ek Villain',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
||||||
|
'md5': 'ef63c7a803e22315880ed182c10d1c5c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1671',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Soodhu Kavvuum',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_title = self._html_search_regex(
|
||||||
|
r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title')
|
||||||
|
|
||||||
|
video_url = self._html_search_regex(
|
||||||
|
r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''',
|
||||||
|
webpage, 'video url')
|
||||||
|
|
||||||
|
description = self._html_search_meta('description', webpage)
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
|
||||||
|
webpage, "thumbnail url", fatal=False)
|
||||||
|
if thumbnail is not None:
|
||||||
|
thumbnail = thumbnail.replace('..', 'http://www.einthusan.com')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_title,
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'description': description,
|
||||||
|
}
|
@ -1,4 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -7,20 +9,20 @@ from ..utils import ExtractorError
|
|||||||
|
|
||||||
|
|
||||||
class EitbIE(InfoExtractor):
|
class EitbIE(InfoExtractor):
|
||||||
IE_NAME = u'eitb.tv'
|
IE_NAME = 'eitb.tv'
|
||||||
_VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
|
_VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'add_ie': ['Brightcove'],
|
'add_ie': ['Brightcove'],
|
||||||
u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
|
'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
|
||||||
u'md5': u'edf4436247185adee3ea18ce64c47998',
|
'md5': 'edf4436247185adee3ea18ce64c47998',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'2743577154001',
|
'id': '2743577154001',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'60 minutos (Lasa y Zabala, 30 años)',
|
'title': '60 minutos (Lasa y Zabala, 30 años)',
|
||||||
# All videos from eitb has this description in the brightcove info
|
# All videos from eitb has this description in the brightcove info
|
||||||
u'description': u'.',
|
'description': '.',
|
||||||
u'uploader': u'Euskal Telebista',
|
'uploader': 'Euskal Telebista',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -30,7 +32,7 @@ class EitbIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, chapter_id)
|
webpage = self._download_webpage(url, chapter_id)
|
||||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||||
if bc_url is None:
|
if bc_url is None:
|
||||||
raise ExtractorError(u'Could not extract the Brightcove url')
|
raise ExtractorError('Could not extract the Brightcove url')
|
||||||
# The BrightcoveExperience object doesn't contain the video id, we set
|
# The BrightcoveExperience object doesn't contain the video id, we set
|
||||||
# it manually
|
# it manually
|
||||||
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
|
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
|
||||||
|
@ -14,11 +14,11 @@ class EpornerIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||||
'md5': '3b427ae4b9d60619106de3185c2987cd',
|
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '95008',
|
'id': '95008',
|
||||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||||
'duration': 194,
|
'duration': 194,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
@ -7,6 +7,7 @@ from ..utils import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -20,6 +21,7 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
||||||
'uploader': 'unknown',
|
'uploader': 'unknown',
|
||||||
|
'view_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
@ -39,8 +41,12 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
|
r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
|
||||||
fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
view_count = str_to_int(self._html_search_regex(
|
||||||
|
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||||
|
webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(
|
video_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||||
r'video_url=(.+?)&', webpage, 'video_url'))
|
r'video_url=(.+?)&', webpage, 'video_url'))
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
@ -51,6 +57,7 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
'view_count': view_count,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format': format,
|
'format': format,
|
||||||
'format_id': format,
|
'format_id': format,
|
||||||
|
@ -12,8 +12,8 @@ from ..utils import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
limit_length,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -35,7 +35,15 @@ class FacebookIE(InfoExtractor):
|
|||||||
'id': '637842556329505',
|
'id': '637842556329505',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'duration': 38,
|
'duration': 38,
|
||||||
'title': 'Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam fin...',
|
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Video without discernible title',
|
||||||
|
'url': 'https://www.facebook.com/video.php?v=274175099429670',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '274175099429670',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Facebook video #274175099429670',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||||
@ -131,8 +139,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||||
webpage, 'alternative title', default=None)
|
webpage, 'alternative title', default=None)
|
||||||
if len(video_title) > 80 + 3:
|
video_title = limit_length(video_title, 80)
|
||||||
video_title = video_title[:80] + '...'
|
|
||||||
if not video_title:
|
if not video_title:
|
||||||
video_title = 'Facebook video #%s' % video_id
|
video_title = 'Facebook video #%s' % video_id
|
||||||
|
|
||||||
|
@ -10,13 +10,13 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class FlickrIE(InfoExtractor):
|
class FlickrIE(InfoExtractor):
|
||||||
"""Information Extractor for Flickr videos"""
|
_VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
|
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
|
||||||
'file': '5645318632.mp4',
|
|
||||||
'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
|
'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '5645318632',
|
||||||
|
'ext': 'mp4',
|
||||||
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
|
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
|
||||||
"uploader_id": "forestwander-nature-pictures",
|
"uploader_id": "forestwander-nature-pictures",
|
||||||
"title": "Dark Hollow Waterfalls"
|
"title": "Dark Hollow Waterfalls"
|
||||||
@ -49,7 +49,7 @@ class FlickrIE(InfoExtractor):
|
|||||||
raise ExtractorError('Unable to extract video url')
|
raise ExtractorError('Unable to extract video url')
|
||||||
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -57,4 +57,4 @@ class FlickrIE(InfoExtractor):
|
|||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
}]
|
}
|
||||||
|
@ -4,16 +4,21 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class FranceInterIE(InfoExtractor):
|
class FranceInterIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})'
|
_VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
|
'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
|
||||||
'file': '793962.mp3',
|
|
||||||
'md5': '4764932e466e6f6c79c317d2e74f6884',
|
'md5': '4764932e466e6f6c79c317d2e74f6884',
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
"title": "L’Histoire dans les jeux vidéo",
|
'id': '793962',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'L’Histoire dans les jeux vidéo',
|
||||||
|
'description': 'md5:7e93ddb4451e7530022792240a3049c7',
|
||||||
|
'timestamp': 1387369800,
|
||||||
|
'upload_date': '20131218',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -22,17 +27,26 @@ class FranceInterIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title')
|
|
||||||
path = self._search_regex(
|
path = self._search_regex(
|
||||||
r'&urlAOD=(.*?)&startTime', webpage, 'video url')
|
r'<a id="player".+?href="([^"]+)"', webpage, 'video url')
|
||||||
video_url = 'http://www.franceinter.fr/' + path
|
video_url = 'http://www.franceinter.fr/' + path
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<span class="title">(.+?)</span>', webpage, 'title')
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'<span class="description">(.*?)</span>',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
timestamp = int_or_none(self._search_regex(
|
||||||
|
r'data-date="(\d+)"', webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'timestamp': timestamp,
|
||||||
'formats': [{
|
'formats': [{
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}],
|
}],
|
||||||
'title': title,
|
|
||||||
}
|
}
|
||||||
|
@ -8,45 +8,68 @@ import json
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
ExtractorError,
|
||||||
|
clean_html,
|
||||||
|
parse_duration,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id, catalogue):
|
||||||
info = self._download_xml(
|
info = self._download_json(
|
||||||
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
|
||||||
'getInfosOeuvre.php?id-diffusion='
|
% (video_id, catalogue),
|
||||||
+ video_id, video_id, 'Downloading XML config')
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
manifest_url = info.find('videos/video/url').text
|
if info.get('status') == 'NOK':
|
||||||
manifest_url = manifest_url.replace('/z/', '/i/')
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
|
||||||
|
|
||||||
if manifest_url.startswith('rtmp'):
|
|
||||||
formats = [{'url': manifest_url, 'ext': 'flv'}]
|
|
||||||
else:
|
|
||||||
formats = []
|
formats = []
|
||||||
available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
|
for video in info['videos']:
|
||||||
for index, format_descr in enumerate(available_formats.split(',')):
|
if video['statut'] != 'ONLINE':
|
||||||
format_info = {
|
continue
|
||||||
'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
|
video_url = video['url']
|
||||||
'ext': 'mp4',
|
if not video_url:
|
||||||
}
|
continue
|
||||||
m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
|
format_id = video['format']
|
||||||
if m_resolution is not None:
|
if video_url.endswith('.f4m'):
|
||||||
format_info.update({
|
video_url_parsed = compat_urllib_parse_urlparse(video_url)
|
||||||
'width': int(m_resolution.group('width')),
|
f4m_url = self._download_webpage(
|
||||||
'height': int(m_resolution.group('height')),
|
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
|
||||||
|
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||||
|
if f4m_url:
|
||||||
|
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||||
|
for f4m_format in f4m_formats:
|
||||||
|
f4m_format['preference'] = 1
|
||||||
|
formats.extend(f4m_formats)
|
||||||
|
elif video_url.endswith('.m3u8'):
|
||||||
|
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
|
||||||
|
elif video_url.startswith('rtmp'):
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': 'rtmp-%s' % format_id,
|
||||||
|
'ext': 'flv',
|
||||||
|
'preference': 1,
|
||||||
})
|
})
|
||||||
formats.append(format_info)
|
else:
|
||||||
|
formats.append({
|
||||||
thumbnail_path = info.find('image').text
|
'url': video_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'preference': -1,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info.find('titre').text,
|
'title': info['titre'],
|
||||||
|
'description': clean_html(info['synopsis']),
|
||||||
|
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||||
|
'duration': parse_duration(info['duree']),
|
||||||
|
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
|
||||||
'description': info.find('synopsis').text,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -61,7 +84,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
|||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'data-diffusion="(\d+)"', webpage, 'ID')
|
r'data-diffusion="(\d+)"', webpage, 'ID')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id, 'Pluzz')
|
||||||
|
|
||||||
|
|
||||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||||
@ -70,13 +93,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||||
|
'md5': '9cecf35f99c4079c199e9817882a9a1c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '84981923',
|
'id': '84981923',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Soir 3',
|
'title': 'Soir 3',
|
||||||
},
|
'upload_date': '20130826',
|
||||||
'params': {
|
'timestamp': 1377548400,
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||||
@ -88,15 +111,17 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'HLS (reqires ffmpeg)'
|
'skip_download': 'HLS (reqires ffmpeg)'
|
||||||
}
|
},
|
||||||
|
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
page_title = mobj.group('title')
|
page_title = mobj.group('title')
|
||||||
webpage = self._download_webpage(url, page_title)
|
webpage = self._download_webpage(url, page_title)
|
||||||
video_id = self._search_regex(r'id-video=((?:[^0-9]*?_)?[0-9]+)[@"]', webpage, 'video id')
|
video_id, catalogue = self._search_regex(
|
||||||
return self._extract_video(video_id)
|
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
|
||||||
|
return self._extract_video(video_id, catalogue)
|
||||||
|
|
||||||
|
|
||||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||||
@ -112,91 +137,77 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
# france2
|
# france2
|
||||||
{
|
{
|
||||||
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||||
'file': '75540104.mp4',
|
'md5': 'c03fc87cb85429ffd55df32b9fc05523',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': '13h15, le samedi...',
|
'id': '109169362',
|
||||||
'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
'ext': 'flv',
|
||||||
},
|
'title': '13h15, le dimanche...',
|
||||||
'params': {
|
'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
|
||||||
# m3u8 download
|
'upload_date': '20140914',
|
||||||
'skip_download': True,
|
'timestamp': 1410693600,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# france3
|
# france3
|
||||||
{
|
{
|
||||||
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||||
|
'md5': '679bb8f8921f8623bd658fa2f8364da0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Le scandale du prix des médicaments',
|
'title': 'Le scandale du prix des médicaments',
|
||||||
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||||
},
|
'upload_date': '20131113',
|
||||||
'params': {
|
'timestamp': 1384380000,
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# france4
|
# france4
|
||||||
{
|
{
|
||||||
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||||
|
'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Hero Corp Making of - Extrait 1',
|
'title': 'Hero Corp Making of - Extrait 1',
|
||||||
'description': 'md5:c87d54871b1790679aec1197e73d650a',
|
'description': 'md5:c87d54871b1790679aec1197e73d650a',
|
||||||
},
|
'upload_date': '20131106',
|
||||||
'params': {
|
'timestamp': 1383766500,
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# france5
|
# france5
|
||||||
{
|
{
|
||||||
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||||
|
'md5': '78f0f4064f9074438e660785bbf2c5d9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '92837968',
|
'id': '108961659',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'C à dire ?!',
|
'title': 'C à dire ?!',
|
||||||
'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
'description': 'md5:1a4aeab476eb657bf57c4ff122129f81',
|
||||||
},
|
'upload_date': '20140915',
|
||||||
'params': {
|
'timestamp': 1410795000,
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# franceo
|
# franceo
|
||||||
{
|
{
|
||||||
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||||
|
'md5': '52f0bfe202848b15915a2f39aaa8981b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '92327925',
|
'id': '108634970',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Infô-Afrique',
|
'title': 'Infô Afrique',
|
||||||
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
||||||
|
'upload_date': '20140915',
|
||||||
|
'timestamp': 1410822000,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'The id changes frequently',
|
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj.group('key'):
|
webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
|
||||||
webpage = self._download_webpage(url, mobj.group('key'))
|
video_id, catalogue = self._html_search_regex(
|
||||||
id_res = [
|
r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||||
(r'''(?x)<div\s+class="video-player">\s*
|
webpage, 'video ID').split('@')
|
||||||
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
|
return self._extract_video(video_id, catalogue)
|
||||||
class="francetv-video-player">'''),
|
|
||||||
(r'<a id="player_direct" href="http://info\.francetelevisions'
|
|
||||||
'\.fr/\?id-video=([^"/&]+)'),
|
|
||||||
(r'<a class="video" id="ftv_player_(.+?)"'),
|
|
||||||
]
|
|
||||||
video_id = self._html_search_regex(id_res, webpage, 'video ID')
|
|
||||||
else:
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
return self._extract_video(video_id)
|
|
||||||
|
|
||||||
|
|
||||||
class GenerationQuoiIE(InfoExtractor):
|
class GenerationQuoiIE(InfoExtractor):
|
||||||
@ -232,16 +243,15 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
|
||||||
|
'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'EV_6785',
|
'id': 'EV_22853',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Einstein on the beach au Théâtre du Châtelet',
|
'title': 'Dans les jardins de William Christie - Le Camus',
|
||||||
'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
|
||||||
},
|
'upload_date': '20140829',
|
||||||
'params': {
|
'timestamp': 1409317200,
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -249,5 +259,7 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
|
video_id, catalogue = self._search_regex(
|
||||||
return self._extract_video(video_id)
|
r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
|
||||||
|
|
||||||
|
return self._extract_video(video_id, catalogue)
|
||||||
|
@ -21,7 +21,7 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.funnyordie.com/embed/e402820827',
|
'url': 'http://www.funnyordie.com/embed/e402820827',
|
||||||
'md5': 'ff4d83318f89776ed0250634cfaa8d36',
|
'md5': '29f4c5e5a61ca39dfd7e8348a75d0aad',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'e402820827',
|
'id': 'e402820827',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -37,7 +37,7 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage)
|
links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)
|
||||||
if not links:
|
if not links:
|
||||||
raise ExtractorError('No media links available for %s' % video_id)
|
raise ExtractorError('No media links available for %s' % video_id)
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ from .brightcove import BrightcoveIE
|
|||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .smotri import SmotriIE
|
from .smotri import SmotriIE
|
||||||
|
from .condenast import CondeNastIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -155,7 +156,6 @@ class GenericIE(InfoExtractor):
|
|||||||
# funnyordie embed
|
# funnyordie embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
|
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
|
||||||
'md5': '7cf780be104d40fea7bae52eed4a470e',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '18e820ec3f',
|
'id': '18e820ec3f',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -180,13 +180,13 @@ class GenericIE(InfoExtractor):
|
|||||||
# Embedded TED video
|
# Embedded TED video
|
||||||
{
|
{
|
||||||
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
||||||
'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
|
'md5': '65fdff94098e4a607385a60c5177c638',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '981',
|
'id': '1969',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'My web playroom',
|
'title': 'Hidden miracles of the natural world',
|
||||||
'uploader': 'Ze Frank',
|
'uploader': 'Louie Schwartzberg',
|
||||||
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
|
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# Embeded Ustream video
|
# Embeded Ustream video
|
||||||
@ -226,21 +226,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': 'Requires rtmpdump'
|
'skip_download': 'Requires rtmpdump'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# smotri embed
|
|
||||||
{
|
|
||||||
'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
|
|
||||||
'md5': 'ec40048448e9284c9a1de77bb188108b',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'v27008541fad',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Крым и Севастополь вошли в состав России',
|
|
||||||
'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
|
|
||||||
'duration': 900,
|
|
||||||
'upload_date': '20140318',
|
|
||||||
'uploader': 'rbctv_2012_4',
|
|
||||||
'uploader_id': 'rbctv_2012_4',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# Condé Nast embed
|
# Condé Nast embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.wired.com/2014/04/honda-asimo/',
|
'url': 'http://www.wired.com/2014/04/honda-asimo/',
|
||||||
@ -295,13 +280,13 @@ class GenericIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
|
'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'jpSGZsgga_I',
|
'id': '4vAffPZIT44',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Asphalt 8: Airborne - Launch Trailer',
|
'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
|
||||||
'uploader': 'Gameloft',
|
'uploader': 'Gameloft',
|
||||||
'uploader_id': 'gameloft',
|
'uploader_id': 'gameloft',
|
||||||
'upload_date': '20130821',
|
'upload_date': '20140828',
|
||||||
'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
|
'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -382,14 +367,32 @@ class GenericIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Wistia embed
|
||||||
|
{
|
||||||
|
'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
||||||
|
'md5': '8788b683c777a5cf25621eaf286d0c23',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1cfaf6b7ea',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'md5:51364a8d3d009997ba99656004b5e20d',
|
||||||
|
'duration': 643.0,
|
||||||
|
'filesize': 182808282,
|
||||||
|
'uploader': 'education-portal.com',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
|
||||||
|
'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'uxjb0lwrcz',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
|
||||||
|
'duration': 1715.0,
|
||||||
|
'uploader': 'thoughtworks.wistia.com',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
|
||||||
"""Report webpage download."""
|
|
||||||
if not self._downloader.params.get('test', False):
|
|
||||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
|
||||||
super(GenericIE, self).report_download_webpage(video_id)
|
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
"""Report information extraction."""
|
"""Report information extraction."""
|
||||||
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
|
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
|
||||||
@ -489,6 +492,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
url, smuggled_data = unsmuggle_url(url)
|
url, smuggled_data = unsmuggle_url(url)
|
||||||
force_videoid = None
|
force_videoid = None
|
||||||
|
is_intentional = smuggled_data and smuggled_data.get('to_generic')
|
||||||
if smuggled_data and 'force_videoid' in smuggled_data:
|
if smuggled_data and 'force_videoid' in smuggled_data:
|
||||||
force_videoid = smuggled_data['force_videoid']
|
force_videoid = smuggled_data['force_videoid']
|
||||||
video_id = force_videoid
|
video_id = force_videoid
|
||||||
@ -531,6 +535,9 @@ class GenericIE(InfoExtractor):
|
|||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||||
|
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@ -584,7 +591,9 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Helper method
|
# Helper method
|
||||||
def _playlist_from_matches(matches, getter, ie=None):
|
def _playlist_from_matches(matches, getter, ie=None):
|
||||||
urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
|
urlrs = orderedSet(
|
||||||
|
self.url_result(self._proto_relative_url(getter(m)), ie)
|
||||||
|
for m in matches)
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
@ -611,13 +620,13 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj:
|
if mobj:
|
||||||
player_url = unescapeHTML(mobj.group('url'))
|
player_url = unescapeHTML(mobj.group('url'))
|
||||||
surl = smuggle_url(player_url, {'Referer': url})
|
surl = smuggle_url(player_url, {'Referer': url})
|
||||||
return self.url_result(surl, 'Vimeo')
|
return self.url_result(surl)
|
||||||
|
|
||||||
# Look for embedded (swf embed) Vimeo player
|
# Look for embedded (swf embed) Vimeo player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return self.url_result(mobj.group(1), 'Vimeo')
|
return self.url_result(mobj.group(1))
|
||||||
|
|
||||||
# Look for embedded YouTube player
|
# Look for embedded YouTube player
|
||||||
matches = re.findall(r'''(?x)
|
matches = re.findall(r'''(?x)
|
||||||
@ -629,11 +638,11 @@ class GenericIE(InfoExtractor):
|
|||||||
)
|
)
|
||||||
(["\'])
|
(["\'])
|
||||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
||||||
(?:embed|v)/.+?)
|
(?:embed|v|p)/.+?)
|
||||||
\1''', webpage)
|
\1''', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(
|
return _playlist_from_matches(
|
||||||
matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
|
matches, lambda m: unescapeHTML(m[1]))
|
||||||
|
|
||||||
# Look for embedded Dailymotion player
|
# Look for embedded Dailymotion player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
@ -642,19 +651,42 @@ class GenericIE(InfoExtractor):
|
|||||||
return _playlist_from_matches(
|
return _playlist_from_matches(
|
||||||
matches, lambda m: unescapeHTML(m[1]))
|
matches, lambda m: unescapeHTML(m[1]))
|
||||||
|
|
||||||
|
# Look for embedded Dailymotion playlist player (#3822)
|
||||||
|
m = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
|
||||||
|
if m:
|
||||||
|
playlists = re.findall(
|
||||||
|
r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
|
||||||
|
if playlists:
|
||||||
|
return _playlist_from_matches(
|
||||||
|
playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
|
||||||
|
|
||||||
# Look for embedded Wistia player
|
# Look for embedded Wistia player
|
||||||
match = re.search(
|
match = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
||||||
if match:
|
if match:
|
||||||
|
embed_url = self._proto_relative_url(
|
||||||
|
unescapeHTML(match.group('url')))
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': unescapeHTML(match.group('url')),
|
'url': embed_url,
|
||||||
'ie_key': 'Wistia',
|
'ie_key': 'Wistia',
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||||
|
if match:
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
|
||||||
|
'ie_key': 'Wistia',
|
||||||
|
'uploader': video_uploader,
|
||||||
|
'title': video_title,
|
||||||
|
'id': match.group('id')
|
||||||
|
}
|
||||||
|
|
||||||
# Look for embedded blip.tv player
|
# Look for embedded blip.tv player
|
||||||
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
@ -830,54 +862,64 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'MLB')
|
return self.url_result(mobj.group('url'), 'MLB')
|
||||||
|
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
||||||
|
|
||||||
|
def check_video(vurl):
|
||||||
|
vpath = compat_urlparse.urlparse(vurl).path
|
||||||
|
vext = determine_ext(vpath)
|
||||||
|
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
|
||||||
|
|
||||||
|
def filter_video(urls):
|
||||||
|
return list(filter(check_video, urls))
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# Look for gorilla-vid style embedding
|
# Look for gorilla-vid style embedding
|
||||||
found = re.findall(r'''(?sx)
|
found = filter_video(re.findall(r'''(?sx)
|
||||||
(?:
|
(?:
|
||||||
jw_plugins|
|
jw_plugins|
|
||||||
JWPlayerOptions|
|
JWPlayerOptions|
|
||||||
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
|
||||||
)
|
)
|
||||||
.*?file\s*:\s*["\'](.*?)["\']''', webpage)
|
.*?file\s*:\s*["\'](.*?)["\']''', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# Broaden the search a little bit
|
# Broaden the search a little bit
|
||||||
found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# Broaden the findall a little bit: JWPlayer JS loader
|
# Broaden the findall a little bit: JWPlayer JS loader
|
||||||
found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
found = filter_video(re.findall(
|
||||||
|
r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# Flow player
|
# Flow player
|
||||||
found = re.findall(r'''(?xs)
|
found = filter_video(re.findall(r'''(?xs)
|
||||||
flowplayer\("[^"]+",\s*
|
flowplayer\("[^"]+",\s*
|
||||||
\{[^}]+?\}\s*,
|
\{[^}]+?\}\s*,
|
||||||
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
||||||
["']?url["']?\s*:\s*["']([^"']+)["']
|
["']?url["']?\s*:\s*["']([^"']+)["']
|
||||||
''', webpage)
|
''', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# Try to find twitter cards info
|
# Try to find twitter cards info
|
||||||
found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
found = filter_video(re.findall(
|
||||||
|
r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# We look for Open Graph info:
|
# We look for Open Graph info:
|
||||||
# We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
|
# We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
|
||||||
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
||||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||||
if m_video_type is not None:
|
if m_video_type is not None:
|
||||||
def check_video(vurl):
|
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
||||||
vpath = compat_urlparse.urlparse(vurl).path
|
|
||||||
vext = determine_ext(vpath)
|
|
||||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg')
|
|
||||||
found = list(filter(
|
|
||||||
check_video,
|
|
||||||
re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
|
|
||||||
if not found:
|
if not found:
|
||||||
# HTML5 video
|
# HTML5 video
|
||||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]+)? src="([^"]+)"', webpage)
|
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]+)? src="([^"]+)"', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
found = re.search(
|
found = re.search(
|
||||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
|
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
|
||||||
webpage)
|
webpage)
|
||||||
if found:
|
if found:
|
||||||
new_url = found.group(1)
|
new_url = found.group(1)
|
||||||
|
398
youtube_dl/extractor/globo.py
Normal file
398
youtube_dl/extractor/globo.py
Normal file
@ -0,0 +1,398 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import random
|
||||||
|
import math
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
compat_str,
|
||||||
|
compat_chr,
|
||||||
|
compat_ord,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GloboIE(InfoExtractor):
|
||||||
|
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
|
||||||
|
|
||||||
|
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
||||||
|
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
|
||||||
|
|
||||||
|
_VIDEOID_REGEXES = [
|
||||||
|
r'\bdata-video-id="(\d+)"',
|
||||||
|
r'\bdata-player-videosids="(\d+)"',
|
||||||
|
r'<div[^>]+\bid="(\d+)"',
|
||||||
|
]
|
||||||
|
|
||||||
|
_RESIGN_EXPIRATION = 86400
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://globotv.globo.com/sportv/futebol-nacional/v/os-gols-de-atletico-mg-3-x-2-santos-pela-24a-rodada-do-brasileirao/3654973/',
|
||||||
|
'md5': '03ebf41cb7ade43581608b7d9b71fab0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3654973',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Os gols de Atlético-MG 3 x 2 Santos pela 24ª rodada do Brasileirão',
|
||||||
|
'duration': 251.585,
|
||||||
|
'uploader': 'SporTV',
|
||||||
|
'uploader_id': 698,
|
||||||
|
'like_count': int,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
|
||||||
|
'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3607726',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
|
||||||
|
'duration': 103.204,
|
||||||
|
'uploader': 'Globo.com',
|
||||||
|
'uploader_id': 265,
|
||||||
|
'like_count': int,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
|
||||||
|
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3652183',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
|
||||||
|
'duration': 110.711,
|
||||||
|
'uploader': 'Rede Globo',
|
||||||
|
'uploader_id': 196,
|
||||||
|
'like_count': int,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
class MD5():
|
||||||
|
HEX_FORMAT_LOWERCASE = 0
|
||||||
|
HEX_FORMAT_UPPERCASE = 1
|
||||||
|
BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = ''
|
||||||
|
BASE64_PAD_CHARACTER_RFC_COMPLIANCE = '='
|
||||||
|
PADDING = '=0xFF01DD'
|
||||||
|
hexcase = 0
|
||||||
|
b64pad = ''
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class JSArray(list):
|
||||||
|
def __getitem__(self, y):
|
||||||
|
try:
|
||||||
|
return list.__getitem__(self, y)
|
||||||
|
except IndexError:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def __setitem__(self, i, y):
|
||||||
|
try:
|
||||||
|
return list.__setitem__(self, i, y)
|
||||||
|
except IndexError:
|
||||||
|
self.extend([0] * (i - len(self) + 1))
|
||||||
|
self[-1] = y
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def hex_md5(cls, param1):
|
||||||
|
return cls.rstr2hex(cls.rstr_md5(cls.str2rstr_utf8(param1)))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def b64_md5(cls, param1, param2=None):
|
||||||
|
return cls.rstr2b64(cls.rstr_md5(cls.str2rstr_utf8(param1, param2)))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def any_md5(cls, param1, param2):
|
||||||
|
return cls.rstr2any(cls.rstr_md5(cls.str2rstr_utf8(param1)), param2)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def rstr_md5(cls, param1):
|
||||||
|
return cls.binl2rstr(cls.binl_md5(cls.rstr2binl(param1), len(param1) * 8))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def rstr2hex(cls, param1):
|
||||||
|
_loc_2 = '0123456789ABCDEF' if cls.hexcase else '0123456789abcdef'
|
||||||
|
_loc_3 = ''
|
||||||
|
for _loc_5 in range(0, len(param1)):
|
||||||
|
_loc_4 = compat_ord(param1[_loc_5])
|
||||||
|
_loc_3 += _loc_2[_loc_4 >> 4 & 15] + _loc_2[_loc_4 & 15]
|
||||||
|
return _loc_3
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def rstr2b64(cls, param1):
|
||||||
|
_loc_2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
|
||||||
|
_loc_3 = ''
|
||||||
|
_loc_4 = len(param1)
|
||||||
|
for _loc_5 in range(0, _loc_4, 3):
|
||||||
|
_loc_6_1 = compat_ord(param1[_loc_5]) << 16
|
||||||
|
_loc_6_2 = compat_ord(param1[_loc_5 + 1]) << 8 if _loc_5 + 1 < _loc_4 else 0
|
||||||
|
_loc_6_3 = compat_ord(param1[_loc_5 + 2]) if _loc_5 + 2 < _loc_4 else 0
|
||||||
|
_loc_6 = _loc_6_1 | _loc_6_2 | _loc_6_3
|
||||||
|
for _loc_7 in range(0, 4):
|
||||||
|
if _loc_5 * 8 + _loc_7 * 6 > len(param1) * 8:
|
||||||
|
_loc_3 += cls.b64pad
|
||||||
|
else:
|
||||||
|
_loc_3 += _loc_2[_loc_6 >> 6 * (3 - _loc_7) & 63]
|
||||||
|
return _loc_3
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def rstr2any(param1, param2):
|
||||||
|
_loc_3 = len(param2)
|
||||||
|
_loc_4 = []
|
||||||
|
_loc_9 = [0] * ((len(param1) >> 2) + 1)
|
||||||
|
for _loc_5 in range(0, len(_loc_9)):
|
||||||
|
_loc_9[_loc_5] = compat_ord(param1[_loc_5 * 2]) << 8 | compat_ord(param1[_loc_5 * 2 + 1])
|
||||||
|
|
||||||
|
while len(_loc_9) > 0:
|
||||||
|
_loc_8 = []
|
||||||
|
_loc_7 = 0
|
||||||
|
for _loc_5 in range(0, len(_loc_9)):
|
||||||
|
_loc_7 = (_loc_7 << 16) + _loc_9[_loc_5]
|
||||||
|
_loc_6 = math.floor(_loc_7 / _loc_3)
|
||||||
|
_loc_7 -= _loc_6 * _loc_3
|
||||||
|
if len(_loc_8) > 0 or _loc_6 > 0:
|
||||||
|
_loc_8[len(_loc_8)] = _loc_6
|
||||||
|
|
||||||
|
_loc_4[len(_loc_4)] = _loc_7
|
||||||
|
_loc_9 = _loc_8
|
||||||
|
|
||||||
|
_loc_10 = ''
|
||||||
|
_loc_5 = len(_loc_4) - 1
|
||||||
|
while _loc_5 >= 0:
|
||||||
|
_loc_10 += param2[_loc_4[_loc_5]]
|
||||||
|
_loc_5 -= 1
|
||||||
|
|
||||||
|
return _loc_10
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def str2rstr_utf8(cls, param1, param2=None):
|
||||||
|
_loc_3 = ''
|
||||||
|
_loc_4 = -1
|
||||||
|
if not param2:
|
||||||
|
param2 = cls.PADDING
|
||||||
|
param1 = param1 + param2[1:9]
|
||||||
|
while True:
|
||||||
|
_loc_4 += 1
|
||||||
|
if _loc_4 >= len(param1):
|
||||||
|
break
|
||||||
|
_loc_5 = compat_ord(param1[_loc_4])
|
||||||
|
_loc_6 = compat_ord(param1[_loc_4 + 1]) if _loc_4 + 1 < len(param1) else 0
|
||||||
|
if 55296 <= _loc_5 <= 56319 and 56320 <= _loc_6 <= 57343:
|
||||||
|
_loc_5 = 65536 + ((_loc_5 & 1023) << 10) + (_loc_6 & 1023)
|
||||||
|
_loc_4 += 1
|
||||||
|
if _loc_5 <= 127:
|
||||||
|
_loc_3 += compat_chr(_loc_5)
|
||||||
|
continue
|
||||||
|
if _loc_5 <= 2047:
|
||||||
|
_loc_3 += compat_chr(192 | _loc_5 >> 6 & 31) + compat_chr(128 | _loc_5 & 63)
|
||||||
|
continue
|
||||||
|
if _loc_5 <= 65535:
|
||||||
|
_loc_3 += compat_chr(224 | _loc_5 >> 12 & 15) + compat_chr(128 | _loc_5 >> 6 & 63) + compat_chr(
|
||||||
|
128 | _loc_5 & 63)
|
||||||
|
continue
|
||||||
|
if _loc_5 <= 2097151:
|
||||||
|
_loc_3 += compat_chr(240 | _loc_5 >> 18 & 7) + compat_chr(128 | _loc_5 >> 12 & 63) + compat_chr(
|
||||||
|
128 | _loc_5 >> 6 & 63) + compat_chr(128 | _loc_5 & 63)
|
||||||
|
return _loc_3
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def rstr2binl(param1):
|
||||||
|
_loc_2 = [0] * ((len(param1) >> 2) + 1)
|
||||||
|
for _loc_3 in range(0, len(_loc_2)):
|
||||||
|
_loc_2[_loc_3] = 0
|
||||||
|
for _loc_3 in range(0, len(param1) * 8, 8):
|
||||||
|
_loc_2[_loc_3 >> 5] |= (compat_ord(param1[_loc_3 // 8]) & 255) << _loc_3 % 32
|
||||||
|
return _loc_2
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def binl2rstr(param1):
|
||||||
|
_loc_2 = ''
|
||||||
|
for _loc_3 in range(0, len(param1) * 32, 8):
|
||||||
|
_loc_2 += compat_chr(param1[_loc_3 >> 5] >> _loc_3 % 32 & 255)
|
||||||
|
return _loc_2
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def binl_md5(cls, param1, param2):
|
||||||
|
param1 = cls.JSArray(param1)
|
||||||
|
param1[param2 >> 5] |= 128 << param2 % 32
|
||||||
|
param1[(param2 + 64 >> 9 << 4) + 14] = param2
|
||||||
|
_loc_3 = 1732584193
|
||||||
|
_loc_4 = -271733879
|
||||||
|
_loc_5 = -1732584194
|
||||||
|
_loc_6 = 271733878
|
||||||
|
for _loc_7 in range(0, len(param1), 16):
|
||||||
|
_loc_8 = _loc_3
|
||||||
|
_loc_9 = _loc_4
|
||||||
|
_loc_10 = _loc_5
|
||||||
|
_loc_11 = _loc_6
|
||||||
|
_loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 0], 7, -680876936)
|
||||||
|
_loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 1], 12, -389564586)
|
||||||
|
_loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 2], 17, 606105819)
|
||||||
|
_loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 3], 22, -1044525330)
|
||||||
|
_loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 4], 7, -176418897)
|
||||||
|
_loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 5], 12, 1200080426)
|
||||||
|
_loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 6], 17, -1473231341)
|
||||||
|
_loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 7], 22, -45705983)
|
||||||
|
_loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 8], 7, 1770035416)
|
||||||
|
_loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 9], 12, -1958414417)
|
||||||
|
_loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 10], 17, -42063)
|
||||||
|
_loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 11], 22, -1990404162)
|
||||||
|
_loc_3 = cls.md5_ff(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 12], 7, 1804603682)
|
||||||
|
_loc_6 = cls.md5_ff(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 13], 12, -40341101)
|
||||||
|
_loc_5 = cls.md5_ff(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 14], 17, -1502002290)
|
||||||
|
_loc_4 = cls.md5_ff(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 15], 22, 1236535329)
|
||||||
|
_loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 1], 5, -165796510)
|
||||||
|
_loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 6], 9, -1069501632)
|
||||||
|
_loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 11], 14, 643717713)
|
||||||
|
_loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 0], 20, -373897302)
|
||||||
|
_loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 5], 5, -701558691)
|
||||||
|
_loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 10], 9, 38016083)
|
||||||
|
_loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 15], 14, -660478335)
|
||||||
|
_loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 4], 20, -405537848)
|
||||||
|
_loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 9], 5, 568446438)
|
||||||
|
_loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 14], 9, -1019803690)
|
||||||
|
_loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 3], 14, -187363961)
|
||||||
|
_loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 8], 20, 1163531501)
|
||||||
|
_loc_3 = cls.md5_gg(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 13], 5, -1444681467)
|
||||||
|
_loc_6 = cls.md5_gg(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 2], 9, -51403784)
|
||||||
|
_loc_5 = cls.md5_gg(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 7], 14, 1735328473)
|
||||||
|
_loc_4 = cls.md5_gg(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 12], 20, -1926607734)
|
||||||
|
_loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 5], 4, -378558)
|
||||||
|
_loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 8], 11, -2022574463)
|
||||||
|
_loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 11], 16, 1839030562)
|
||||||
|
_loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 14], 23, -35309556)
|
||||||
|
_loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 1], 4, -1530992060)
|
||||||
|
_loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 4], 11, 1272893353)
|
||||||
|
_loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 7], 16, -155497632)
|
||||||
|
_loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 10], 23, -1094730640)
|
||||||
|
_loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 13], 4, 681279174)
|
||||||
|
_loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 0], 11, -358537222)
|
||||||
|
_loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 3], 16, -722521979)
|
||||||
|
_loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 6], 23, 76029189)
|
||||||
|
_loc_3 = cls.md5_hh(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 9], 4, -640364487)
|
||||||
|
_loc_6 = cls.md5_hh(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 12], 11, -421815835)
|
||||||
|
_loc_5 = cls.md5_hh(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 15], 16, 530742520)
|
||||||
|
_loc_4 = cls.md5_hh(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 2], 23, -995338651)
|
||||||
|
_loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 0], 6, -198630844)
|
||||||
|
_loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 7], 10, 1126891415)
|
||||||
|
_loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 14], 15, -1416354905)
|
||||||
|
_loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 5], 21, -57434055)
|
||||||
|
_loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 12], 6, 1700485571)
|
||||||
|
_loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 3], 10, -1894986606)
|
||||||
|
_loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 10], 15, -1051523)
|
||||||
|
_loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 1], 21, -2054922799)
|
||||||
|
_loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 8], 6, 1873313359)
|
||||||
|
_loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 15], 10, -30611744)
|
||||||
|
_loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 6], 15, -1560198380)
|
||||||
|
_loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 13], 21, 1309151649)
|
||||||
|
_loc_3 = cls.md5_ii(_loc_3, _loc_4, _loc_5, _loc_6, param1[_loc_7 + 4], 6, -145523070)
|
||||||
|
_loc_6 = cls.md5_ii(_loc_6, _loc_3, _loc_4, _loc_5, param1[_loc_7 + 11], 10, -1120210379)
|
||||||
|
_loc_5 = cls.md5_ii(_loc_5, _loc_6, _loc_3, _loc_4, param1[_loc_7 + 2], 15, 718787259)
|
||||||
|
_loc_4 = cls.md5_ii(_loc_4, _loc_5, _loc_6, _loc_3, param1[_loc_7 + 9], 21, -343485551)
|
||||||
|
_loc_3 = cls.safe_add(_loc_3, _loc_8)
|
||||||
|
_loc_4 = cls.safe_add(_loc_4, _loc_9)
|
||||||
|
_loc_5 = cls.safe_add(_loc_5, _loc_10)
|
||||||
|
_loc_6 = cls.safe_add(_loc_6, _loc_11)
|
||||||
|
return [_loc_3, _loc_4, _loc_5, _loc_6]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def md5_cmn(cls, param1, param2, param3, param4, param5, param6):
|
||||||
|
return cls.safe_add(
|
||||||
|
cls.bit_rol(cls.safe_add(cls.safe_add(param2, param1), cls.safe_add(param4, param6)), param5), param3)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def md5_ff(cls, param1, param2, param3, param4, param5, param6, param7):
|
||||||
|
return cls.md5_cmn(param2 & param3 | ~param2 & param4, param1, param2, param5, param6, param7)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def md5_gg(cls, param1, param2, param3, param4, param5, param6, param7):
|
||||||
|
return cls.md5_cmn(param2 & param4 | param3 & ~param4, param1, param2, param5, param6, param7)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def md5_hh(cls, param1, param2, param3, param4, param5, param6, param7):
|
||||||
|
return cls.md5_cmn(param2 ^ param3 ^ param4, param1, param2, param5, param6, param7)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def md5_ii(cls, param1, param2, param3, param4, param5, param6, param7):
|
||||||
|
return cls.md5_cmn(param3 ^ (param2 | ~param4), param1, param2, param5, param6, param7)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def safe_add(cls, param1, param2):
|
||||||
|
_loc_3 = (param1 & 65535) + (param2 & 65535)
|
||||||
|
_loc_4 = (param1 >> 16) + (param2 >> 16) + (_loc_3 >> 16)
|
||||||
|
return cls.lshift(_loc_4, 16) | _loc_3 & 65535
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def bit_rol(cls, param1, param2):
|
||||||
|
return cls.lshift(param1, param2) | (param1 & 0xFFFFFFFF) >> (32 - param2)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def lshift(value, count):
|
||||||
|
r = (0xFFFFFFFF & value) << count
|
||||||
|
return -(~(r - 1) & 0xFFFFFFFF) if r > 0x7FFFFFFF else r
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
|
||||||
|
|
||||||
|
video = self._download_json(
|
||||||
|
self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
|
||||||
|
|
||||||
|
title = video['title']
|
||||||
|
duration = float_or_none(video['duration'], 1000)
|
||||||
|
like_count = video['likes']
|
||||||
|
uploader = video['channel']
|
||||||
|
uploader_id = video['channel_id']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
for resource in video['resources']:
|
||||||
|
resource_id = resource.get('_id')
|
||||||
|
if not resource_id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
security = self._download_json(
|
||||||
|
self._SECURITY_URL_TEMPLATE % (video_id, resource_id),
|
||||||
|
video_id, 'Downloading security hash for %s' % resource_id)
|
||||||
|
|
||||||
|
security_hash = security.get('hash')
|
||||||
|
if not security_hash:
|
||||||
|
message = security.get('message')
|
||||||
|
if message:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s' % (self.IE_NAME, message), expected=True)
|
||||||
|
continue
|
||||||
|
|
||||||
|
hash_code = security_hash[:2]
|
||||||
|
received_time = int(security_hash[2:12])
|
||||||
|
received_random = security_hash[12:22]
|
||||||
|
received_md5 = security_hash[22:]
|
||||||
|
|
||||||
|
sign_time = received_time + self._RESIGN_EXPIRATION
|
||||||
|
padding = '%010d' % random.randint(1, 10000000000)
|
||||||
|
|
||||||
|
signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
|
||||||
|
signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'),
|
||||||
|
'format_id': resource_id,
|
||||||
|
'height': resource['height']
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'like_count': like_count,
|
||||||
|
'formats': formats
|
||||||
|
}
|
@ -36,16 +36,16 @@ class GodTubeIE(InfoExtractor):
|
|||||||
'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
|
'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
|
||||||
video_id, 'Downloading player config XML')
|
video_id, 'Downloading player config XML')
|
||||||
|
|
||||||
video_url = config.find('.//file').text
|
video_url = config.find('file').text
|
||||||
uploader = config.find('.//author').text
|
uploader = config.find('author').text
|
||||||
timestamp = parse_iso8601(config.find('.//date').text)
|
timestamp = parse_iso8601(config.find('date').text)
|
||||||
duration = parse_duration(config.find('.//duration').text)
|
duration = parse_duration(config.find('duration').text)
|
||||||
thumbnail = config.find('.//image').text
|
thumbnail = config.find('image').text
|
||||||
|
|
||||||
media = self._download_xml(
|
media = self._download_xml(
|
||||||
'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
|
'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
|
||||||
|
|
||||||
title = media.find('.//title').text
|
title = media.find('title').text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
69
youtube_dl/extractor/golem.py
Normal file
69
youtube_dl/extractor/golem.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GolemIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||||
|
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '14095',
|
||||||
|
'format_id': 'high',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'iPhone 6 und 6 Plus - Test',
|
||||||
|
'duration': 300.44,
|
||||||
|
'filesize': 65309548,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_PREFIX = 'http://video.golem.de'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
config = self._download_xml(
|
||||||
|
'https://video.golem.de/xml/{0}.xml'.format(video_id), video_id)
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'id': video_id,
|
||||||
|
'title': config.findtext('./title', 'golem'),
|
||||||
|
'duration': self._float(config.findtext('./playtime'), 'duration'),
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for e in config:
|
||||||
|
url = e.findtext('./url')
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'format_id': e.tag,
|
||||||
|
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||||
|
'height': self._int(e.get('height'), 'height'),
|
||||||
|
'width': self._int(e.get('width'), 'width'),
|
||||||
|
'filesize': self._int(e.findtext('filesize'), 'filesize'),
|
||||||
|
'ext': determine_ext(e.findtext('./filename')),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info['formats'] = formats
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for e in config.findall('.//teaser'):
|
||||||
|
url = e.findtext('./url')
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||||
|
'width': self._int(e.get('width'), 'thumbnail width'),
|
||||||
|
'height': self._int(e.get('height'), 'thumbnail height'),
|
||||||
|
})
|
||||||
|
info['thumbnails'] = thumbnails
|
||||||
|
|
||||||
|
return info
|
@ -1,13 +1,11 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
|
||||||
import re
|
import re
|
||||||
|
import codecs
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import unified_strdate
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class GooglePlusIE(InfoExtractor):
|
class GooglePlusIE(InfoExtractor):
|
||||||
@ -19,74 +17,57 @@ class GooglePlusIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ZButuJc6CtH',
|
'id': 'ZButuJc6CtH',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
|
'title': '嘆きの天使 降臨',
|
||||||
'upload_date': '20120613',
|
'upload_date': '20120613',
|
||||||
'uploader': '井上ヨシマサ',
|
'uploader': '井上ヨシマサ',
|
||||||
'title': '嘆きの天使 降臨',
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract id from URL
|
video_id = self._match_id(url)
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
# Step 1, Retrieve post webpage to extract further information
|
# Step 1, Retrieve post webpage to extract further information
|
||||||
webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')
|
webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
title = self._og_search_description(webpage).splitlines()[0]
|
||||||
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
# Extract update date
|
|
||||||
upload_date = self._html_search_regex(
|
|
||||||
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
|
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
|
||||||
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
|
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
|
||||||
webpage, 'upload date', fatal=False, flags=re.VERBOSE)
|
webpage, 'upload date', fatal=False, flags=re.VERBOSE))
|
||||||
if upload_date:
|
uploader = self._html_search_regex(
|
||||||
# Convert timestring to a format suitable for filename
|
r'rel="author".*?>(.*?)</a>', webpage, 'uploader', fatal=False)
|
||||||
upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
|
|
||||||
upload_date = upload_date.strftime('%Y%m%d')
|
|
||||||
|
|
||||||
# Extract uploader
|
|
||||||
uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
|
|
||||||
webpage, 'uploader', fatal=False)
|
|
||||||
|
|
||||||
# Extract title
|
|
||||||
# Get the first line for title
|
|
||||||
video_title = self._og_search_description(webpage).splitlines()[0]
|
|
||||||
|
|
||||||
# Step 2, Simulate clicking the image box to launch video
|
# Step 2, Simulate clicking the image box to launch video
|
||||||
DOMAIN = 'https://plus.google.com/'
|
DOMAIN = 'https://plus.google.com/'
|
||||||
video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
|
video_page = self._search_regex(
|
||||||
|
r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
|
||||||
webpage, 'video page URL')
|
webpage, 'video page URL')
|
||||||
if not video_page.startswith(DOMAIN):
|
if not video_page.startswith(DOMAIN):
|
||||||
video_page = DOMAIN + video_page
|
video_page = DOMAIN + video_page
|
||||||
|
|
||||||
webpage = self._download_webpage(video_page, video_id, 'Downloading video page')
|
webpage = self._download_webpage(video_page, video_id, 'Downloading video page')
|
||||||
|
|
||||||
|
def unicode_escape(s):
|
||||||
|
decoder = codecs.getdecoder('unicode_escape')
|
||||||
|
return re.sub(
|
||||||
|
r'\\u[0-9a-fA-F]{4,}',
|
||||||
|
lambda m: decoder(m.group(0))[0],
|
||||||
|
s)
|
||||||
|
|
||||||
# Extract video links all sizes
|
# Extract video links all sizes
|
||||||
pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
|
formats = [{
|
||||||
mobj = re.findall(pattern, webpage)
|
'url': unicode_escape(video_url),
|
||||||
if len(mobj) == 0:
|
'ext': 'flv',
|
||||||
raise ExtractorError('Unable to extract video links')
|
'width': int(width),
|
||||||
|
'height': int(height),
|
||||||
# Sort in resolution
|
} for width, height, video_url in re.findall(
|
||||||
links = sorted(mobj)
|
r'\d+,(\d+),(\d+),"(https?://redirector\.googlevideo\.com.*?)"', webpage)]
|
||||||
|
self._sort_formats(formats)
|
||||||
# Choose the lowest of the sort, i.e. highest resolution
|
|
||||||
video_url = links[-1]
|
|
||||||
# Only get the url. The resolution part in the tuple has no use anymore
|
|
||||||
video_url = video_url[-1]
|
|
||||||
# Treat escaped \u0026 style hex
|
|
||||||
try:
|
|
||||||
video_url = video_url.decode("unicode_escape")
|
|
||||||
except AttributeError: # Python 3
|
|
||||||
video_url = bytes(video_url, 'ascii').decode('unicode-escape')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'title': title,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'title': video_title,
|
'formats': formats,
|
||||||
'ext': 'flv',
|
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
@ -12,20 +13,22 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class GorillaVidIE(InfoExtractor):
|
class GorillaVidIE(InfoExtractor):
|
||||||
IE_DESC = 'GorillaVid.in and daclips.in'
|
IE_DESC = 'GorillaVid.in, daclips.in and movpod.in'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?P<host>(?:www\.)?
|
https?://(?P<host>(?:www\.)?
|
||||||
(?:daclips\.in|gorillavid\.in))/
|
(?:daclips\.in|gorillavid\.in|movpod\.in))/
|
||||||
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
_FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://gorillavid.in/06y9juieqpmi',
|
'url': 'http://gorillavid.in/06y9juieqpmi',
|
||||||
'md5': '5ae4a3580620380619678ee4875893ba',
|
'md5': '5ae4a3580620380619678ee4875893ba',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '06y9juieqpmi',
|
'id': '06y9juieqpmi',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Rebecca Black My Moment Official Music Video Reaction',
|
'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
@ -46,6 +49,9 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
'title': 'Micro Pig piglets ready on 16th July 2009',
|
'title': 'Micro Pig piglets ready on 16th July 2009',
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://movpod.in/0wguyyxi1yca',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -54,6 +60,9 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id)
|
webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id)
|
||||||
|
|
||||||
|
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
||||||
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
fields = dict(re.findall(r'''(?x)<input\s+
|
fields = dict(re.findall(r'''(?x)<input\s+
|
||||||
type="hidden"\s+
|
type="hidden"\s+
|
||||||
name="([^"]+)"\s+
|
name="([^"]+)"\s+
|
||||||
@ -69,14 +78,14 @@ class GorillaVidIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
||||||
|
|
||||||
title = self._search_regex(r'style="z-index: [0-9]+;">([0-9a-zA-Z ]+)(?:-.+)?</span>', webpage, 'title')
|
title = self._search_regex(r'style="z-index: [0-9]+;">([^<]+)</span>', webpage, 'title')
|
||||||
thumbnail = self._search_regex(r'image:\'(http[^\']+)\',', webpage, 'thumbnail')
|
video_url = self._search_regex(r'file\s*:\s*\'(http[^\']+)\',', webpage, 'file url')
|
||||||
url = self._search_regex(r'file: \'(http[^\']+)\',', webpage, 'file url')
|
thumbnail = self._search_regex(r'image\s*:\s*\'(http[^\']+)\',', webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
'url': url,
|
'url': video_url,
|
||||||
'ext': determine_ext(url),
|
'ext': determine_ext(video_url),
|
||||||
'quality': 1,
|
'quality': 1,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
81
youtube_dl/extractor/heise.py
Normal file
81
youtube_dl/extractor/heise.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
get_meta_content,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HeiseIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?heise\.de/video/artikel/
|
||||||
|
.+?(?P<id>[0-9]+)\.html(?:$|[?#])
|
||||||
|
'''
|
||||||
|
_TEST = {
|
||||||
|
'url': (
|
||||||
|
'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
|
||||||
|
),
|
||||||
|
'md5': 'ffed432483e922e88545ad9f2f15d30e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2404147',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': (
|
||||||
|
"Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
|
||||||
|
),
|
||||||
|
'format_id': 'mp4_720',
|
||||||
|
'timestamp': 1411812600,
|
||||||
|
'upload_date': '20140927',
|
||||||
|
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
json_url = self._search_regex(
|
||||||
|
r'json_url:\s*"([^"]+)"', webpage, 'json URL')
|
||||||
|
config = self._download_json(json_url, video_id)
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'id': video_id,
|
||||||
|
'thumbnail': config.get('poster'),
|
||||||
|
'timestamp': parse_iso8601(get_meta_content('date', webpage)),
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
}
|
||||||
|
|
||||||
|
title = get_meta_content('fulltitle', webpage)
|
||||||
|
if title:
|
||||||
|
info['title'] = title
|
||||||
|
elif config.get('title'):
|
||||||
|
info['title'] = config['title']
|
||||||
|
else:
|
||||||
|
info['title'] = self._og_search_title(webpage)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for t, rs in config['formats'].items():
|
||||||
|
if not rs or not hasattr(rs, 'items'):
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'formats: {0}: no resolutions'.format(t))
|
||||||
|
continue
|
||||||
|
|
||||||
|
for height_str, obj in rs.items():
|
||||||
|
format_id = '{0}_{1}'.format(t, height_str)
|
||||||
|
|
||||||
|
if not obj or not obj.get('url'):
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'formats: {0}: no url'.format(format_id))
|
||||||
|
continue
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': obj['url'],
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': self._int(height_str, 'height'),
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info['formats'] = formats
|
||||||
|
|
||||||
|
return info
|
@ -28,13 +28,13 @@ class HowStuffWorksIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm',
|
'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '553470',
|
'id': '453464',
|
||||||
'display_id': 'deadliest-catch-jakes-farewell-pots',
|
'display_id': 'survival-zone-food-and-water-in-the-savanna',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Deadliest Catch: Jake\'s Farewell Pots',
|
'title': 'Survival Zone: Food and Water In the Savanna',
|
||||||
'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc',
|
'description': 'md5:7e1c89f6411434970c15fa094170c371',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -33,8 +33,7 @@ class HuffPostIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
|
api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
|
||||||
data = self._download_json(api_url, video_id)['data']
|
data = self._download_json(api_url, video_id)['data']
|
||||||
|
@ -71,6 +71,7 @@ class IGNIE(InfoExtractor):
|
|||||||
|
|
||||||
def _find_video_id(self, webpage):
|
def _find_video_id(self, webpage):
|
||||||
res_id = [
|
res_id = [
|
||||||
|
r'"video_id"\s*:\s*"(.*?)"',
|
||||||
r'data-video-id="(.+?)"',
|
r'data-video-id="(.+?)"',
|
||||||
r'<object id="vid_(.+?)"',
|
r'<object id="vid_(.+?)"',
|
||||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||||
@ -85,10 +86,15 @@ class IGNIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, name_or_id)
|
webpage = self._download_webpage(url, name_or_id)
|
||||||
if page_type != 'video':
|
if page_type != 'video':
|
||||||
multiple_urls = re.findall(
|
multiple_urls = re.findall(
|
||||||
'<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
||||||
webpage)
|
webpage)
|
||||||
if multiple_urls:
|
if multiple_urls:
|
||||||
return [self.url_result(u, ie='IGN') for u in multiple_urls]
|
entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': name_or_id,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
|
||||||
video_id = self._find_video_id(webpage)
|
video_id = self._find_video_id(webpage)
|
||||||
result = self._get_video_info(video_id)
|
result = self._get_video_info(video_id)
|
||||||
@ -111,13 +117,13 @@ class IGNIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class OneUPIE(IGNIE):
|
class OneUPIE(IGNIE):
|
||||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
|
||||||
IE_NAME = '1up.com'
|
IE_NAME = '1up.com'
|
||||||
|
|
||||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://gamevideos.1up.com/video/id/34976',
|
'url': 'http://gamevideos.1up.com/video/id/34976.html',
|
||||||
'md5': '68a54ce4ebc772e4b71e3123d413163d',
|
'md5': '68a54ce4ebc772e4b71e3123d413163d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34976',
|
'id': '34976',
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -12,12 +14,13 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
|
_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
|
'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
|
||||||
u'file': u'452693.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '452693',
|
||||||
u'title': u'SKYFALL',
|
'ext': 'mp4',
|
||||||
u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
|
'title': 'SKYFALL',
|
||||||
u'duration': 153,
|
'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
|
||||||
|
'duration': 149,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -42,7 +45,7 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
url = self._build_url(query)
|
url = self._build_url(query)
|
||||||
|
|
||||||
flashconfiguration = self._download_xml(url, video_id,
|
flashconfiguration = self._download_xml(url, video_id,
|
||||||
u'Downloading flash configuration')
|
'Downloading flash configuration')
|
||||||
file_url = flashconfiguration.find('file').text
|
file_url = flashconfiguration.find('file').text
|
||||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||||
# Replace some of the parameters in the query to get the best quality
|
# Replace some of the parameters in the query to get the best quality
|
||||||
@ -51,7 +54,7 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
lambda m: self._clean_query(m.group()),
|
lambda m: self._clean_query(m.group()),
|
||||||
file_url)
|
file_url)
|
||||||
info = self._download_xml(file_url, video_id,
|
info = self._download_xml(file_url, video_id,
|
||||||
u'Downloading video info')
|
'Downloading video info')
|
||||||
item = info.find('channel/item')
|
item = info.find('channel/item')
|
||||||
|
|
||||||
def _bp(p):
|
def _bp(p):
|
||||||
|
@ -63,7 +63,8 @@ class IzleseneIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._proto_relative_url(
|
||||||
|
self._og_search_thumbnail(webpage), scheme='http:')
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r"adduserUsername\s*=\s*'([^']+)';",
|
r"adduserUsername\s*=\s*'([^']+)';",
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -12,14 +10,14 @@ from ..utils import (
|
|||||||
|
|
||||||
class JpopsukiIE(InfoExtractor):
|
class JpopsukiIE(InfoExtractor):
|
||||||
IE_NAME = 'jpopsuki.tv'
|
IE_NAME = 'jpopsuki.tv'
|
||||||
_VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/video/(.*?)/(?P<id>\S+)'
|
_VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/(?:category/)?video/[^/]+/(?P<id>\S+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
|
'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
|
||||||
'md5': '88018c0c1a9b1387940e90ec9e7e198e',
|
'md5': '88018c0c1a9b1387940e90ec9e7e198e',
|
||||||
'file': '00be659d23b0b40508169cdee4545771.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '00be659d23b0b40508169cdee4545771',
|
'id': '00be659d23b0b40508169cdee4545771',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'ayumi hamasaki - evolution',
|
'title': 'ayumi hamasaki - evolution',
|
||||||
'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
|
'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
|
||||||
'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
|
'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
|
||||||
@ -30,8 +28,7 @@ class JpopsukiIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
@ -47,11 +44,9 @@ class JpopsukiIE(InfoExtractor):
|
|||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
|
r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
|
||||||
webpage, 'video uploader_id', fatal=False)
|
webpage, 'video uploader_id', fatal=False)
|
||||||
upload_date = self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
|
r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
|
||||||
fatal=False)
|
fatal=False))
|
||||||
if upload_date is not None:
|
|
||||||
upload_date = unified_strdate(upload_date)
|
|
||||||
view_count_str = self._html_search_regex(
|
view_count_str = self._html_search_regex(
|
||||||
r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
|
r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
|
||||||
fatal=False)
|
fatal=False)
|
||||||
|
@ -11,10 +11,9 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class JukeboxIE(InfoExtractor):
|
class JukeboxIE(InfoExtractor):
|
||||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<id>[a-z0-9\-]+)\.html'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
||||||
'md5': '1574e9b4d6438446d5b7dbcdf2786276',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'r303r',
|
'id': 'r303r',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
@ -24,8 +23,7 @@ class JukeboxIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('video_id')
|
|
||||||
|
|
||||||
html = self._download_webpage(url, video_id)
|
html = self._download_webpage(url, video_id)
|
||||||
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
|
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
|
||||||
|
@ -1,155 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import itertools
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
compat_str,
|
|
||||||
ExtractorError,
|
|
||||||
formatSeconds,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class JustinTVIE(InfoExtractor):
|
|
||||||
"""Information extractor for justin.tv and twitch.tv"""
|
|
||||||
# TODO: One broadcast may be split into multiple videos. The key
|
|
||||||
# 'broadcast_id' is the same for all parts, and 'broadcast_part'
|
|
||||||
# starts at 1 and increases. Can we treat all parts as one video?
|
|
||||||
|
|
||||||
_VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
|
|
||||||
(?:
|
|
||||||
(?P<channelid>[^/]+)|
|
|
||||||
(?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
|
|
||||||
(?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
|
|
||||||
)
|
|
||||||
/?(?:\#.*)?$
|
|
||||||
"""
|
|
||||||
_JUSTIN_PAGE_LIMIT = 100
|
|
||||||
IE_NAME = 'justin.tv'
|
|
||||||
IE_DESC = 'justin.tv and twitch.tv'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
|
|
||||||
'md5': 'ecaa8a790c22a40770901460af191c9a',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '296128360',
|
|
||||||
'ext': 'flv',
|
|
||||||
'upload_date': '20110927',
|
|
||||||
'uploader_id': 25114803,
|
|
||||||
'uploader': 'thegamedevhub',
|
|
||||||
'title': 'Beginner Series - Scripting With Python Pt.1'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Return count of items, list of *valid* items
|
|
||||||
def _parse_page(self, url, video_id, counter):
|
|
||||||
info_json = self._download_webpage(
|
|
||||||
url, video_id,
|
|
||||||
'Downloading video info JSON on page %d' % counter,
|
|
||||||
'Unable to download video info JSON %d' % counter)
|
|
||||||
|
|
||||||
response = json.loads(info_json)
|
|
||||||
if type(response) != list:
|
|
||||||
error_text = response.get('error', 'unknown error')
|
|
||||||
raise ExtractorError('Justin.tv API: %s' % error_text)
|
|
||||||
info = []
|
|
||||||
for clip in response:
|
|
||||||
video_url = clip['video_file_url']
|
|
||||||
if video_url:
|
|
||||||
video_extension = os.path.splitext(video_url)[1][1:]
|
|
||||||
video_date = re.sub('-', '', clip['start_time'][:10])
|
|
||||||
video_uploader_id = clip.get('user_id', clip.get('channel_id'))
|
|
||||||
video_id = clip['id']
|
|
||||||
video_title = clip.get('title', video_id)
|
|
||||||
info.append({
|
|
||||||
'id': compat_str(video_id),
|
|
||||||
'url': video_url,
|
|
||||||
'title': video_title,
|
|
||||||
'uploader': clip.get('channel_name', video_uploader_id),
|
|
||||||
'uploader_id': video_uploader_id,
|
|
||||||
'upload_date': video_date,
|
|
||||||
'ext': video_extension,
|
|
||||||
})
|
|
||||||
return (len(response), info)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
|
|
||||||
api_base = 'http://api.justin.tv'
|
|
||||||
paged = False
|
|
||||||
if mobj.group('channelid'):
|
|
||||||
paged = True
|
|
||||||
video_id = mobj.group('channelid')
|
|
||||||
api = api_base + '/channel/archives/%s.json' % video_id
|
|
||||||
elif mobj.group('chapterid'):
|
|
||||||
chapter_id = mobj.group('chapterid')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, chapter_id)
|
|
||||||
m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
|
|
||||||
if not m:
|
|
||||||
raise ExtractorError('Cannot find archive of a chapter')
|
|
||||||
archive_id = m.group(1)
|
|
||||||
|
|
||||||
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
|
||||||
doc = self._download_xml(
|
|
||||||
api, chapter_id,
|
|
||||||
note='Downloading chapter information',
|
|
||||||
errnote='Chapter information download failed')
|
|
||||||
for a in doc.findall('.//archive'):
|
|
||||||
if archive_id == a.find('./id').text:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Could not find chapter in chapter information')
|
|
||||||
|
|
||||||
video_url = a.find('./video_file_url').text
|
|
||||||
video_ext = video_url.rpartition('.')[2] or 'flv'
|
|
||||||
|
|
||||||
chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
|
|
||||||
chapter_info = self._download_json(
|
|
||||||
chapter_api_url, 'c' + chapter_id,
|
|
||||||
note='Downloading chapter metadata',
|
|
||||||
errnote='Download of chapter metadata failed')
|
|
||||||
|
|
||||||
bracket_start = int(doc.find('.//bracket_start').text)
|
|
||||||
bracket_end = int(doc.find('.//bracket_end').text)
|
|
||||||
|
|
||||||
# TODO determine start (and probably fix up file)
|
|
||||||
# youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
|
|
||||||
#video_url += '?start=' + TODO:start_timestamp
|
|
||||||
# bracket_start is 13290, but we want 51670615
|
|
||||||
self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
|
|
||||||
'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': 'c' + chapter_id,
|
|
||||||
'url': video_url,
|
|
||||||
'ext': video_ext,
|
|
||||||
'title': chapter_info['title'],
|
|
||||||
'thumbnail': chapter_info['preview'],
|
|
||||||
'description': chapter_info['description'],
|
|
||||||
'uploader': chapter_info['channel']['display_name'],
|
|
||||||
'uploader_id': chapter_info['channel']['name'],
|
|
||||||
}
|
|
||||||
return info
|
|
||||||
else:
|
|
||||||
video_id = mobj.group('videoid')
|
|
||||||
api = api_base + '/broadcast/by_archive/%s.json' % video_id
|
|
||||||
|
|
||||||
entries = []
|
|
||||||
offset = 0
|
|
||||||
limit = self._JUSTIN_PAGE_LIMIT
|
|
||||||
for counter in itertools.count(1):
|
|
||||||
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
|
|
||||||
page_count, page_info = self._parse_page(
|
|
||||||
page_url, video_id, counter)
|
|
||||||
entries.extend(page_info)
|
|
||||||
if not paged or page_count != limit:
|
|
||||||
break
|
|
||||||
offset += limit
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': video_id,
|
|
||||||
'entries': entries,
|
|
||||||
}
|
|
@ -34,7 +34,7 @@ class KontrTubeIE(InfoExtractor):
|
|||||||
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
|
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
|
||||||
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
|
r'<title>(.+?)</title>', webpage, 'video title')
|
||||||
description = self._html_search_meta('description', webpage, 'video description')
|
description = self._html_search_meta('description', webpage, 'video description')
|
||||||
|
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
|
69
youtube_dl/extractor/lrt.py
Normal file
69
youtube_dl/extractor/lrt.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
js_to_json,
|
||||||
|
parse_duration,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LRTIE(InfoExtractor):
|
||||||
|
IE_NAME = 'lrt.lt'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '54391',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Septynios Kauno dienos',
|
||||||
|
'description': 'Kauno miesto ir apskrities naujienos',
|
||||||
|
'duration': 1783,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # HLS download
|
||||||
|
},
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = remove_end(self._og_search_title(webpage), ' - LRT')
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
duration = parse_duration(self._search_regex(
|
||||||
|
r"'duration':\s*'([^']+)',", webpage,
|
||||||
|
'duration', fatal=False, default=None))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage):
|
||||||
|
data = json.loads(js_to_json(js))
|
||||||
|
if data['provider'] == 'rtmp':
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
'ext': determine_ext(data['file']),
|
||||||
|
'url': data['streamer'],
|
||||||
|
'play_path': 'mp4:%s' % data['file'],
|
||||||
|
'preference': -1,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
formats.extend(
|
||||||
|
self._extract_m3u8_formats(data['file'], video_id, 'mp4'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
87
youtube_dl/extractor/mgoon.py
Normal file
87
youtube_dl/extractor/mgoon.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
qualities,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MgoonIE(InfoExtractor):
    """Extractor for videos hosted on mgoon.com.

    Handles the channel URLs (``mgoon.com/ch/<channel>/v/<id>``), the
    ``mgoon.com/play/view/<id>`` URLs and the ``video.mgoon.com/<id>`` URLs,
    with an optional ``www.`` and/or ``m.`` host prefix.
    """

    # NOTE: the optional mobile prefix must be a *non-capturing* group
    # ``(?:m\.)?``; the previous ``(:?m\.)?`` was a capturing group that also
    # accepted a literal ':' in front of ``m.``.
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:(?:m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)|
        video\.mgoon\.com)/(?P<id>[0-9]+)'''
    # Player API endpoint; ``{0:}`` is replaced by the numeric video id.
    _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}'
    _TESTS = [
        {
            'url': 'http://m.mgoon.com/ch/hi6618/v/5582148',
            'md5': 'dd46bb66ab35cf6d51cc812fd82da79d',
            'info_dict': {
                'id': '5582148',
                'uploader_id': 'hi6618',
                'duration': 240.419,
                'upload_date': '20131220',
                'ext': 'mp4',
                'title': 'md5:543aa4c27a4931d371c3f433e8cebebc',
                # Raw literal so that ``\.`` is not an invalid string escape.
                'thumbnail': r're:^https?://.*\.jpg$',
            }
        },
        {
            'url': 'http://www.mgoon.com/play/view/5582148',
            'only_matching': True,
        },
        {
            'url': 'http://video.mgoon.com/5582148',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Downloads the JSON document from ``_API_URL`` and builds one format
        entry per item of its ``videoFiles`` list.

        Raises:
            ExtractorError: (``expected=True``) when the API response does not
                report the error code ``'NONE'``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        data = self._download_json(self._API_URL.format(video_id), video_id)

        # Look the error block up once and read it defensively: a response
        # without 'errorInfo' (or without 'message') must still end in an
        # ExtractorError rather than a KeyError.
        error_info = data.get('errorInfo') or {}
        if error_info.get('code') != 'NONE':
            raise ExtractorError('%s encountered an error: %s' % (
                self.IE_NAME, error_info.get('message')), expected=True)

        v_info = data['videoInfo']
        title = v_info.get('v_title')
        thumbnail = v_info.get('v_thumbnail')
        duration = v_info.get('v_duration')
        upload_date = unified_strdate(v_info.get('v_reg_date'))
        uploader_id = data.get('userInfo', {}).get('u_alias')
        if duration:
            # Scale by 1/1000 (presumably milliseconds -> seconds; the test
            # above expects 240.419).
            duration /= 1000.0

        # Only videos flagged as adult by the API get an age limit.
        age_limit = None
        if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT':
            age_limit = 18

        formats = []
        # Labels are ranked in this order, lowest to highest.
        get_quality = qualities(['360p', '480p', '720p', '1080p'])
        for fmt in data['videoFiles']:
            formats.append({
                'format_id': fmt['label'],
                'quality': get_quality(fmt['label']),
                'url': fmt['url'],
                'ext': fmt['format'],
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'age_limit': age_limit,
        }
|
@ -39,13 +39,21 @@ class MiTeleIE(InfoExtractor):
|
|||||||
).replace('\'', '"')
|
).replace('\'', '"')
|
||||||
embed_data = json.loads(embed_data_json)
|
embed_data = json.loads(embed_data_json)
|
||||||
|
|
||||||
info_url = embed_data['flashvars']['host']
|
domain = embed_data['mediaUrl']
|
||||||
|
if not domain.startswith('http'):
|
||||||
|
# only happens in telecinco.es videos
|
||||||
|
domain = 'http://' + domain
|
||||||
|
info_url = compat_urllib_parse.urljoin(
|
||||||
|
domain,
|
||||||
|
compat_urllib_parse.unquote(embed_data['flashvars']['host'])
|
||||||
|
)
|
||||||
info_el = self._download_xml(info_url, episode).find('./video/info')
|
info_el = self._download_xml(info_url, episode).find('./video/info')
|
||||||
|
|
||||||
video_link = info_el.find('videoUrl/link').text
|
video_link = info_el.find('videoUrl/link').text
|
||||||
token_query = compat_urllib_parse.urlencode({'id': video_link})
|
token_query = compat_urllib_parse.urlencode({'id': video_link})
|
||||||
token_info = self._download_json(
|
token_info = self._download_json(
|
||||||
'http://token.mitele.es/?' + token_query, episode,
|
embed_data['flashvars']['ov_tk'] + '?' + token_query,
|
||||||
|
episode,
|
||||||
transform_source=strip_jsonp
|
transform_source=strip_jsonp
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -33,22 +33,22 @@ class MixcloudIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def check_urls(self, url_list):
|
def _get_url(self, track_id, template_url):
|
||||||
"""Returns 1st active url from list"""
|
server_count = 30
|
||||||
for url in url_list:
|
for i in range(server_count):
|
||||||
|
url = template_url % i
|
||||||
try:
|
try:
|
||||||
# We only want to know if the request succeed
|
# We only want to know if the request succeed
|
||||||
# don't download the whole file
|
# don't download the whole file
|
||||||
self._request_webpage(HEADRequest(url), None, False)
|
self._request_webpage(
|
||||||
|
HEADRequest(url), track_id,
|
||||||
|
'Checking URL %d/%d ...' % (i + 1, server_count + 1))
|
||||||
return url
|
return url
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
url = None
|
pass
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _get_url(self, template_url):
|
|
||||||
return self.check_urls(template_url % i for i in range(30))
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
uploader = mobj.group(1)
|
uploader = mobj.group(1)
|
||||||
@ -61,16 +61,16 @@ class MixcloudIE(InfoExtractor):
|
|||||||
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
|
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
|
||||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||||
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
||||||
final_song_url = self._get_url(template_url)
|
final_song_url = self._get_url(track_id, template_url)
|
||||||
if final_song_url is None:
|
if final_song_url is None:
|
||||||
self.to_screen('Trying with m4a extension')
|
self.to_screen('Trying with m4a extension')
|
||||||
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||||
final_song_url = self._get_url(template_url)
|
final_song_url = self._get_url(track_id, template_url)
|
||||||
if final_song_url is None:
|
if final_song_url is None:
|
||||||
raise ExtractorError('Unable to extract track url')
|
raise ExtractorError('Unable to extract track url')
|
||||||
|
|
||||||
PREFIX = (
|
PREFIX = (
|
||||||
r'<div class="cloudcast-play-button-container"'
|
r'<div class="cloudcast-play-button-container[^"]*?"'
|
||||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
||||||
|
@ -6,7 +6,6 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
find_xpath_attr,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -88,8 +87,9 @@ class MLBIE(InfoExtractor):
|
|||||||
duration = parse_duration(detail.find('./duration').text)
|
duration = parse_duration(detail.find('./duration').text)
|
||||||
timestamp = parse_iso8601(detail.attrib['date'][:-5])
|
timestamp = parse_iso8601(detail.attrib['date'][:-5])
|
||||||
|
|
||||||
thumbnail = find_xpath_attr(
|
thumbnails = [{
|
||||||
detail, './thumbnailScenarios/thumbnailScenario', 'type', '45').text
|
'url': thumbnail.text,
|
||||||
|
} for thumbnail in detail.findall('./thumbnailScenarios/thumbnailScenario')]
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for media_url in detail.findall('./url'):
|
for media_url in detail.findall('./url'):
|
||||||
@ -116,5 +116,5 @@ class MLBIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnails': thumbnails,
|
||||||
}
|
}
|
||||||
|
70
youtube_dl/extractor/moniker.py
Normal file
70
youtube_dl/extractor/moniker.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import os.path
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MonikerIE(InfoExtractor):
    """Extractor shared by allmyvideos.net and vidspot.net."""

    IE_DESC = 'allmyvideos.net and vidspot.net'
    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'

    _TESTS = [
        {
            'url': 'http://allmyvideos.net/jih3nce3x6wn',
            'md5': '710883dee1bfc370ecf9fa6a89307c88',
            'info_dict': {
                'id': 'jih3nce3x6wn',
                'ext': 'mp4',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'http://vidspot.net/l2ngsmhs8ci5',
            'md5': '710883dee1bfc370ecf9fa6a89307c88',
            'info_dict': {
                'id': 'l2ngsmhs8ci5',
                'ext': 'mp4',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The landing page is fetched first; its hidden form fields are then
        posted back to the same URL to obtain the page that lists the media
        files.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        landing_page = self._download_webpage(url, video_id)
        # name -> value of every hidden form input found on the landing page
        form_data = dict(re.findall(
            r'type="hidden" name="(.+?)"\s* value="?(.+?)">', landing_page))

        request = compat_urllib_request.Request(
            url,
            compat_urllib_parse.urlencode(form_data),
            {b'Content-Type': b'application/x-www-form-urlencoded'})
        video_page = self._download_webpage(
            request, video_id, note='Downloading video page ...')

        # The title is the submitted file name without its extension.
        title, _ = os.path.splitext(form_data['fname'])

        # The page may list several files of differing quality; they are
        # assumed to appear in ascending quality order, so the position in
        # the list is used as the quality value.
        formats = []
        for position, link in enumerate(
                re.findall(r'"file" : "?(.+?)",', video_page)):
            formats.append({
                'url': link,
                'quality': position,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
|
76
youtube_dl/extractor/muenchentv.py
Normal file
76
youtube_dl/extractor/muenchentv.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MuenchenTVIE(InfoExtractor):
    """Extractor for the muenchen.tv live stream page."""

    _VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream'
    IE_DESC = 'münchen.tv'
    _TEST = {
        'url': 'http://www.muenchen.tv/livestream/',
        'info_dict': {
            'id': '5334',
            'display_id': 'live',
            'ext': 'mp4',
            'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
            'thumbnail': 're:^https?://.*\.jpg$'
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict describing the live stream found at *url*.

        The player configuration is read from the ``playlist:`` JavaScript
        literal embedded in the page; only its first entry is used.
        """
        display_id = 'live'
        webpage = self._download_webpage(url, display_id)

        title = self._live_title(self._og_search_title(webpage))

        playlist_js = self._search_regex(
            r'(?s)\nplaylist:\s*(\[.*?}\]),related:',
            webpage, 'playlist configuration')
        entry = json.loads(js_to_json(playlist_js))[0]

        video_id = entry['mediaid']
        thumbnail = entry.get('image')

        formats = []
        for index, source in enumerate(entry['sources']):
            file_url = source['file']
            label = source.get('label')

            # Build "<ext>-<label>"; an unlabelled source falls back to
            # "_<index>", and a missing extension drops the "<ext>-" part.
            ext = determine_ext(file_url, None)
            label_str = '_%d' % index if label is None else label
            format_id = (
                label_str if ext is None else '%s-%s' % (ext, label_str))

            formats.append({
                'url': file_url,
                'tbr': int_or_none(label),
                'ext': 'mp4',
                'format_id': format_id,
                # Sources whose URL contains ".smil" are ranked far below
                # the others.
                'preference': -100 if '.smil' in file_url else 0,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'is_live': True,
            'thumbnail': thumbnail,
        }
|
||||||
|
|
@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
|
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
|
||||||
'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
|
# md5 checksum is not stable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'u1RInQZRN7QJ',
|
'id': 'bTmnLCvIbaaH',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'I Am a Firefighter',
|
'title': 'I Am a Firefighter',
|
||||||
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
|
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
|
||||||
|
@ -18,16 +18,16 @@ class NDRIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html',
|
'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html',
|
||||||
'md5': '4a4eeafd17c3058b65f0c8f091355855',
|
'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',
|
||||||
'note': 'Video file',
|
'note': 'Video file',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '325',
|
'id': '25866',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Blaue Bohnen aus Blocken',
|
'title': 'Kartoffeltage in der Lewitz',
|
||||||
'description': 'md5:190d71ba2ccddc805ed01547718963bc',
|
'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8',
|
||||||
'duration': 1715,
|
'duration': 166,
|
||||||
},
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.ndr.de/info/audio51535.html',
|
'url': 'http://www.ndr.de/info/audio51535.html',
|
||||||
|
144
youtube_dl/extractor/nfl.py
Normal file
144
youtube_dl/extractor/nfl.py
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
int_or_none,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NFLIE(InfoExtractor):
    """Extractor for videos on nfl.com and the *.clubs.nfl.com team sites."""

    IE_NAME = 'nfl.com'
    _VALID_URL = r'''(?x)https?://
        (?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
        (?:.+?/)*
        (?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
    _TESTS = [
        {
            'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
            'md5': '394ef771ddcd1354f665b471d78ec4c6',
            'info_dict': {
                'id': '0ap3000000398478',
                'ext': 'mp4',
                'title': 'Week 3: Redskins vs. Eagles highlights',
                'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
                'upload_date': '20140921',
                'timestamp': 1411337580,
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        },
        {
            'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
            'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
            'info_dict': {
                'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
                'ext': 'mp4',
                'title': 'LIVE: Post Game vs. Browns',
                'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
                'upload_date': '20131229',
                'timestamp': 1388354455,
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        }
    ]

    @staticmethod
    def prepend_host(host, url):
        """Return *url* as an absolute URL.

        A value that already starts with ``http`` is returned unchanged;
        anything else is treated as a path on *host* (a leading ``/`` is
        added when missing) and prefixed with ``http://<host>``.
        """
        if url.startswith('http'):
            return url
        path = url if url.startswith('/') else '/%s' % url
        return 'http://{0:}{1:}'.format(host, path)

    @staticmethod
    def format_from_stream(stream, protocol, host, path_prefix='',
                           preference=0, note=None):
        """Build a format dict for one *stream* entry.

        The URL is assembled as ``<protocol>://<host>/<path_prefix><path>``
        where ``path`` is taken from the stream entry; the entry's ``rate``
        (default 0) is passed through ``int_or_none`` with a scale of 1000
        to give ``vbr``.
        """
        stream_url = '{protocol:}://{host:}/{prefix:}{path:}'.format(
            protocol=protocol,
            host=host,
            prefix=path_prefix,
            path=stream.get('path'),
        )
        return {
            'url': stream_url,
            'vbr': int_or_none(stream.get('rate', 0), 1000),
            'preference': preference,
            'format_note': note,
        }

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        Raises:
            ExtractorError: (``expected=True``) when the video is not an
                external HTTP stream and the player config lists no CDNs.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        host = match.group('host')

        webpage = self._download_webpage(url, video_id)

        # The page names the player config; the config in turn carries the
        # URL template of the per-video JSON document.
        config_location = self._search_regex(
            r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL')
        config = self._download_json(
            NFLIE.prepend_host(host, config_location), video_id,
            note='Downloading player config')
        content_url_template = NFLIE.prepend_host(
            host, '{contentURLTemplate:}'.format(**config))
        video_data = self._download_json(
            content_url_template.format(id=video_id), video_id)

        cdn_data = video_data.get('cdnData', {})
        streams = cdn_data.get('bitrateInfo', [])
        formats = []
        if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM':
            # Scheme and host come straight from the advertised URI.
            uri = compat_urllib_parse_urlparse(cdn_data.get('uri'))
            formats.extend(
                NFLIE.format_from_stream(stream, uri.scheme, uri.netloc)
                for stream in streams)
        else:
            cdns = config.get('cdns')
            if not cdns:
                raise ExtractorError('Failed to get CDN data', expected=True)

            for cdn_key, cdn in cdns.items():
                # LimeLight streams don't seem to work
                if cdn.get('name') == 'LIMELIGHT':
                    continue

                protocol = cdn.get('protocol')
                cdn_host = remove_end(cdn.get('host', ''), '/')
                if not protocol or not cdn_host:
                    continue

                # A non-empty path prefix always ends with a slash.
                prefix = cdn.get('pathprefix', '')
                if prefix and not prefix.endswith('/'):
                    prefix = '%s/' % prefix

                # rtmp is ranked lowest; CDNs whose key contains "prog"
                # are ranked highest.
                if protocol == 'rtmp':
                    preference = -2
                elif 'prog' in cdn_key.lower():
                    preference = 1
                else:
                    preference = 0

                formats.extend(
                    NFLIE.format_from_stream(
                        stream, protocol, cdn_host,
                        prefix, preference, cdn_key)
                    for stream in streams)

        self._sort_formats(formats)

        # Use the first available image, trying the size keys in the order
        # listed below.
        thumbnail = None
        for size in ('xl', 'l', 'm', 's', 'xs'):
            thumbnail = video_data.get('imagePaths', {}).get(size)
            if thumbnail:
                break

        return {
            'id': video_id,
            'title': video_data.get('headline'),
            'formats': formats,
            'description': video_data.get('caption'),
            'duration': video_data.get('duration'),
            'thumbnail': thumbnail,
            'timestamp': int_or_none(video_data.get('posted'), 1000),
        }
|
@ -46,9 +46,9 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
class NHLIE(NHLBaseInfoExtractor):
|
class NHLIE(NHLBaseInfoExtractor):
|
||||||
IE_NAME = 'nhl.com'
|
IE_NAME = 'nhl.com'
|
||||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '453614',
|
'id': '453614',
|
||||||
@ -58,7 +58,10 @@ class NHLIE(NHLBaseInfoExtractor):
|
|||||||
'duration': 18,
|
'duration': 18,
|
||||||
'upload_date': '20131006',
|
'upload_date': '20131006',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -39,18 +39,17 @@ class NiconicoIE(InfoExtractor):
|
|||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
# Determine whether the downloader uses authentication to download video
|
# Determine whether the downloader used authentication to download video
|
||||||
_AUTHENTICATE = False
|
_AUTHENTICATED = False
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if self._downloader.params.get('username', None) is not None:
|
|
||||||
self._AUTHENTICATE = True
|
|
||||||
|
|
||||||
if self._AUTHENTICATE:
|
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
|
# No authentication to be performed
|
||||||
|
if not username:
|
||||||
|
return True
|
||||||
|
|
||||||
# Log in
|
# Log in
|
||||||
login_form_strs = {
|
login_form_strs = {
|
||||||
@ -68,6 +67,8 @@ class NiconicoIE(InfoExtractor):
|
|||||||
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
||||||
self._downloader.report_warning('unable to log in: bad username or password')
|
self._downloader.report_warning('unable to log in: bad username or password')
|
||||||
return False
|
return False
|
||||||
|
# Successful login
|
||||||
|
self._AUTHENTICATED = True
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -82,7 +83,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||||
note='Downloading video info page')
|
note='Downloading video info page')
|
||||||
|
|
||||||
if self._AUTHENTICATE:
|
if self._AUTHENTICATED:
|
||||||
# Get flv info
|
# Get flv info
|
||||||
flv_info_webpage = self._download_webpage(
|
flv_info_webpage = self._download_webpage(
|
||||||
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
||||||
|
@ -2,6 +2,8 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -17,6 +19,7 @@ from ..utils import (
|
|||||||
class NocoIE(InfoExtractor):
|
class NocoIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||||
_LOGIN_URL = 'http://noco.tv/do.php'
|
_LOGIN_URL = 'http://noco.tv/do.php'
|
||||||
|
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
|
||||||
_NETRC_MACHINE = 'noco'
|
_NETRC_MACHINE = 'noco'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@ -57,31 +60,50 @@ class NocoIE(InfoExtractor):
|
|||||||
if 'erreur' in login:
|
if 'erreur' in login:
|
||||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||||
|
|
||||||
|
def _call_api(self, path, video_id, note):
|
||||||
|
ts = compat_str(int(time.time() * 1000))
|
||||||
|
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
|
||||||
|
url = self._API_URL_TEMPLATE % (path, ts, tk)
|
||||||
|
|
||||||
|
resp = self._download_json(url, video_id, note)
|
||||||
|
|
||||||
|
if isinstance(resp, dict) and resp.get('error'):
|
||||||
|
self._raise_error(resp['error'], resp['description'])
|
||||||
|
|
||||||
|
return resp
|
||||||
|
|
||||||
|
def _raise_error(self, error, description):
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s - %s' % (self.IE_NAME, error, description),
|
||||||
|
expected=True)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
medias = self._download_json(
|
medias = self._call_api(
|
||||||
'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
|
'shows/%s/medias' % video_id,
|
||||||
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
qualities = self._call_api(
|
||||||
|
'qualities',
|
||||||
|
video_id, 'Downloading qualities JSON')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for fmt in medias['fr']['video_list']['default']['quality_list']:
|
for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
|
||||||
format_id = fmt['quality_key']
|
|
||||||
|
|
||||||
file = self._download_json(
|
video = self._call_api(
|
||||||
'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
|
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||||
video_id, 'Downloading %s video JSON' % format_id)
|
video_id, 'Downloading %s video JSON' % format_id)
|
||||||
|
|
||||||
file_url = file['file']
|
file_url = video['file']
|
||||||
if not file_url:
|
if not file_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if file_url == 'forbidden':
|
if file_url in ['forbidden', 'not found']:
|
||||||
raise ExtractorError(
|
popmessage = video['popmessage']
|
||||||
'%s returned error: %s - %s' % (
|
self._raise_error(popmessage['title'], popmessage['message'])
|
||||||
self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']),
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': file_url,
|
'url': file_url,
|
||||||
@ -91,20 +113,31 @@ class NocoIE(InfoExtractor):
|
|||||||
'abr': fmt['audiobitrate'],
|
'abr': fmt['audiobitrate'],
|
||||||
'vbr': fmt['videobitrate'],
|
'vbr': fmt['videobitrate'],
|
||||||
'filesize': fmt['filesize'],
|
'filesize': fmt['filesize'],
|
||||||
'format_note': fmt['quality_name'],
|
'format_note': qualities[format_id]['quality_name'],
|
||||||
'preference': fmt['priority'],
|
'preference': qualities[format_id]['priority'],
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
show = self._download_json(
|
show = self._call_api(
|
||||||
'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
|
'shows/by_id/%s' % video_id,
|
||||||
|
video_id, 'Downloading show JSON')[0]
|
||||||
|
|
||||||
upload_date = unified_strdate(show['indexed'])
|
upload_date = unified_strdate(show['online_date_start_utc'])
|
||||||
uploader = show['partner_name']
|
uploader = show['partner_name']
|
||||||
uploader_id = show['partner_key']
|
uploader_id = show['partner_key']
|
||||||
duration = show['duration_ms'] / 1000.0
|
duration = show['duration_ms'] / 1000.0
|
||||||
thumbnail = show['screenshot']
|
|
||||||
|
thumbnails = []
|
||||||
|
for thumbnail_key, thumbnail_url in show.items():
|
||||||
|
m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
|
||||||
|
if not m:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': thumbnail_url,
|
||||||
|
'width': int(m.group('width')),
|
||||||
|
'height': int(m.group('height')),
|
||||||
|
})
|
||||||
|
|
||||||
episode = show.get('show_TT') or show.get('show_OT')
|
episode = show.get('show_TT') or show.get('show_OT')
|
||||||
family = show.get('family_TT') or show.get('family_OT')
|
family = show.get('family_TT') or show.get('family_OT')
|
||||||
@ -124,7 +157,7 @@ class NocoIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnails': thumbnails,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
|
@ -8,11 +8,11 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
|
_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
|
||||||
_find = lambda el, p: el.find(_x(p)).text.strip()
|
|
||||||
|
|
||||||
|
|
||||||
class NosVideoIE(InfoExtractor):
|
class NosVideoIE(InfoExtractor):
|
||||||
@ -53,9 +53,15 @@ class NosVideoIE(InfoExtractor):
|
|||||||
playlist = self._download_xml(playlist_url, video_id)
|
playlist = self._download_xml(playlist_url, video_id)
|
||||||
|
|
||||||
track = playlist.find(_x('.//xspf:track'))
|
track = playlist.find(_x('.//xspf:track'))
|
||||||
title = _find(track, './xspf:title')
|
if track is None:
|
||||||
url = _find(track, './xspf:file')
|
raise ExtractorError(
|
||||||
thumbnail = _find(track, './xspf:image')
|
'XML playlist is missing the \'track\' element',
|
||||||
|
expected=True)
|
||||||
|
title = xpath_text(track, _x('./xspf:title'), 'title')
|
||||||
|
url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True)
|
||||||
|
thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail')
|
||||||
|
if title is not None:
|
||||||
|
title = title.strip()
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
|
@ -5,7 +5,9 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
parse_duration,
|
||||||
qualities,
|
qualities,
|
||||||
|
url_basename,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -13,7 +15,8 @@ class NPOIE(InfoExtractor):
|
|||||||
IE_NAME = 'npo.nl'
|
IE_NAME = 'npo.nl'
|
||||||
_VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
|
_VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
|
{
|
||||||
'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
|
'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
|
||||||
'md5': '4b3f9c429157ec4775f2c9cb7b911016',
|
'md5': '4b3f9c429157ec4775f2c9cb7b911016',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -23,12 +26,39 @@ class NPOIE(InfoExtractor):
|
|||||||
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
|
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
|
||||||
'upload_date': '20140622',
|
'upload_date': '20140622',
|
||||||
},
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
|
||||||
|
'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VARA_101191800',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'De Mega Mike & Mega Thomas show',
|
||||||
|
'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
|
||||||
|
'upload_date': '20090227',
|
||||||
|
'duration': 2400,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
|
||||||
|
'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VPWON_1169289',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'Tegenlicht',
|
||||||
|
'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
|
||||||
|
'upload_date': '20130225',
|
||||||
|
'duration': 3000,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
return self._get_info(video_id)
|
||||||
|
|
||||||
|
def _get_info(self, video_id):
|
||||||
metadata = self._download_json(
|
metadata = self._download_json(
|
||||||
'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
|
'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
|
||||||
video_id,
|
video_id,
|
||||||
@ -43,19 +73,28 @@ class NPOIE(InfoExtractor):
|
|||||||
token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
|
token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std'])
|
quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
|
||||||
for format_id in metadata['pubopties']:
|
for format_id in metadata['pubopties']:
|
||||||
streams_info = self._download_json(
|
format_info = self._download_json(
|
||||||
'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
|
'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
|
||||||
video_id, 'Downloading %s streams info' % format_id)
|
video_id, 'Downloading %s JSON' % format_id)
|
||||||
stream_info = self._download_json(
|
if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
|
||||||
streams_info['streams'][0] + '&type=json',
|
continue
|
||||||
video_id, 'Downloading %s stream info' % format_id)
|
streams = format_info.get('streams')
|
||||||
|
if streams:
|
||||||
|
video_info = self._download_json(
|
||||||
|
streams[0] + '&type=json',
|
||||||
|
video_id, 'Downloading %s stream JSON' % format_id)
|
||||||
|
else:
|
||||||
|
video_info = format_info
|
||||||
|
video_url = video_info.get('url')
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
if format_id == 'adaptive':
|
if format_id == 'adaptive':
|
||||||
formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id))
|
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': stream_info['url'],
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'quality': quality(format_id),
|
'quality': quality(format_id),
|
||||||
})
|
})
|
||||||
@ -65,7 +104,35 @@ class NPOIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': metadata['titel'],
|
'title': metadata['titel'],
|
||||||
'description': metadata['info'],
|
'description': metadata['info'],
|
||||||
'thumbnail': metadata['images'][-1]['url'],
|
'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
|
||||||
'upload_date': unified_strdate(metadata['gidsdatum']),
|
'upload_date': unified_strdate(metadata.get('gidsdatum')),
|
||||||
|
'duration': parse_duration(metadata.get('tijdsduur')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TegenlichtVproIE(NPOIE):
|
||||||
|
IE_NAME = 'tegenlicht.vpro.nl'
|
||||||
|
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
|
||||||
|
'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VPWON_1169289',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'Tegenlicht',
|
||||||
|
'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
|
||||||
|
'upload_date': '20130225',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
name = url_basename(url)
|
||||||
|
webpage = self._download_webpage(url, name)
|
||||||
|
urn = self._html_search_meta('mediaurn', webpage)
|
||||||
|
info_page = self._download_json(
|
||||||
|
'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
|
||||||
|
return self._get_info(info_page['mid'])
|
||||||
|
47
youtube_dl/extractor/oktoberfesttv.py
Normal file
47
youtube_dl/extractor/oktoberfesttv.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class OktoberfestTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hb-zelt',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._live_title(self._html_search_regex(
|
||||||
|
r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title'))
|
||||||
|
|
||||||
|
clip = self._search_regex(
|
||||||
|
r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip')
|
||||||
|
ncurl = self._search_regex(
|
||||||
|
r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base')
|
||||||
|
video_url = ncurl + clip
|
||||||
|
thumbnail = self._search_regex(
|
||||||
|
r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage,
|
||||||
|
'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'is_live': True,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
@ -4,6 +4,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
unified_strdate,
|
||||||
US_RATINGS,
|
US_RATINGS,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -13,8 +14,8 @@ class PBSIE(InfoExtractor):
|
|||||||
(?:
|
(?:
|
||||||
# Direct video URL
|
# Direct video URL
|
||||||
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
||||||
# Article with embedded player
|
# Article with embedded player (or direct video)
|
||||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
|
||||||
# Player
|
# Player
|
||||||
video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
|
video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
|
||||||
)
|
)
|
||||||
@ -65,10 +66,31 @@ class PBSIE(InfoExtractor):
|
|||||||
'duration': 6559,
|
'duration': 6559,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
|
||||||
|
'md5': '908f3e5473a693b266b84e25e1cf9703',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2365160389',
|
||||||
|
'display_id': 'killer-typhoon',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:c741d14e979fc53228c575894094f157',
|
||||||
|
'title': 'Killer Typhoon',
|
||||||
|
'duration': 3172,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'upload_date': '20140122',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'united-states-of-secrets',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _extract_ids(self, url):
|
def _extract_webpage(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
presumptive_id = mobj.group('presumptive_id')
|
presumptive_id = mobj.group('presumptive_id')
|
||||||
@ -76,15 +98,26 @@ class PBSIE(InfoExtractor):
|
|||||||
if presumptive_id:
|
if presumptive_id:
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
upload_date = unified_strdate(self._search_regex(
|
||||||
|
r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
|
||||||
|
webpage, 'upload date', default=None))
|
||||||
|
|
||||||
|
# tabbed frontline videos
|
||||||
|
tabbed_videos = re.findall(
|
||||||
|
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
|
||||||
|
if tabbed_videos:
|
||||||
|
return tabbed_videos, presumptive_id, upload_date
|
||||||
|
|
||||||
MEDIA_ID_REGEXES = [
|
MEDIA_ID_REGEXES = [
|
||||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||||
r'class="coveplayerid">([^<]+)<', # coveplayer
|
r'class="coveplayerid">([^<]+)<', # coveplayer
|
||||||
|
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||||
]
|
]
|
||||||
|
|
||||||
media_id = self._search_regex(
|
media_id = self._search_regex(
|
||||||
MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
|
MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
|
||||||
if media_id:
|
if media_id:
|
||||||
return media_id, presumptive_id
|
return media_id, presumptive_id, upload_date
|
||||||
|
|
||||||
url = self._search_regex(
|
url = self._search_regex(
|
||||||
r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||||
@ -104,10 +137,16 @@ class PBSIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
display_id = video_id
|
display_id = video_id
|
||||||
|
|
||||||
return video_id, display_id
|
return video_id, display_id, None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, display_id = self._extract_ids(url)
|
video_id, display_id, upload_date = self._extract_webpage(url)
|
||||||
|
|
||||||
|
if isinstance(video_id, list):
|
||||||
|
entries = [self.url_result(
|
||||||
|
'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id)
|
||||||
|
for vid_id in video_id]
|
||||||
|
return self.playlist_result(entries, display_id)
|
||||||
|
|
||||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||||
info = self._download_json(info_url, display_id)
|
info = self._download_json(info_url, display_id)
|
||||||
@ -119,6 +158,7 @@ class PBSIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'url': info['alternate_encoding']['url'],
|
'url': info['alternate_encoding']['url'],
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -126,4 +166,5 @@ class PBSIE(InfoExtractor):
|
|||||||
'thumbnail': info.get('image_url'),
|
'thumbnail': info.get('image_url'),
|
||||||
'duration': info.get('duration'),
|
'duration': info.get('duration'),
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
60
youtube_dl/extractor/planetaplay.py
Normal file
60
youtube_dl/extractor/planetaplay.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class PlanetaPlayIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?planetaplay\.com/\?sng=(?P<id>[0-9]+)'
|
||||||
|
_API_URL = 'http://planetaplay.com/action/playlist/?sng={0:}'
|
||||||
|
_THUMBNAIL_URL = 'http://planetaplay.com/img/thumb/{thumb:}'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://planetaplay.com/?sng=3586',
|
||||||
|
'md5': '9d569dceb7251a4e01355d5aea60f9db',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3586',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'md5:e829428ee28b1deed00de90de49d1da1',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_SONG_FORMATS = {
|
||||||
|
'lq': (0, 'http://www.planetaplay.com/videoplayback/{med_hash:}'),
|
||||||
|
'hq': (1, 'http://www.planetaplay.com/videoplayback/hi/{med_hash:}'),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
response = self._download_json(
|
||||||
|
self._API_URL.format(video_id), video_id)['response']
|
||||||
|
try:
|
||||||
|
data = response.get('data')[0]
|
||||||
|
except IndexError:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s: failed to get the playlist' % self.IE_NAME, expected=True)
|
||||||
|
|
||||||
|
title = '{song_artists:} - {sng_name:}'.format(**data)
|
||||||
|
thumbnail = self._THUMBNAIL_URL.format(**data)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id, (quality, url_template) in self._SONG_FORMATS.items():
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': url_template.format(**data),
|
||||||
|
'quality': quality,
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
55
youtube_dl/extractor/played.py
Normal file
55
youtube_dl/extractor/played.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import os.path
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PlayedIE(InfoExtractor):
|
||||||
|
IE_NAME = 'played.to'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://played.to/j2f2sfiiukgt',
|
||||||
|
'md5': 'c2bd75a368e82980e7257bf500c00637',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'j2f2sfiiukgt',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'youtube-dl_test_video.mp4',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
orig_webpage = self._download_webpage(url, video_id)
|
||||||
|
fields = re.findall(
|
||||||
|
r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage)
|
||||||
|
data = dict(fields)
|
||||||
|
|
||||||
|
self._sleep(2, video_id)
|
||||||
|
|
||||||
|
post = compat_urllib_parse.urlencode(data)
|
||||||
|
headers = {
|
||||||
|
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||||
|
}
|
||||||
|
req = compat_urllib_request.Request(url, post, headers)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
req, video_id, note='Downloading video page ...')
|
||||||
|
|
||||||
|
title = os.path.splitext(data['fname'])[0]
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'file: "?(.+?)",', webpage, 'video URL')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
}
|
@ -10,6 +10,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -29,6 +30,7 @@ class PlayFMIE(InfoExtractor):
|
|||||||
'duration': 5627.428,
|
'duration': 5627.428,
|
||||||
'upload_date': '20140712',
|
'upload_date': '20140712',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -51,7 +53,8 @@ class PlayFMIE(InfoExtractor):
|
|||||||
|
|
||||||
recording = rec_doc.find('./recording')
|
recording = rec_doc.find('./recording')
|
||||||
title = recording.find('./title').text
|
title = recording.find('./title').text
|
||||||
view_count = int_or_none(recording.find('./stats/playcount').text)
|
view_count = str_to_int(recording.find('./stats/playcount').text)
|
||||||
|
comment_count = str_to_int(recording.find('./stats/comments').text)
|
||||||
duration = float_or_none(recording.find('./duration').text, scale=1000)
|
duration = float_or_none(recording.find('./duration').text, scale=1000)
|
||||||
thumbnail = recording.find('./image').text
|
thumbnail = recording.find('./image').text
|
||||||
|
|
||||||
@ -75,6 +78,7 @@ class PlayFMIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
@ -4,19 +4,27 @@ import re
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
qualities,
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class PornHdIE(InfoExtractor):
|
class PornHdIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)'
|
_VALID_URL = r'http://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
||||||
'md5': '956b8ca569f7f4d8ec563e2c41598441',
|
'md5': '956b8ca569f7f4d8ec563e2c41598441',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1962',
|
'id': '1962',
|
||||||
|
'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Sierra loves doing laundry',
|
'title': 'Sierra loves doing laundry',
|
||||||
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'view_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -24,8 +32,9 @@ class PornHdIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, display_id or video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>(.+) porn HD.+?</title>', webpage, 'title')
|
r'<title>(.+) porn HD.+?</title>', webpage, 'title')
|
||||||
@ -33,38 +42,21 @@ class PornHdIE(InfoExtractor):
|
|||||||
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
|
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
|
||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = int_or_none(self._html_search_regex(
|
||||||
r'(\d+) views\s*</span>', webpage, 'view count', fatal=False))
|
r'(\d+) views\s*</span>', webpage, 'view count', fatal=False))
|
||||||
|
thumbnail = self._search_regex(
|
||||||
|
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
videos = re.findall(
|
quality = qualities(['SD', 'HD'])
|
||||||
r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
|
formats = [{
|
||||||
|
'url': source['file'],
|
||||||
mobj = re.search(r'flashVars = (?P<flashvars>{.+?});', webpage)
|
'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])),
|
||||||
if mobj:
|
'quality': quality(source['label']),
|
||||||
flashvars = json.loads(mobj.group('flashvars'))
|
} for source in json.loads(js_to_json(self._search_regex(
|
||||||
for key, quality in [('hashlink', 'low'), ('hd', 'high')]:
|
r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))]
|
||||||
redirect_url = flashvars.get(key)
|
|
||||||
if redirect_url:
|
|
||||||
videos.append(('flv', quality, redirect_url))
|
|
||||||
thumbnail = flashvars['urlWallpaper']
|
|
||||||
else:
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format_, quality, redirect_url in videos:
|
|
||||||
format_id = '%s-%s' % (format_.lower(), quality.lower())
|
|
||||||
video_url = self._download_webpage(
|
|
||||||
redirect_url, video_id, 'Downloading %s video link' % format_id, fatal=False)
|
|
||||||
if not video_url:
|
|
||||||
continue
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
'ext': format_.lower(),
|
|
||||||
'format_id': format_id,
|
|
||||||
'quality': 1 if quality.lower() == 'high' else 0,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -144,7 +144,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
|||||||
'id': '2156342',
|
'id': '2156342',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Kurztrips zum Valentinstag',
|
'title': 'Kurztrips zum Valentinstag',
|
||||||
'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528',
|
'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.',
|
||||||
'duration': 307.24,
|
'duration': 307.24,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -180,12 +180,10 @@ class ProSiebenSat1IE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
|
||||||
|
|
||||||
clip_id = self._html_search_regex(self._CLIPID_REGEXES, page, 'clip id')
|
|
||||||
|
|
||||||
access_token = 'testclient'
|
access_token = 'testclient'
|
||||||
client_name = 'kolibri-1.2.5'
|
client_name = 'kolibri-1.2.5'
|
||||||
@ -234,12 +232,12 @@ class ProSiebenSat1IE(InfoExtractor):
|
|||||||
|
|
||||||
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
|
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
|
||||||
|
|
||||||
title = self._html_search_regex(self._TITLE_REGEXES, page, 'title')
|
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
|
||||||
description = self._html_search_regex(self._DESCRIPTION_REGEXES, page, 'description', fatal=False)
|
description = self._html_search_regex(self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
|
||||||
thumbnail = self._og_search_thumbnail(page)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
self._UPLOAD_DATE_REGEXES, page, 'upload date', default=None))
|
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class SBSIE(InfoExtractor):
|
class SBSIE(InfoExtractor):
|
||||||
IE_DESC = 'sbs.com.au'
|
IE_DESC = 'sbs.com.au'
|
||||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'
|
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/(?:single/)?(?P<id>[0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Original URL is handled by the generic IE which finds the iframe:
|
# Original URL is handled by the generic IE which finds the iframe:
|
||||||
@ -21,12 +21,16 @@ class SBSIE(InfoExtractor):
|
|||||||
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
|
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '320403011771',
|
'id': '320403011771',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Dingo Conservation',
|
'title': 'Dingo Conservation',
|
||||||
'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
|
'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
|
||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
},
|
},
|
||||||
'add_ies': ['generic'],
|
'add_ies': ['generic'],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
117
youtube_dl/extractor/sexykarma.py
Normal file
117
youtube_dl/extractor/sexykarma.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
unified_strdate,
|
||||||
|
parse_duration,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SexyKarmaIE(InfoExtractor):
|
||||||
|
IE_DESC = 'Sexy Karma and Watch Indian Porn'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
|
||||||
|
'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'yHI70cOyIHt',
|
||||||
|
'display_id': 'taking-a-quick-pee',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Taking a quick pee.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'wildginger7',
|
||||||
|
'upload_date': '20141007',
|
||||||
|
'duration': 22,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'categories': list,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
|
||||||
|
'md5': 'dd216c68d29b49b12842b9babe762a5d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8Id6EZPbuHf',
|
||||||
|
'display_id': 'pot-pixie-tribute',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'pot_pixie tribute',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'banffite',
|
||||||
|
'upload_date': '20141013',
|
||||||
|
'duration': 16,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'categories': list,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
|
||||||
|
'md5': '9afb80675550406ed9a63ac2819ef69d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dW2mtctxJfs',
|
||||||
|
'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'Don',
|
||||||
|
'upload_date': '20140213',
|
||||||
|
'duration': 83,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'categories': list,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_url = self._html_search_regex(
|
||||||
|
r"url: escape\('([^']+)'\)", webpage, 'url')
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<h2 class="he2"><span>(.*?)</span>',
|
||||||
|
webpage, 'title')
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r'<span id="container"><img\s+src="([^"]+)"',
|
||||||
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r'class="aupa">\s*(.*?)</a>',
|
||||||
|
webpage, 'uploader')
|
||||||
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
|
r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
|
duration = parse_duration(self._search_regex(
|
||||||
|
r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
|
view_count = int_or_none(self._search_regex(
|
||||||
|
r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
||||||
|
webpage, 'view count', fatal=False))
|
||||||
|
comment_count = int_or_none(self._search_regex(
|
||||||
|
r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
||||||
|
webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
|
categories = re.findall(
|
||||||
|
r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
|
||||||
|
webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'categories': categories,
|
||||||
|
}
|
@ -31,7 +31,8 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
(?!sets/|likes/?(?:$|[?#]))
|
(?!sets/|likes/?(?:$|[?#]))
|
||||||
(?P<title>[\w\d-]+)/?
|
(?P<title>[\w\d-]+)/?
|
||||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||||
|
(?:/?\?secret_token=(?P<secret_token>[^&]+?))?$)
|
||||||
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
|
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
@ -80,6 +81,20 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'duration': 9,
|
'duration': 9,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# private link (alt format)
|
||||||
|
{
|
||||||
|
'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
|
||||||
|
'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '123998367',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||||
|
'uploader': 'jaimeMF',
|
||||||
|
'description': 'test chars: \"\'/\\ä↭',
|
||||||
|
'upload_date': '20131209',
|
||||||
|
'duration': 9,
|
||||||
|
},
|
||||||
|
},
|
||||||
# downloadable song
|
# downloadable song
|
||||||
{
|
{
|
||||||
'url': 'https://soundcloud.com/oddsamples/bus-brakes',
|
'url': 'https://soundcloud.com/oddsamples/bus-brakes',
|
||||||
@ -197,6 +212,9 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
if track_id is not None:
|
if track_id is not None:
|
||||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||||
full_title = track_id
|
full_title = track_id
|
||||||
|
token = mobj.group('secret_token')
|
||||||
|
if token:
|
||||||
|
info_json_url += "&secret_token=" + token
|
||||||
elif mobj.group('player'):
|
elif mobj.group('player'):
|
||||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
return self.url_result(query['url'][0])
|
return self.url_result(query['url'][0])
|
||||||
@ -220,7 +238,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class SoundcloudSetIE(SoundcloudIE):
|
class SoundcloudSetIE(SoundcloudIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
|
||||||
IE_NAME = 'soundcloud:set'
|
IE_NAME = 'soundcloud:set'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
|
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
|
||||||
@ -234,14 +252,19 @@ class SoundcloudSetIE(SoundcloudIE):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
# extract uploader (which is in the url)
|
# extract uploader (which is in the url)
|
||||||
uploader = mobj.group(1)
|
uploader = mobj.group('uploader')
|
||||||
# extract simple title (uploader + slug of song title)
|
# extract simple title (uploader + slug of song title)
|
||||||
slug_title = mobj.group(2)
|
slug_title = mobj.group('slug_title')
|
||||||
full_title = '%s/sets/%s' % (uploader, slug_title)
|
full_title = '%s/sets/%s' % (uploader, slug_title)
|
||||||
|
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||||
|
|
||||||
|
token = mobj.group('token')
|
||||||
|
if token:
|
||||||
|
full_title += '/' + token
|
||||||
|
url += '/' + token
|
||||||
|
|
||||||
self.report_resolve(full_title)
|
self.report_resolve(full_title)
|
||||||
|
|
||||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
|
||||||
resolv_url = self._resolv_url(url)
|
resolv_url = self._resolv_url(url)
|
||||||
info = self._download_json(resolv_url, full_title)
|
info = self._download_json(resolv_url, full_title)
|
||||||
|
|
||||||
@ -252,7 +275,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'entries': [self._extract_info_dict(track) for track in info['tracks']],
|
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
|
||||||
'id': info['id'],
|
'id': info['id'],
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
}
|
}
|
||||||
@ -315,11 +338,9 @@ class SoundcloudUserIE(SoundcloudIE):
|
|||||||
|
|
||||||
|
|
||||||
class SoundcloudPlaylistIE(SoundcloudIE):
|
class SoundcloudPlaylistIE(SoundcloudIE):
|
||||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||||
IE_NAME = 'soundcloud:playlist'
|
IE_NAME = 'soundcloud:playlist'
|
||||||
_TESTS = [
|
_TESTS = [{
|
||||||
|
|
||||||
{
|
|
||||||
'url': 'http://api.soundcloud.com/playlists/4110309',
|
'url': 'http://api.soundcloud.com/playlists/4110309',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4110309',
|
'id': '4110309',
|
||||||
@ -327,22 +348,28 @@ class SoundcloudPlaylistIE(SoundcloudIE):
|
|||||||
'description': 're:.*?TILT Brass - Bowery Poetry Club',
|
'description': 're:.*?TILT Brass - Bowery Poetry Club',
|
||||||
},
|
},
|
||||||
'playlist_count': 6,
|
'playlist_count': 6,
|
||||||
}
|
}]
|
||||||
]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
playlist_id = mobj.group('id')
|
playlist_id = mobj.group('id')
|
||||||
base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
|
base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
|
||||||
|
|
||||||
data = compat_urllib_parse.urlencode({
|
data_dict = {
|
||||||
'client_id': self._CLIENT_ID,
|
'client_id': self._CLIENT_ID,
|
||||||
})
|
}
|
||||||
|
token = mobj.group('token')
|
||||||
|
|
||||||
|
if token:
|
||||||
|
data_dict['secret_token'] = token
|
||||||
|
|
||||||
|
data = compat_urllib_parse.urlencode(data_dict)
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
base_url + data, playlist_id, 'Downloading playlist')
|
base_url + data, playlist_id, 'Downloading playlist')
|
||||||
|
|
||||||
entries = [
|
entries = [
|
||||||
self._extract_info_dict(t, quiet=True) for t in data['tracks']]
|
self._extract_info_dict(t, quiet=True, secret_token=token)
|
||||||
|
for t in data['tracks']]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
|
@ -9,7 +9,6 @@ from ..utils import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
int_or_none,
|
|
||||||
)
|
)
|
||||||
from ..aes import aes_decrypt_text
|
from ..aes import aes_decrypt_text
|
||||||
|
|
||||||
@ -40,31 +39,42 @@ class SpankwireIE(InfoExtractor):
|
|||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
|
title = self._html_search_regex(
|
||||||
|
r'<h1>([^<]+)', webpage, 'title')
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
|
r'<div\s+id="descriptionContent">([^<]+)<',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
|
r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
|
||||||
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
|
r'by:\s*<a [^>]*>(.+?)</a>',
|
||||||
|
webpage, 'uploader', fatal=False)
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
|
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"',
|
||||||
upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
|
webpage, 'uploader id', fatal=False)
|
||||||
if upload_date:
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
upload_date = unified_strdate(upload_date)
|
r'</a> on (.+?) at \d+:\d+',
|
||||||
|
webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
view_count = self._html_search_regex(
|
view_count = str_to_int(self._html_search_regex(
|
||||||
r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
|
r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
|
||||||
if view_count:
|
webpage, 'view count', fatal=False))
|
||||||
view_count = str_to_int(view_count)
|
comment_count = str_to_int(self._html_search_regex(
|
||||||
comment_count = int_or_none(self._html_search_regex(
|
r'Comments<span[^>]+>\s*\(([\d,\.]+)\)</span>',
|
||||||
r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))
|
webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
|
video_urls = list(map(
|
||||||
|
compat_urllib_parse.unquote,
|
||||||
|
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage)))
|
||||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||||
password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ')
|
password = self._html_search_regex(
|
||||||
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
r'flashvars\.video_title = "([^"]+)',
|
||||||
|
webpage, 'password').replace('+', ' ')
|
||||||
|
video_urls = list(map(
|
||||||
|
lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'),
|
||||||
|
video_urls))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for video_url in video_urls:
|
for video_url in video_urls:
|
||||||
|
92
youtube_dl/extractor/sport5.py
Normal file
92
youtube_dl/extractor/sport5.py
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class Sport5IE(InfoExtractor):
    """Extractor for sport5.co.il pages identified by a ``Vi`` or ``docID`` query value."""

    # The subdomain and its trailing dot are optional *together*.  The previous
    # pattern `(?:www|vod)?\.` required the dot even when no subdomain was
    # present, so it rejected the bare domain and accepted "http://.sport5.co.il".
    _VALID_URL = r'http://(?:(?:www|vod)\.)?sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
            'info_dict': {
                'id': 's5-Y59xx1-GUh2',
                'ext': 'mp4',
                'title': 'ולנסיה-קורדובה 0:3',
                'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה',
                'duration': 228,
                'categories': list,
            },
            'skip': 'Blocked outside of Israel',
        }, {
            'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE',
            'info_dict': {
                'id': 's5-SiXxx1-hKh2',
                'ext': 'mp4',
                'title': 'GOALS_CELTIC_270914.mp4',
                'description': '',
                'duration': 87,
                'categories': list,
            },
            'skip': 'Blocked outside of Israel',
        }
    ]

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the page at *url*.

        The page is downloaded to find the clip id, then the clip's
        ``metadata.xml`` is fetched and converted into title, description,
        duration, thumbnails, categories and formats.

        Raises ExtractorError (expected=True) when the metadata document
        contains an ``Error`` element.
        """
        mobj = re.match(self._VALID_URL, url)
        # Numeric id taken from the URL; only used to label the page download.
        media_id = mobj.group('id')

        webpage = self._download_webpage(url, media_id)

        # Raw string: '\w' is not a valid escape sequence in a normal string literal.
        video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id')

        metadata = self._download_xml(
            'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id,
            video_id)

        # The metadata document reports failures in-band via an <Error> element.
        error = metadata.find('./Error')
        if error is not None:
            raise ExtractorError(
                '%s returned error: %s - %s' % (
                    self.IE_NAME,
                    error.find('./Name').text,
                    error.find('./Description').text),
                expected=True)

        title = metadata.find('./Title').text
        description = metadata.find('./Description').text
        duration = int(metadata.find('./Duration').text)

        # <PosterLinks> may be absent; fall back to an empty list in that case.
        posters_el = metadata.find('./PosterLinks')
        thumbnails = [{
            'url': thumbnail.text,
            'width': int(thumbnail.get('width')),
            'height': int(thumbnail.get('height')),
        } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else []

        # <Categories> may be absent as well.
        categories_el = metadata.find('./Categories')
        categories = [
            cat.get('name') for cat in categories_el.findall('./Category')
        ] if categories_el is not None else []

        # One format per <FileURL>; bitrate/width/height come from its attributes.
        formats = [{
            'url': fmt.text,
            'ext': 'mp4',
            'vbr': int(fmt.get('bitrate')),
            'width': int(fmt.get('width')),
            'height': int(fmt.get('height')),
        } for fmt in metadata.findall('./PlaybackLinks/FileURL')]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'duration': duration,
            'categories': categories,
            'formats': formats,
        }
|
81
youtube_dl/extractor/sportbox.py
Normal file
81
youtube_dl/extractor/sportbox.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SportBoxIE(InfoExtractor):
    """Extractor for news.sportbox.ru ``spbvideo_NI<digits>_<display_id>`` pages."""

    _VALID_URL = r'https?://news\.sportbox\.ru/Vidy_sporta/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)'
    _TESTS = [
        {
            'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S',
            'md5': 'ff56a598c2cf411a9a38a69709e97079',
            'info_dict': {
                'id': '80822',
                'ext': 'mp4',
                'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн',
                'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed',
                # Raw string: '\.' is not a valid escape in a normal string literal.
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': 1411896237,
                'upload_date': '20140928',
                'duration': 4846,
                'view_count': int,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        }, {
            'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Return the info dict for the video on the news page at *url*.

        Two documents are downloaded: the news page itself (title,
        description, thumbnail, upload date, duration) and the player page
        referenced from it (HLS playlist URL, view count).
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # The numeric video id only appears in the embedded player's src attribute.
        video_id = self._search_regex(
            r'src="/vdl/player/media/(\d+)"', webpage, 'video id')

        player = self._download_webpage(
            'http://news.sportbox.ru/vdl/player/media/%s' % video_id,
            display_id, 'Downloading player webpage')

        # The player page assigns the HLS playlist URL to a JavaScript variable.
        hls = self._search_regex(
            r"var\s+original_hls_file\s*=\s*'([^']+)'", player, 'hls file')

        formats = self._extract_m3u8_formats(hls, display_id, 'mp4')

        title = self._html_search_regex(
            r'<h1 itemprop="name">([^<]+)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r'(?s)<div itemprop="description">(.+?)</div>', webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        timestamp = parse_iso8601(self._search_regex(
            r'<span itemprop="uploadDate">([^<]+)</span>', webpage, 'timestamp', fatal=False))
        # Only the part after the "PT" prefix of the duration value is captured.
        duration = parse_duration(self._html_search_regex(
            r'<meta itemprop="duration" content="PT([^"]+)">', webpage, 'duration', fatal=False))
        # The view counter is read from the player page, not the news page.
        view_count = int_or_none(self._html_search_regex(
            r'<span>Просмотров: (\d+)</span>', player, 'view count', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
|
@ -17,11 +17,11 @@ class SportDeutschlandIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
|
'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
|
'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
|
||||||
'categories': ['Badminton'],
|
'categories': ['Badminton'],
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE',
|
'description': 're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV',
|
||||||
'timestamp': int,
|
'timestamp': int,
|
||||||
'upload_date': 're:^201408[23][0-9]$',
|
'upload_date': 're:^201408[23][0-9]$',
|
||||||
},
|
},
|
||||||
|
@ -39,10 +39,10 @@ class SunPornoIE(InfoExtractor):
|
|||||||
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
duration = parse_duration(self._search_regex(
|
duration = parse_duration(self._search_regex(
|
||||||
r'<span>Duration: (\d+:\d+)</span>', webpage, 'duration', fatal=False))
|
r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = int_or_none(self._html_search_regex(
|
||||||
r'<span class="views">(\d+)</span>', webpage, 'view count', fatal=False))
|
r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False))
|
||||||
comment_count = int_or_none(self._html_search_regex(
|
comment_count = int_or_none(self._html_search_regex(
|
||||||
r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
|
r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user