Compare commits
472 Commits
2015.02.19
...
2015.04.03
Author | SHA1 | Date | |
---|---|---|---|
|
a9cbab1735 | ||
|
6b7556a554 | ||
|
a3c7019e06 | ||
|
416b9c29f7 | ||
|
2ec8e04cac | ||
|
e03bfb30ce | ||
|
f5b669113f | ||
|
d08225edf4 | ||
|
8075d4f99d | ||
|
1a944d8a2a | ||
|
7cf02b6619 | ||
|
55cde6ef3c | ||
|
69c3af567d | ||
|
60e1fe0079 | ||
|
4669393070 | ||
|
ce3bfe5d57 | ||
|
2a0c2ca2b8 | ||
|
c89fbfb385 | ||
|
facecb84a1 | ||
|
ed06e9949b | ||
|
e15307a612 | ||
|
5cbb2699ee | ||
|
a2edf2e7ff | ||
|
1d31e7a2fc | ||
|
a2a4d5fa31 | ||
|
a28ccbabc6 | ||
|
edd7344820 | ||
|
c808ef81bb | ||
|
fd203fe357 | ||
|
5bb7ab9928 | ||
|
87270c8416 | ||
|
ebc2f7a2db | ||
|
7700207ec7 | ||
|
4d5d14f5cf | ||
|
72b249bf1f | ||
|
9b4774b21b | ||
|
2ddf083588 | ||
|
8343a03357 | ||
|
ad320e9b83 | ||
|
ecb750a446 | ||
|
5f88e02818 | ||
|
616af2f4b9 | ||
|
5a3b315b5f | ||
|
b7a2268e7b | ||
|
20d729228c | ||
|
af8c93086c | ||
|
79fd11ab8e | ||
|
cb88671e37 | ||
|
ff79552f13 | ||
|
643fe72717 | ||
|
4747e2183a | ||
|
c59e701e35 | ||
|
8e678af4ba | ||
|
70a1165b32 | ||
|
af14000215 | ||
|
998e6cdba0 | ||
|
2315fb5e5f | ||
|
157e9e5aa5 | ||
|
c496ec0848 | ||
|
15b67a268a | ||
|
31c4809827 | ||
|
ac0df2350a | ||
|
223b27f46c | ||
|
425142be60 | ||
|
7e17ec8c71 | ||
|
448830ce7b | ||
|
8896b614a9 | ||
|
a7fce980ad | ||
|
91757b0f37 | ||
|
fbfcc2972b | ||
|
db40364b87 | ||
|
094ce39c45 | ||
|
ae67d082fe | ||
|
8f76df7f37 | ||
|
5c19d18cbf | ||
|
838b93405b | ||
|
2676caf344 | ||
|
48c971e073 | ||
|
f5e2efbbf0 | ||
|
5d1f0e607b | ||
|
b0872c19ea | ||
|
9f790b9901 | ||
|
c41a2ec4af | ||
|
32d687f55e | ||
|
93f787070f | ||
|
f9544f6e8f | ||
|
336d19044c | ||
|
7866c9e173 | ||
|
1a4123de04 | ||
|
cf2e2eb1c0 | ||
|
2051acdeb2 | ||
|
cefdf970cc | ||
|
a1d0aa7b88 | ||
|
49aeedb8cb | ||
|
ef249a2cd7 | ||
|
a09141548a | ||
|
5379a2d40d | ||
|
c9450c7ab1 | ||
|
faa1b5c292 | ||
|
393d9fc6d2 | ||
|
4e6a228689 | ||
|
179d6678b1 | ||
|
85698c5086 | ||
|
a7d9ded45d | ||
|
531980d89c | ||
|
1887ecd4d6 | ||
|
cd32c2caba | ||
|
1c9a1457fc | ||
|
038b0eb1da | ||
|
f20bf146e2 | ||
|
01218f919b | ||
|
2684871bc1 | ||
|
ccf3960eec | ||
|
eecc0685c9 | ||
|
2ed849eccf | ||
|
3378d67a18 | ||
|
f3c0c667a6 | ||
|
0ae8bbac2d | ||
|
cbc3cfcab4 | ||
|
b30ef07c6c | ||
|
73900846b1 | ||
|
d1dc7e3991 | ||
|
3073a6d5e9 | ||
|
aae53774f2 | ||
|
7a757b7194 | ||
|
fa8ce26904 | ||
|
2c2c06e359 | ||
|
ee580538fa | ||
|
c3c5c31517 | ||
|
ed9a25dd61 | ||
|
9ef4f12b53 | ||
|
84f8101606 | ||
|
b1337948eb | ||
|
98f02fdde2 | ||
|
048fdc2292 | ||
|
2ca1c5aa9f | ||
|
674fb0fcc5 | ||
|
00bfe40e4d | ||
|
cd459b1d49 | ||
|
92a4793b3c | ||
|
dc03a42537 | ||
|
219da6bb68 | ||
|
0499cd866e | ||
|
13047f4135 | ||
|
af69cab21d | ||
|
d41a3fa1b4 | ||
|
733be371af | ||
|
576904bce6 | ||
|
cf47794f09 | ||
|
c06a9f8730 | ||
|
2e90dff2c2 | ||
|
90183a46d8 | ||
|
b68eedba23 | ||
|
d5b559393b | ||
|
1de4ac1385 | ||
|
39aa42ffbb | ||
|
ec1b9577ba | ||
|
3b4444f99a | ||
|
613b2d9dc6 | ||
|
8f4cc22455 | ||
|
7c42327e0e | ||
|
873383e9bd | ||
|
8508557e77 | ||
|
4d1652484f | ||
|
88cf6fb368 | ||
|
e7db87f700 | ||
|
2cb434e53e | ||
|
cd65491c30 | ||
|
082b1155a3 | ||
|
9202b1b787 | ||
|
a7e01c438d | ||
|
05be67e77d | ||
|
85741b9986 | ||
|
f247a199fe | ||
|
29171bc2d2 | ||
|
7be5a62ed7 | ||
|
3647136f24 | ||
|
13598940e3 | ||
|
0eb365868e | ||
|
28c6411e49 | ||
|
bba3fc7960 | ||
|
fcd877013e | ||
|
ba1d4c0488 | ||
|
517bcca299 | ||
|
1b53778175 | ||
|
b7a0304d92 | ||
|
545315a985 | ||
|
3f4327520c | ||
|
4a34f69ea6 | ||
|
fb7e68833c | ||
|
486dd09e0b | ||
|
054b99a330 | ||
|
65c5e044c7 | ||
|
11984c7467 | ||
|
3946864c8a | ||
|
b84037013e | ||
|
1dbfc62d75 | ||
|
d7d79106c7 | ||
|
1138491631 | ||
|
71705fa70d | ||
|
602814adab | ||
|
3a77719c5a | ||
|
7e195d0e92 | ||
|
e04793401d | ||
|
a3fbd18824 | ||
|
c6052b8c14 | ||
|
c792b5011f | ||
|
32aaeca775 | ||
|
1593194c63 | ||
|
614a7e1e23 | ||
|
2ebfeacabc | ||
|
f5d8f58a17 | ||
|
937daef4a7 | ||
|
dd77f14c64 | ||
|
c36cbe5a8a | ||
|
41b2194f86 | ||
|
d1e2e8f583 | ||
|
47fe42e1ab | ||
|
4c60393854 | ||
|
f848215dfc | ||
|
dcca581967 | ||
|
d475b3384c | ||
|
dd7831fe94 | ||
|
cc08b11d16 | ||
|
8bba753cca | ||
|
43d6280d0a | ||
|
e5a11a2293 | ||
|
f18ef2d144 | ||
|
1bb5c511a5 | ||
|
d55de57b67 | ||
|
a2aaf4dbc6 | ||
|
bdf6eee0ae | ||
|
8b910bda0c | ||
|
24993e3b39 | ||
|
11101076a1 | ||
|
f838875726 | ||
|
28778d6bae | ||
|
1132eae56d | ||
|
d34e79492d | ||
|
ab205b9dc8 | ||
|
7dcad95d4f | ||
|
8a48223a7b | ||
|
d47ae7f620 | ||
|
135c9c42bf | ||
|
0bf79ac455 | ||
|
98998cded6 | ||
|
14137b5781 | ||
|
a172d96292 | ||
|
23ba76bc0e | ||
|
61e00a9775 | ||
|
d1508cd68d | ||
|
9c85b5376d | ||
|
3c6f245083 | ||
|
f207019ce5 | ||
|
bd05aa4e24 | ||
|
8dc9d361c2 | ||
|
d0e958c71c | ||
|
a0bb7c5593 | ||
|
7feddd9fc7 | ||
|
55969016e9 | ||
|
9609f02e3c | ||
|
5c7495a194 | ||
|
5ee6fc974e | ||
|
c2ebea6580 | ||
|
12a129ec6d | ||
|
f28fe66970 | ||
|
123397317c | ||
|
dc570c4951 | ||
|
22d3628319 | ||
|
50c9949d7a | ||
|
376817c6d4 | ||
|
63fc800057 | ||
|
e0d0572b73 | ||
|
7fde87c77d | ||
|
938c3f65b6 | ||
|
2461f79d2a | ||
|
499bfcbfd0 | ||
|
07490f8017 | ||
|
91410c9bfa | ||
|
a7440261c5 | ||
|
76c73715fb | ||
|
c75f0b361a | ||
|
295df4edb9 | ||
|
562ceab13d | ||
|
2f0f6578c3 | ||
|
30cbd4e0d6 | ||
|
549e58069c | ||
|
7594be85ff | ||
|
3630034609 | ||
|
4e01501bbf | ||
|
1aa5172f56 | ||
|
f7e2ee8fa6 | ||
|
66dc9a3701 | ||
|
31bd39256b | ||
|
003c69a84b | ||
|
0134901108 | ||
|
eee6293d57 | ||
|
8237bec4f0 | ||
|
29cad7ad13 | ||
|
0d103de3b0 | ||
|
a0090691d0 | ||
|
6c87c2eea8 | ||
|
58c2ec6ab3 | ||
|
df5ae3eb16 | ||
|
efda2d7854 | ||
|
e143f5dae9 | ||
|
48218cdb97 | ||
|
e9fade72f3 | ||
|
0f2c0d335b | ||
|
40b077bc7e | ||
|
a931092cb3 | ||
|
bd3749ed69 | ||
|
4ffbf77886 | ||
|
781a7ef60a | ||
|
5b2949ee0b | ||
|
a0d646135a | ||
|
7862ad88b7 | ||
|
f3bff94cf9 | ||
|
0eba1e1782 | ||
|
e3216b82bf | ||
|
da419e2332 | ||
|
0d97ef43be | ||
|
1a2313a6f2 | ||
|
250a9bdfe2 | ||
|
6317a3e9da | ||
|
7ab7c9e932 | ||
|
e129c5bc0d | ||
|
2e241242a3 | ||
|
9724e5d336 | ||
|
63a562f95e | ||
|
5c340b0387 | ||
|
1c6510f57a | ||
|
2a15a98a6a | ||
|
72a406e7aa | ||
|
feccc3ff37 | ||
|
265bfa2c79 | ||
|
8faf9b9b41 | ||
|
84be7c230c | ||
|
3e675fabe0 | ||
|
cd5b4b0bc2 | ||
|
7ef822021b | ||
|
9a48926a57 | ||
|
13cd97f3df | ||
|
183139340b | ||
|
1c69bca258 | ||
|
c10ea454dc | ||
|
9504fc21b5 | ||
|
13d8fbef30 | ||
|
b8988b63a6 | ||
|
5eaaeb7c31 | ||
|
c4f8c453ae | ||
|
6f4ba54079 | ||
|
637570326b | ||
|
37f885650c | ||
|
c8c34ccb20 | ||
|
e765ed3a9c | ||
|
677063594e | ||
|
59c7cbd482 | ||
|
570311610e | ||
|
41b264e77c | ||
|
df4bd0d53f | ||
|
7f09a662a0 | ||
|
4f3b21e1c7 | ||
|
54233c9080 | ||
|
db8e13ef71 | ||
|
5a42414b9c | ||
|
9c665ab72e | ||
|
b665ba6aa6 | ||
|
ec5913b5cd | ||
|
25ac63ed71 | ||
|
99209c2916 | ||
|
1fbaa0a521 | ||
|
3037b91e05 | ||
|
ffdf972b91 | ||
|
459e5fbd5f | ||
|
bfc993cc91 | ||
|
4432db35d9 | ||
|
591ab1dff9 | ||
|
5bca2424bc | ||
|
bd61a9e770 | ||
|
3438e7acd2 | ||
|
09c200acf2 | ||
|
716889cab1 | ||
|
409693984f | ||
|
04e8c11080 | ||
|
80af2b73ab | ||
|
3cc57f9645 | ||
|
a65d4e7f14 | ||
|
b531cfc019 | ||
|
543ec2136b | ||
|
93b5071f73 | ||
|
ddc369f073 | ||
|
1b40dc92eb | ||
|
fcc3e6138b | ||
|
9fe6ef7ab2 | ||
|
c010af6f19 | ||
|
35b7982303 | ||
|
f311cfa231 | ||
|
80970e531b | ||
|
b7bb76df05 | ||
|
98c70d6fc7 | ||
|
ab84349b16 | ||
|
03091e372f | ||
|
4d17184817 | ||
|
e086e0eb6c | ||
|
314368c822 | ||
|
c5181ab410 | ||
|
ea5152cae1 | ||
|
255fca5eea | ||
|
4aeccadf4e | ||
|
93540ee10e | ||
|
8fb3ac3649 | ||
|
77b2986b5b | ||
|
62b013df0d | ||
|
fad6768bd1 | ||
|
a78125f925 | ||
|
a00a8bcc8a | ||
|
1e9a9e167d | ||
|
3da0db62e6 | ||
|
e14ced7918 | ||
|
ab9d02f53b | ||
|
a461a11989 | ||
|
1bd838608f | ||
|
365577f567 | ||
|
50efb383f0 | ||
|
5da6bd0083 | ||
|
5e9a033e6e | ||
|
fb7cb6823e | ||
|
dd0a58f5f0 | ||
|
a21420389e | ||
|
6140baf4e1 | ||
|
8fc642eb5b | ||
|
e66e1a0046 | ||
|
d5c69f1da4 | ||
|
f13b1e7d7f | ||
|
5c8a3f862a | ||
|
8807f1277f | ||
|
a3b9157f49 | ||
|
b88ba05356 | ||
|
b74d505577 | ||
|
9e2d7dca87 | ||
|
d236b37ac9 | ||
|
e880c66bd8 | ||
|
383456aa29 | ||
|
1a13940c8d | ||
|
3d54788495 | ||
|
71d53ace2f | ||
|
f37e3f99f0 | ||
|
bd03ffc16e | ||
|
1ac1af9b47 | ||
|
3bf5705316 | ||
|
1c2528c8a3 | ||
|
4f7cea6c53 | ||
|
afbdd3acc3 | ||
|
01561da142 | ||
|
0af25f784b | ||
|
b9b42f2ea0 | ||
|
311c393838 | ||
|
18c1c42405 | ||
|
37dd5d4629 | ||
|
edab9dbf4d | ||
|
9868ea4936 | ||
|
85920dd01d | ||
|
bd7fe0cf66 | ||
|
48246541da | ||
|
360e1ca5cc | ||
|
a1f2a06b34 | ||
|
c84dd8a90d | ||
|
65469a7f8b | ||
|
6b597516c1 | ||
|
b5857f62e2 | ||
|
a504ced097 |
@@ -2,6 +2,7 @@ language: python
|
|||||||
python:
|
python:
|
||||||
- "2.6"
|
- "2.6"
|
||||||
- "2.7"
|
- "2.7"
|
||||||
|
- "3.2"
|
||||||
- "3.3"
|
- "3.3"
|
||||||
- "3.4"
|
- "3.4"
|
||||||
before_install:
|
before_install:
|
||||||
|
9
AUTHORS
9
AUTHORS
@@ -111,3 +111,12 @@ Paul Hartmann
|
|||||||
Frans de Jonge
|
Frans de Jonge
|
||||||
Robin de Rooij
|
Robin de Rooij
|
||||||
Ryan Schmidt
|
Ryan Schmidt
|
||||||
|
Leslie P. Polzer
|
||||||
|
Duncan Keall
|
||||||
|
Alexander Mamay
|
||||||
|
Devin J. Pohly
|
||||||
|
Eduardo Ferro Aldama
|
||||||
|
Jeff Buchbinder
|
||||||
|
Amish Bhadeshia
|
||||||
|
Joram Schrijver
|
||||||
|
Will W.
|
||||||
|
@@ -18,7 +18,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
|
|||||||
|
|
||||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||||
|
|
||||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
||||||
|
|
||||||
|
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||||
|
|
||||||
### Are you using the latest version?
|
### Are you using the latest version?
|
||||||
|
|
||||||
|
5
Makefile
5
Makefile
@@ -1,7 +1,8 @@
|
|||||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||||
|
find . -name "*.pyc" -delete
|
||||||
|
|
||||||
PREFIX ?= /usr/local
|
PREFIX ?= /usr/local
|
||||||
BINDIR ?= $(PREFIX)/bin
|
BINDIR ?= $(PREFIX)/bin
|
||||||
@@ -43,7 +44,7 @@ test:
|
|||||||
ot: offlinetest
|
ot: offlinetest
|
||||||
|
|
||||||
offlinetest: codetest
|
offlinetest: codetest
|
||||||
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
|
nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
|
||||||
|
|
||||||
tar: youtube-dl.tar.gz
|
tar: youtube-dl.tar.gz
|
||||||
|
|
||||||
|
436
README.md
436
README.md
@@ -47,209 +47,109 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
# OPTIONS
|
# OPTIONS
|
||||||
-h, --help print this help text and exit
|
-h, --help print this help text and exit
|
||||||
--version print program version and exit
|
--version print program version and exit
|
||||||
-U, --update update this program to latest version. Make
|
-U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
|
||||||
sure that you have sufficient permissions
|
-i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist
|
||||||
(run with sudo if needed)
|
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
|
||||||
-i, --ignore-errors continue on download errors, for example to
|
|
||||||
skip unavailable videos in a playlist
|
|
||||||
--abort-on-error Abort downloading of further videos (in the
|
|
||||||
playlist or the command line) if an error
|
|
||||||
occurs
|
|
||||||
--dump-user-agent display the current browser identification
|
--dump-user-agent display the current browser identification
|
||||||
--list-extractors List all supported extractors and the URLs
|
--list-extractors List all supported extractors and the URLs they would handle
|
||||||
they would handle
|
--extractor-descriptions Output descriptions of all supported extractors
|
||||||
--extractor-descriptions Output descriptions of all supported
|
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
|
||||||
extractors
|
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
|
||||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
|
||||||
example "gvsearch2:" downloads two videos
|
--ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
|
||||||
from google videos for youtube-dl "large
|
in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
|
||||||
apple". Use the value "auto" to let
|
--flat-playlist Do not extract the videos of a playlist, only list them.
|
||||||
youtube-dl guess ("auto_warning" to emit a
|
|
||||||
warning when guessing). "error" just throws
|
|
||||||
an error. The default value "fixup_error"
|
|
||||||
repairs broken URLs, but emits an error if
|
|
||||||
this is not possible instead of searching.
|
|
||||||
--ignore-config Do not read configuration files. When given
|
|
||||||
in the global configuration file /etc
|
|
||||||
/youtube-dl.conf: Do not read the user
|
|
||||||
configuration in ~/.config/youtube-
|
|
||||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
|
||||||
on Windows)
|
|
||||||
--flat-playlist Do not extract the videos of a playlist,
|
|
||||||
only list them.
|
|
||||||
--no-color Do not emit color codes in output.
|
--no-color Do not emit color codes in output.
|
||||||
|
|
||||||
## Network Options:
|
## Network Options:
|
||||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
|
||||||
an empty string (--proxy "") for direct
|
|
||||||
connection
|
|
||||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||||
--source-address IP Client-side IP address to bind to
|
--source-address IP Client-side IP address to bind to (experimental)
|
||||||
(experimental)
|
-4, --force-ipv4 Make all connections via IPv4 (experimental)
|
||||||
-4, --force-ipv4 Make all connections via IPv4
|
-6, --force-ipv6 Make all connections via IPv6 (experimental)
|
||||||
(experimental)
|
--cn-verification-proxy URL Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the options is
|
||||||
-6, --force-ipv6 Make all connections via IPv6
|
not present) is used for the actual downloading. (experimental)
|
||||||
(experimental)
|
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||||
--playlist-end NUMBER playlist video to end at (default is last)
|
--playlist-end NUMBER playlist video to end at (default is last)
|
||||||
--playlist-items ITEM_SPEC playlist video items to download. Specify
|
--playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
|
||||||
indices of the videos in the playlist
|
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
|
||||||
seperated by commas like: "--playlist-items
|
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||||
1,2,5,8" if you want to download videos
|
--match-title REGEX download only matching titles (regex or caseless sub-string)
|
||||||
indexed 1, 2, 5, 8 in the playlist. You can
|
--reject-title REGEX skip download for matching titles (regex or caseless sub-string)
|
||||||
specify range: "--playlist-items
|
|
||||||
1-3,7,10-13", it will download the videos
|
|
||||||
at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
|
||||||
--match-title REGEX download only matching titles (regex or
|
|
||||||
caseless sub-string)
|
|
||||||
--reject-title REGEX skip download for matching titles (regex or
|
|
||||||
caseless sub-string)
|
|
||||||
--max-downloads NUMBER Abort after downloading NUMBER files
|
--max-downloads NUMBER Abort after downloading NUMBER files
|
||||||
--min-filesize SIZE Do not download any videos smaller than
|
--min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
|
||||||
SIZE (e.g. 50k or 44.6m)
|
--max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
|
||||||
--max-filesize SIZE Do not download any videos larger than SIZE
|
|
||||||
(e.g. 50k or 44.6m)
|
|
||||||
--date DATE download only videos uploaded in this date
|
--date DATE download only videos uploaded in this date
|
||||||
--datebefore DATE download only videos uploaded on or before
|
--datebefore DATE download only videos uploaded on or before this date (i.e. inclusive)
|
||||||
this date (i.e. inclusive)
|
--dateafter DATE download only videos uploaded on or after this date (i.e. inclusive)
|
||||||
--dateafter DATE download only videos uploaded on or after
|
--min-views COUNT Do not download any videos with less than COUNT views
|
||||||
this date (i.e. inclusive)
|
--max-views COUNT Do not download any videos with more than COUNT views
|
||||||
--min-views COUNT Do not download any videos with less than
|
--match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present,
|
||||||
COUNT views
|
!key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
|
||||||
--max-views COUNT Do not download any videos with more than
|
a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
|
||||||
COUNT views
|
operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
|
||||||
--match-filter FILTER (Experimental) Generic video filter.
|
functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 &
|
||||||
Specify any key (see help for -o for a list
|
|
||||||
of available keys) to match if the key is
|
|
||||||
present, !key to check if the key is not
|
|
||||||
present,key > NUMBER (like "comment_count >
|
|
||||||
12", also works with >=, <, <=, !=, =) to
|
|
||||||
compare against a number, and & to require
|
|
||||||
multiple matches. Values which are not
|
|
||||||
known are excluded unless you put a
|
|
||||||
question mark (?) after the operator.For
|
|
||||||
example, to only match videos that have
|
|
||||||
been liked more than 100 times and disliked
|
|
||||||
less than 50 times (or the dislike
|
|
||||||
functionality is not available at the given
|
|
||||||
service), but who also have a description,
|
|
||||||
use --match-filter "like_count > 100 &
|
|
||||||
dislike_count <? 50 & description" .
|
dislike_count <? 50 & description" .
|
||||||
--no-playlist If the URL refers to a video and a
|
--no-playlist If the URL refers to a video and a playlist, download only the video.
|
||||||
playlist, download only the video.
|
--yes-playlist If the URL refers to a video and a playlist, download the playlist.
|
||||||
--age-limit YEARS download only videos suitable for the given
|
--age-limit YEARS download only videos suitable for the given age
|
||||||
age
|
--download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
|
||||||
--download-archive FILE Download only videos not listed in the
|
--include-ads Download advertisements as well (experimental)
|
||||||
archive file. Record the IDs of all
|
|
||||||
downloaded videos in it.
|
|
||||||
--include-ads Download advertisements as well
|
|
||||||
(experimental)
|
|
||||||
|
|
||||||
## Download Options:
|
## Download Options:
|
||||||
-r, --rate-limit LIMIT maximum download rate in bytes per second
|
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M)
|
||||||
(e.g. 50K or 4.2M)
|
-R, --retries RETRIES number of retries (default is 10), or "infinite".
|
||||||
-R, --retries RETRIES number of retries (default is 10), or
|
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024)
|
||||||
"infinite".
|
--no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
|
||||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
|
|
||||||
(default is 1024)
|
|
||||||
--no-resize-buffer do not automatically adjust the buffer
|
|
||||||
size. By default, the buffer size is
|
|
||||||
automatically resized from an initial value
|
|
||||||
of SIZE.
|
|
||||||
--playlist-reverse Download playlist videos in reverse order
|
--playlist-reverse Download playlist videos in reverse order
|
||||||
--xattr-set-filesize (experimental) set file xattribute
|
--xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize
|
||||||
ytdl.filesize with expected filesize
|
--hls-prefer-native (experimental) Use the native HLS downloader instead of ffmpeg.
|
||||||
--hls-prefer-native (experimental) Use the native HLS
|
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
|
||||||
downloader instead of ffmpeg.
|
--external-downloader-args ARGS Give these arguments to the external downloader.
|
||||||
--external-downloader COMMAND (experimental) Use the specified external
|
|
||||||
downloader. Currently supports
|
|
||||||
aria2c,curl,wget
|
|
||||||
|
|
||||||
## Filesystem Options:
|
## Filesystem Options:
|
||||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
||||||
stdin)
|
|
||||||
--id use only video ID in file name
|
--id use only video ID in file name
|
||||||
-o, --output TEMPLATE output filename template. Use %(title)s to
|
-o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
|
||||||
get the title, %(uploader)s for the
|
nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
|
||||||
uploader name, %(uploader_id)s for the
|
the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"),
|
||||||
uploader nickname if different,
|
%(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
|
||||||
%(autonumber)s to get an automatically
|
%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
|
||||||
incremented number, %(ext)s for the
|
%(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
|
||||||
filename extension, %(format)s for the
|
%(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
|
||||||
format description (like "22 - 1280x720" or
|
Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||||
"HD"), %(format_id)s for the unique id of
|
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
|
||||||
the format (like Youtube's itags: "137"),
|
--restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
|
||||||
%(upload_date)s for the upload date
|
-A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000
|
||||||
(YYYYMMDD), %(extractor)s for the provider
|
-t, --title [deprecated] use title in file name (default)
|
||||||
(youtube, metacafe, etc), %(id)s for the
|
|
||||||
video id, %(playlist_title)s,
|
|
||||||
%(playlist_id)s, or %(playlist)s (=title if
|
|
||||||
present, ID otherwise) for the playlist the
|
|
||||||
video is in, %(playlist_index)s for the
|
|
||||||
position in the playlist. %(height)s and
|
|
||||||
%(width)s for the width and height of the
|
|
||||||
video format. %(resolution)s for a textual
|
|
||||||
description of the resolution of the video
|
|
||||||
format. %% for a literal percent. Use - to
|
|
||||||
output to stdout. Can also be used to
|
|
||||||
download to a different directory, for
|
|
||||||
example with -o '/my/downloads/%(uploader)s
|
|
||||||
/%(title)s-%(id)s.%(ext)s' .
|
|
||||||
--autonumber-size NUMBER Specifies the number of digits in
|
|
||||||
%(autonumber)s when it is present in output
|
|
||||||
filename template or --auto-number option
|
|
||||||
is given
|
|
||||||
--restrict-filenames Restrict filenames to only ASCII
|
|
||||||
characters, and avoid "&" and spaces in
|
|
||||||
filenames
|
|
||||||
-A, --auto-number [deprecated; use -o
|
|
||||||
"%(autonumber)s-%(title)s.%(ext)s" ] number
|
|
||||||
downloaded files starting from 00000
|
|
||||||
-t, --title [deprecated] use title in file name
|
|
||||||
(default)
|
|
||||||
-l, --literal [deprecated] alias of --title
|
-l, --literal [deprecated] alias of --title
|
||||||
-w, --no-overwrites do not overwrite files
|
-w, --no-overwrites do not overwrite files
|
||||||
-c, --continue force resume of partially downloaded files.
|
-c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
|
||||||
By default, youtube-dl will resume
|
--no-continue do not resume partially downloaded files (restart from beginning)
|
||||||
downloads if possible.
|
--no-part do not use .part files - write directly into output file
|
||||||
--no-continue do not resume partially downloaded files
|
--no-mtime do not use the Last-modified header to set the file modification time
|
||||||
(restart from beginning)
|
--write-description write video description to a .description file
|
||||||
--no-part do not use .part files - write directly
|
|
||||||
into output file
|
|
||||||
--no-mtime do not use the Last-modified header to set
|
|
||||||
the file modification time
|
|
||||||
--write-description write video description to a .description
|
|
||||||
file
|
|
||||||
--write-info-json write video metadata to a .info.json file
|
--write-info-json write video metadata to a .info.json file
|
||||||
--write-annotations write video annotations to a .annotation
|
--write-annotations write video annotations to a .annotation file
|
||||||
file
|
--load-info FILE json file containing the video information (created with the "--write-json" option)
|
||||||
--load-info FILE json file containing the video information
|
--cookies FILE file to read cookies from and dump cookie jar in
|
||||||
(created with the "--write-json" option)
|
--cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
|
||||||
--cookies FILE file to read cookies from and dump cookie
|
or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
|
||||||
jar in
|
change.
|
||||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
|
||||||
can store some downloaded information
|
|
||||||
permanently. By default $XDG_CACHE_HOME
|
|
||||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
|
||||||
moment, only YouTube player files (for
|
|
||||||
videos with obfuscated signatures) are
|
|
||||||
cached, but that may change.
|
|
||||||
--no-cache-dir Disable filesystem caching
|
--no-cache-dir Disable filesystem caching
|
||||||
--rm-cache-dir Delete all filesystem cache files
|
--rm-cache-dir Delete all filesystem cache files
|
||||||
|
|
||||||
## Thumbnail images:
|
## Thumbnail images:
|
||||||
--write-thumbnail write thumbnail image to disk
|
--write-thumbnail write thumbnail image to disk
|
||||||
--write-all-thumbnails write all thumbnail image formats to disk
|
--write-all-thumbnails write all thumbnail image formats to disk
|
||||||
--list-thumbnails Simulate and list all available thumbnail
|
--list-thumbnails Simulate and list all available thumbnail formats
|
||||||
formats
|
|
||||||
|
|
||||||
## Verbosity / Simulation Options:
|
## Verbosity / Simulation Options:
|
||||||
-q, --quiet activates quiet mode
|
-q, --quiet activates quiet mode
|
||||||
--no-warnings Ignore warnings
|
--no-warnings Ignore warnings
|
||||||
-s, --simulate do not download the video and do not write
|
-s, --simulate do not download the video and do not write anything to disk
|
||||||
anything to disk
|
|
||||||
--skip-download do not download the video
|
--skip-download do not download the video
|
||||||
-g, --get-url simulate, quiet but print URL
|
-g, --get-url simulate, quiet but print URL
|
||||||
-e, --get-title simulate, quiet but print title
|
-e, --get-title simulate, quiet but print title
|
||||||
@@ -259,153 +159,87 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--get-duration simulate, quiet but print video length
|
--get-duration simulate, quiet but print video length
|
||||||
--get-filename simulate, quiet but print output filename
|
--get-filename simulate, quiet but print output filename
|
||||||
--get-format simulate, quiet but print output format
|
--get-format simulate, quiet but print output format
|
||||||
-j, --dump-json simulate, quiet but print JSON information.
|
-j, --dump-json simulate, quiet but print JSON information. See --output for a description of available keys.
|
||||||
See --output for a description of available
|
-J, --dump-single-json simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
|
||||||
keys.
|
information in a single line.
|
||||||
-J, --dump-single-json simulate, quiet but print JSON information
|
--print-json Be quiet and print the video information as JSON (video is still being downloaded).
|
||||||
for each command-line argument. If the URL
|
|
||||||
refers to a playlist, dump the whole
|
|
||||||
playlist information in a single line.
|
|
||||||
--print-json Be quiet and print the video information as
|
|
||||||
JSON (video is still being downloaded).
|
|
||||||
--newline output progress bar as new lines
|
--newline output progress bar as new lines
|
||||||
--no-progress do not print progress bar
|
--no-progress do not print progress bar
|
||||||
--console-title display progress in console titlebar
|
--console-title display progress in console titlebar
|
||||||
-v, --verbose print various debugging information
|
-v, --verbose print various debugging information
|
||||||
--dump-intermediate-pages print downloaded pages to debug problems
|
--dump-pages print downloaded pages to debug problems (very verbose)
|
||||||
(very verbose)
|
--write-pages Write downloaded intermediary pages to files in the current directory to debug problems
|
||||||
--write-pages Write downloaded intermediary pages to
|
|
||||||
files in the current directory to debug
|
|
||||||
problems
|
|
||||||
--print-traffic Display sent and read HTTP traffic
|
--print-traffic Display sent and read HTTP traffic
|
||||||
-C, --call-home Contact the youtube-dl server for
|
-C, --call-home Contact the youtube-dl server for debugging.
|
||||||
debugging.
|
--no-call-home Do NOT contact the youtube-dl server for debugging.
|
||||||
--no-call-home Do NOT contact the youtube-dl server for
|
|
||||||
debugging.
|
|
||||||
|
|
||||||
## Workarounds:
|
## Workarounds:
|
||||||
--encoding ENCODING Force the specified encoding (experimental)
|
--encoding ENCODING Force the specified encoding (experimental)
|
||||||
--no-check-certificate Suppress HTTPS certificate validation.
|
--no-check-certificate Suppress HTTPS certificate validation.
|
||||||
--prefer-insecure Use an unencrypted connection to retrieve
|
--prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
|
||||||
information about the video. (Currently
|
|
||||||
supported only for YouTube)
|
|
||||||
--user-agent UA specify a custom user agent
|
--user-agent UA specify a custom user agent
|
||||||
--referer URL specify a custom referer, use if the video
|
--referer URL specify a custom referer, use if the video access is restricted to one domain
|
||||||
access is restricted to one domain
|
--add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
|
||||||
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
--bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
|
||||||
separated by a colon ':'. You can use this
|
--sleep-interval SECONDS Number of seconds to sleep before each download.
|
||||||
option multiple times
|
|
||||||
--bidi-workaround Work around terminals that lack
|
|
||||||
bidirectional text support. Requires bidiv
|
|
||||||
or fribidi executable in PATH
|
|
||||||
--sleep-interval SECONDS Number of seconds to sleep before each
|
|
||||||
download.
|
|
||||||
|
|
||||||
## Video Format Options:
|
## Video Format Options:
|
||||||
-f, --format FORMAT video format code, specify the order of
|
-f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by
|
||||||
preference using slashes, as in -f 22/17/18
|
extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio",
|
||||||
. Instead of format codes, you can select
|
"worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
|
||||||
by extension for the extensions aac, m4a,
|
This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec,
|
||||||
mp3, mp4, ogg, wav, webm. You can also use
|
vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a
|
||||||
the special names "best", "bestvideo",
|
question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos
|
||||||
"bestaudio", "worst". You can filter the
|
(or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality.
|
||||||
video results by putting a condition in
|
Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio
|
||||||
brackets, as in -f "best[height=720]" (or
|
of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f
|
||||||
-f "[filesize>10M]"). This works for
|
|
||||||
filesize, height, width, tbr, abr, vbr,
|
|
||||||
asr, and fps and the comparisons <, <=, >,
|
|
||||||
>=, =, != and for ext, acodec, vcodec,
|
|
||||||
container, and protocol and the comparisons
|
|
||||||
=, != . Formats for which the value is not
|
|
||||||
known are excluded unless you put a
|
|
||||||
question mark (?) after the operator. You
|
|
||||||
can combine format filters, so -f "[height
|
|
||||||
<=? 720][tbr>500]" selects up to 720p
|
|
||||||
videos (or videos where the height is not
|
|
||||||
known) with a bitrate of at least 500
|
|
||||||
KBit/s. By default, youtube-dl will pick
|
|
||||||
the best quality. Use commas to download
|
|
||||||
multiple audio formats, such as -f
|
|
||||||
136/137/mp4/bestvideo,140/m4a/bestaudio.
|
|
||||||
You can merge the video and audio of two
|
|
||||||
formats into a single file using -f <video-
|
|
||||||
format>+<audio-format> (requires ffmpeg or
|
|
||||||
avconv), for example -f
|
|
||||||
bestvideo+bestaudio.
|
bestvideo+bestaudio.
|
||||||
--all-formats download all available video formats
|
--all-formats download all available video formats
|
||||||
--prefer-free-formats prefer free video formats unless a specific
|
--prefer-free-formats prefer free video formats unless a specific one is requested
|
||||||
one is requested
|
|
||||||
--max-quality FORMAT highest quality format to download
|
--max-quality FORMAT highest quality format to download
|
||||||
-F, --list-formats list all available formats
|
-F, --list-formats list all available formats
|
||||||
--youtube-skip-dash-manifest Do not download the DASH manifest on
|
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
|
||||||
YouTube videos
|
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
|
||||||
--merge-output-format FORMAT If a merge is required (e.g.
|
merge is required
|
||||||
bestvideo+bestaudio), output to given
|
|
||||||
container format. One of mkv, mp4, ogg,
|
|
||||||
webm, flv.Ignored if no merge is required
|
|
||||||
|
|
||||||
## Subtitle Options:
|
## Subtitle Options:
|
||||||
--write-sub write subtitle file
|
--write-sub write subtitle file
|
||||||
--write-auto-sub write automatic subtitle file (youtube
|
--write-auto-sub write automatic subtitle file (youtube only)
|
||||||
only)
|
--all-subs downloads all the available subtitles of the video
|
||||||
--all-subs downloads all the available subtitles of
|
|
||||||
the video
|
|
||||||
--list-subs lists all available subtitles for the video
|
--list-subs lists all available subtitles for the video
|
||||||
--sub-format FORMAT subtitle format (default=srt) ([sbv/vtt]
|
--sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best"
|
||||||
youtube only)
|
--sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
|
||||||
--sub-lang LANGS languages of the subtitles to download
|
|
||||||
(optional) separated by commas, use IETF
|
|
||||||
language tags like 'en,pt'
|
|
||||||
|
|
||||||
## Authentication Options:
|
## Authentication Options:
|
||||||
-u, --username USERNAME login with this account ID
|
-u, --username USERNAME login with this account ID
|
||||||
-p, --password PASSWORD account password. If this option is left
|
-p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively.
|
||||||
out, youtube-dl will ask interactively.
|
|
||||||
-2, --twofactor TWOFACTOR two-factor auth code
|
-2, --twofactor TWOFACTOR two-factor auth code
|
||||||
-n, --netrc use .netrc authentication data
|
-n, --netrc use .netrc authentication data
|
||||||
--video-password PASSWORD video password (vimeo, smotri)
|
--video-password PASSWORD video password (vimeo, smotri)
|
||||||
|
|
||||||
## Post-processing Options:
|
## Post-processing Options:
|
||||||
-x, --extract-audio convert video files to audio-only files
|
-x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
|
||||||
(requires ffmpeg or avconv and ffprobe or
|
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
||||||
avprobe)
|
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K
|
||||||
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a",
|
(default 5)
|
||||||
"opus", or "wav"; "best" by default
|
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
|
||||||
--audio-quality QUALITY ffmpeg/avconv audio quality specification,
|
-k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default
|
||||||
insert a value between 0 (better) and 9
|
--no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default
|
||||||
(worse) for VBR or a specific bitrate like
|
--embed-subs embed subtitles in the video (only for mp4 videos)
|
||||||
128K (default 5)
|
|
||||||
--recode-video FORMAT Encode the video to another format if
|
|
||||||
necessary (currently supported:
|
|
||||||
mp4|flv|ogg|webm|mkv)
|
|
||||||
-k, --keep-video keeps the video file on disk after the
|
|
||||||
post-processing; the video is erased by
|
|
||||||
default
|
|
||||||
--no-post-overwrites do not overwrite post-processed files; the
|
|
||||||
post-processed files are overwritten by
|
|
||||||
default
|
|
||||||
--embed-subs embed subtitles in the video (only for mp4
|
|
||||||
videos)
|
|
||||||
--embed-thumbnail embed thumbnail in the audio as cover art
|
--embed-thumbnail embed thumbnail in the audio as cover art
|
||||||
--add-metadata write metadata to the video file
|
--add-metadata write metadata to the video file
|
||||||
--xattrs write metadata to the video file's xattrs
|
--metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
|
||||||
(using dublin core and xdg standards)
|
parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
|
||||||
--fixup POLICY Automatically correct known faults of the
|
%(title)s" matches a title like "Coldplay - Paradise"
|
||||||
file. One of never (do nothing), warn (only
|
--xattrs write metadata to the video file's xattrs (using dublin core and xdg standards)
|
||||||
emit a warning), detect_or_warn(the
|
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
|
||||||
default; fix file if we can, warn
|
fix file if we can, warn otherwise)
|
||||||
otherwise)
|
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
|
||||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
--prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
|
||||||
postprocessors (default)
|
--ffmpeg-location PATH Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.
|
||||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
--exec CMD Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm
|
||||||
postprocessors
|
{}'
|
||||||
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
|
--convert-subtitles FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt)
|
||||||
either the path to the binary or its
|
|
||||||
containing directory.
|
|
||||||
--exec CMD Execute a command on the file after
|
|
||||||
downloading, similar to find's -exec
|
|
||||||
syntax. Example: --exec 'adb push {}
|
|
||||||
/sdcard/Music/ && rm {}'
|
|
||||||
|
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
|
|
||||||
@@ -525,6 +359,10 @@ YouTube requires an additional signature since September 2012 which is not suppo
|
|||||||
|
|
||||||
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||||
|
|
||||||
|
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
||||||
|
|
||||||
|
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--network-address` options](#network-options) to select another IP address.
|
||||||
|
|
||||||
### SyntaxError: Non-ASCII character ###
|
### SyntaxError: Non-ASCII character ###
|
||||||
|
|
||||||
The error
|
The error
|
||||||
@@ -569,9 +407,21 @@ A note on the service that they don't host the infringing content, but just link
|
|||||||
|
|
||||||
Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
|
Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
|
||||||
|
|
||||||
|
### How can I speed up work on my issue?
|
||||||
|
|
||||||
|
(Also known as: Help, my important issue not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
|
||||||
|
|
||||||
|
First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
|
||||||
|
|
||||||
|
Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
|
||||||
|
|
||||||
|
If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
|
||||||
|
|
||||||
|
Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
|
||||||
|
|
||||||
### How can I detect whether a given URL is supported by youtube-dl?
|
### How can I detect whether a given URL is supported by youtube-dl?
|
||||||
|
|
||||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||||
|
|
||||||
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
||||||
|
|
||||||
@@ -668,6 +518,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
|
|||||||
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
|
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
from __future__ import unicode_literals
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
|
||||||
ydl_opts = {}
|
ydl_opts = {}
|
||||||
@@ -680,6 +531,7 @@ Most likely, you'll want to use various options. For a list of what can be done,
|
|||||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
from __future__ import unicode_literals
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
|
||||||
|
|
||||||
@@ -737,7 +589,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
|
|||||||
|
|
||||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||||
|
|
||||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
||||||
|
|
||||||
|
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||||
|
|
||||||
### Are you using the latest version?
|
### Are you using the latest version?
|
||||||
|
|
||||||
|
@@ -28,7 +28,7 @@ for test in get_testcases():
|
|||||||
if METHOD == 'EURISTIC':
|
if METHOD == 'EURISTIC':
|
||||||
try:
|
try:
|
||||||
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
|
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
|
||||||
except:
|
except Exception:
|
||||||
print('\nFail: {0}'.format(test['name']))
|
print('\nFail: {0}'.format(test['name']))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -45,12 +45,12 @@ for test in get_testcases():
|
|||||||
|
|
||||||
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
|
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
|
||||||
|
|
||||||
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
|
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
|
||||||
or test['info_dict']['age_limit'] != 18):
|
test['info_dict']['age_limit'] != 18):
|
||||||
print('\nPotential missing age_limit check: {0}'.format(test['name']))
|
print('\nPotential missing age_limit check: {0}'.format(test['name']))
|
||||||
|
|
||||||
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
|
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
|
||||||
and test['info_dict']['age_limit'] == 18):
|
test['info_dict']['age_limit'] == 18):
|
||||||
print('\nPotential false negative: {0}'.format(test['name']))
|
print('\nPotential false negative: {0}'.format(test['name']))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
42
devscripts/generate_aes_testdata.py
Normal file
42
devscripts/generate_aes_testdata.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.utils import intlist_to_bytes
|
||||||
|
from youtube_dl.aes import aes_encrypt, key_expansion
|
||||||
|
|
||||||
|
secret_msg = b'Secret message goes here'
|
||||||
|
|
||||||
|
|
||||||
|
def hex_str(int_list):
|
||||||
|
return codecs.encode(intlist_to_bytes(int_list), 'hex')
|
||||||
|
|
||||||
|
|
||||||
|
def openssl_encode(algo, key, iv):
|
||||||
|
cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
|
||||||
|
prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||||
|
out, _ = prog.communicate(secret_msg)
|
||||||
|
return out
|
||||||
|
|
||||||
|
iv = key = [0x20, 0x15] + 14 * [0]
|
||||||
|
|
||||||
|
r = openssl_encode('aes-128-cbc', key, iv)
|
||||||
|
print('aes_cbc_decrypt')
|
||||||
|
print(repr(r))
|
||||||
|
|
||||||
|
password = key
|
||||||
|
new_key = aes_encrypt(password, key_expansion(password))
|
||||||
|
r = openssl_encode('aes-128-ctr', new_key, iv)
|
||||||
|
print('aes_decrypt_text 16')
|
||||||
|
print(repr(r))
|
||||||
|
|
||||||
|
password = key + 16 * [0]
|
||||||
|
new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
|
||||||
|
r = openssl_encode('aes-256-ctr', new_key, iv)
|
||||||
|
print('aes_decrypt_text 32')
|
||||||
|
print(repr(r))
|
@@ -2,6 +2,8 @@
|
|||||||
- **1tv**: Первый канал
|
- **1tv**: Первый канал
|
||||||
- **1up.com**
|
- **1up.com**
|
||||||
- **220.ro**
|
- **220.ro**
|
||||||
|
- **22tracks:genre**
|
||||||
|
- **22tracks:track**
|
||||||
- **24video**
|
- **24video**
|
||||||
- **3sat**
|
- **3sat**
|
||||||
- **4tube**
|
- **4tube**
|
||||||
@@ -17,6 +19,7 @@
|
|||||||
- **AdultSwim**
|
- **AdultSwim**
|
||||||
- **Aftenposten**
|
- **Aftenposten**
|
||||||
- **Aftonbladet**
|
- **Aftonbladet**
|
||||||
|
- **AirMozilla**
|
||||||
- **AlJazeera**
|
- **AlJazeera**
|
||||||
- **Allocine**
|
- **Allocine**
|
||||||
- **AlphaPorno**
|
- **AlphaPorno**
|
||||||
@@ -46,6 +49,7 @@
|
|||||||
- **Bandcamp**
|
- **Bandcamp**
|
||||||
- **Bandcamp:album**
|
- **Bandcamp:album**
|
||||||
- **bbc.co.uk**: BBC iPlayer
|
- **bbc.co.uk**: BBC iPlayer
|
||||||
|
- **BeatportPro**
|
||||||
- **Beeg**
|
- **Beeg**
|
||||||
- **BehindKink**
|
- **BehindKink**
|
||||||
- **Bet**
|
- **Bet**
|
||||||
@@ -68,9 +72,12 @@
|
|||||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||||
- **CBS**
|
- **CBS**
|
||||||
- **CBSNews**: CBS News
|
- **CBSNews**: CBS News
|
||||||
|
- **CBSSports**
|
||||||
- **CeskaTelevize**
|
- **CeskaTelevize**
|
||||||
- **channel9**: Channel 9
|
- **channel9**: Channel 9
|
||||||
- **Chilloutzone**
|
- **Chilloutzone**
|
||||||
|
- **chirbit**
|
||||||
|
- **chirbit:profile**
|
||||||
- **Cinchcast**
|
- **Cinchcast**
|
||||||
- **Cinemassacre**
|
- **Cinemassacre**
|
||||||
- **clipfish**
|
- **clipfish**
|
||||||
@@ -104,15 +111,19 @@
|
|||||||
- **DctpTv**
|
- **DctpTv**
|
||||||
- **DeezerPlaylist**
|
- **DeezerPlaylist**
|
||||||
- **defense.gouv.fr**
|
- **defense.gouv.fr**
|
||||||
|
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||||
- **Discovery**
|
- **Discovery**
|
||||||
- **divxstage**: DivxStage
|
- **divxstage**: DivxStage
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
|
- **DouyuTV**
|
||||||
- **DRBonanza**
|
- **DRBonanza**
|
||||||
- **Dropbox**
|
- **Dropbox**
|
||||||
- **DrTuber**
|
- **DrTuber**
|
||||||
- **DRTV**
|
- **DRTV**
|
||||||
- **Dump**
|
- **Dump**
|
||||||
|
- **Dumpert**
|
||||||
- **dvtv**: http://video.aktualne.cz/
|
- **dvtv**: http://video.aktualne.cz/
|
||||||
|
- **EaglePlatform**
|
||||||
- **EbaumsWorld**
|
- **EbaumsWorld**
|
||||||
- **EchoMsk**
|
- **EchoMsk**
|
||||||
- **eHow**
|
- **eHow**
|
||||||
@@ -140,6 +151,7 @@
|
|||||||
- **Firstpost**
|
- **Firstpost**
|
||||||
- **Flickr**
|
- **Flickr**
|
||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
|
- **FootyRoom**
|
||||||
- **Foxgay**
|
- **Foxgay**
|
||||||
- **FoxNews**
|
- **FoxNews**
|
||||||
- **france2.fr:generation-quoi**
|
- **france2.fr:generation-quoi**
|
||||||
@@ -157,6 +169,7 @@
|
|||||||
- **GameSpot**
|
- **GameSpot**
|
||||||
- **GameStar**
|
- **GameStar**
|
||||||
- **Gametrailers**
|
- **Gametrailers**
|
||||||
|
- **Gazeta**
|
||||||
- **GDCVault**
|
- **GDCVault**
|
||||||
- **generic**: Generic downloader that works on some sites
|
- **generic**: Generic downloader that works on some sites
|
||||||
- **GiantBomb**
|
- **GiantBomb**
|
||||||
@@ -191,6 +204,7 @@
|
|||||||
- **ign.com**
|
- **ign.com**
|
||||||
- **imdb**: Internet Movie Database trailers
|
- **imdb**: Internet Movie Database trailers
|
||||||
- **imdb:list**: Internet Movie Database lists
|
- **imdb:list**: Internet Movie Database lists
|
||||||
|
- **Imgur**
|
||||||
- **Ina**
|
- **Ina**
|
||||||
- **InfoQ**
|
- **InfoQ**
|
||||||
- **Instagram**
|
- **Instagram**
|
||||||
@@ -205,6 +219,8 @@
|
|||||||
- **Jove**
|
- **Jove**
|
||||||
- **jpopsuki.tv**
|
- **jpopsuki.tv**
|
||||||
- **Jukebox**
|
- **Jukebox**
|
||||||
|
- **Kaltura**
|
||||||
|
- **KanalPlay**: Kanal 5/9/11 Play
|
||||||
- **Kankan**
|
- **Kankan**
|
||||||
- **Karaoketv**
|
- **Karaoketv**
|
||||||
- **keek**
|
- **keek**
|
||||||
@@ -216,6 +232,10 @@
|
|||||||
- **Ku6**
|
- **Ku6**
|
||||||
- **la7.tv**
|
- **la7.tv**
|
||||||
- **Laola1Tv**
|
- **Laola1Tv**
|
||||||
|
- **Letv**
|
||||||
|
- **LetvPlaylist**
|
||||||
|
- **LetvTv**
|
||||||
|
- **Libsyn**
|
||||||
- **lifenews**: LIFE | NEWS
|
- **lifenews**: LIFE | NEWS
|
||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
- **livestream**
|
- **livestream**
|
||||||
@@ -235,6 +255,7 @@
|
|||||||
- **Mgoon**
|
- **Mgoon**
|
||||||
- **Minhateca**
|
- **Minhateca**
|
||||||
- **MinistryGrid**
|
- **MinistryGrid**
|
||||||
|
- **miomio.tv**
|
||||||
- **mitele.es**
|
- **mitele.es**
|
||||||
- **mixcloud**
|
- **mixcloud**
|
||||||
- **MLB**
|
- **MLB**
|
||||||
@@ -263,10 +284,13 @@
|
|||||||
- **myvideo**
|
- **myvideo**
|
||||||
- **MyVidster**
|
- **MyVidster**
|
||||||
- **n-tv.de**
|
- **n-tv.de**
|
||||||
|
- **NationalGeographic**
|
||||||
- **Naver**
|
- **Naver**
|
||||||
- **NBA**
|
- **NBA**
|
||||||
- **NBC**
|
- **NBC**
|
||||||
- **NBCNews**
|
- **NBCNews**
|
||||||
|
- **NBCSports**
|
||||||
|
- **NBCSportsVPlayer**
|
||||||
- **ndr**: NDR.de - Mediathek
|
- **ndr**: NDR.de - Mediathek
|
||||||
- **NDTV**
|
- **NDTV**
|
||||||
- **NerdCubedFeed**
|
- **NerdCubedFeed**
|
||||||
@@ -294,16 +318,19 @@
|
|||||||
- **npo.nl:radio**
|
- **npo.nl:radio**
|
||||||
- **npo.nl:radio:fragment**
|
- **npo.nl:radio:fragment**
|
||||||
- **NRK**
|
- **NRK**
|
||||||
|
- **NRKPlaylist**
|
||||||
- **NRKTV**
|
- **NRKTV**
|
||||||
- **ntv.ru**
|
- **ntv.ru**
|
||||||
- **Nuvid**
|
- **Nuvid**
|
||||||
- **NYTimes**
|
- **NYTimes**
|
||||||
- **ocw.mit.edu**
|
- **ocw.mit.edu**
|
||||||
|
- **Odnoklassniki**
|
||||||
- **OktoberfestTV**
|
- **OktoberfestTV**
|
||||||
- **on.aol.com**
|
- **on.aol.com**
|
||||||
- **Ooyala**
|
- **Ooyala**
|
||||||
- **OpenFilm**
|
- **OpenFilm**
|
||||||
- **orf:fm4**: radio FM4
|
- **orf:fm4**: radio FM4
|
||||||
|
- **orf:iptv**: iptv.ORF.at
|
||||||
- **orf:oe1**: Radio Österreich 1
|
- **orf:oe1**: Radio Österreich 1
|
||||||
- **orf:tvthek**: ORF TVthek
|
- **orf:tvthek**: ORF TVthek
|
||||||
- **parliamentlive.tv**: UK parliament videos
|
- **parliamentlive.tv**: UK parliament videos
|
||||||
@@ -311,21 +338,27 @@
|
|||||||
- **PBS**
|
- **PBS**
|
||||||
- **Phoenix**
|
- **Phoenix**
|
||||||
- **Photobucket**
|
- **Photobucket**
|
||||||
|
- **Pladform**
|
||||||
- **PlanetaPlay**
|
- **PlanetaPlay**
|
||||||
- **play.fm**
|
- **play.fm**
|
||||||
- **played.to**
|
- **played.to**
|
||||||
- **Playvid**
|
- **Playvid**
|
||||||
|
- **Playwire**
|
||||||
- **plus.google**: Google Plus
|
- **plus.google**: Google Plus
|
||||||
- **pluzz.francetv.fr**
|
- **pluzz.francetv.fr**
|
||||||
- **podomatic**
|
- **podomatic**
|
||||||
- **PornHd**
|
- **PornHd**
|
||||||
- **PornHub**
|
- **PornHub**
|
||||||
|
- **PornHubPlaylist**
|
||||||
- **Pornotube**
|
- **Pornotube**
|
||||||
- **PornoXO**
|
- **PornoXO**
|
||||||
|
- **PrimeShareTV**
|
||||||
- **PromptFile**
|
- **PromptFile**
|
||||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||||
|
- **Puls4**
|
||||||
- **Pyvideo**
|
- **Pyvideo**
|
||||||
- **QuickVid**
|
- **QuickVid**
|
||||||
|
- **R7**
|
||||||
- **radio.de**
|
- **radio.de**
|
||||||
- **radiobremen**
|
- **radiobremen**
|
||||||
- **radiofrance**
|
- **radiofrance**
|
||||||
@@ -345,6 +378,7 @@
|
|||||||
- **RTP**
|
- **RTP**
|
||||||
- **RTS**: RTS.ch
|
- **RTS**: RTS.ch
|
||||||
- **rtve.es:alacarta**: RTVE a la carta
|
- **rtve.es:alacarta**: RTVE a la carta
|
||||||
|
- **rtve.es:infantil**: RTVE infantil
|
||||||
- **rtve.es:live**: RTVE.es live streams
|
- **rtve.es:live**: RTVE.es live streams
|
||||||
- **RUHD**
|
- **RUHD**
|
||||||
- **rutube**: Rutube videos
|
- **rutube**: Rutube videos
|
||||||
@@ -353,6 +387,8 @@
|
|||||||
- **rutube:movie**: Rutube movies
|
- **rutube:movie**: Rutube movies
|
||||||
- **rutube:person**: Rutube person videos
|
- **rutube:person**: Rutube person videos
|
||||||
- **RUTV**: RUTV.RU
|
- **RUTV**: RUTV.RU
|
||||||
|
- **safari**: safaribooksonline.com online video
|
||||||
|
- **safari:course**: safaribooksonline.com online courses
|
||||||
- **Sandia**: Sandia National Laboratories
|
- **Sandia**: Sandia National Laboratories
|
||||||
- **Sapo**: SAPO Vídeos
|
- **Sapo**: SAPO Vídeos
|
||||||
- **savefrom.net**
|
- **savefrom.net**
|
||||||
@@ -381,7 +417,8 @@
|
|||||||
- **soundcloud:playlist**
|
- **soundcloud:playlist**
|
||||||
- **soundcloud:set**
|
- **soundcloud:set**
|
||||||
- **soundcloud:user**
|
- **soundcloud:user**
|
||||||
- **Soundgasm**
|
- **soundgasm**
|
||||||
|
- **soundgasm:profile**
|
||||||
- **southpark.cc.com**
|
- **southpark.cc.com**
|
||||||
- **southpark.de**
|
- **southpark.de**
|
||||||
- **Space**
|
- **Space**
|
||||||
@@ -394,13 +431,14 @@
|
|||||||
- **SportBox**
|
- **SportBox**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
- **SRMediathek**: Saarländischer Rundfunk
|
- **SRMediathek**: Saarländischer Rundfunk
|
||||||
|
- **SSA**
|
||||||
- **stanfordoc**: Stanford Open ClassRoom
|
- **stanfordoc**: Stanford Open ClassRoom
|
||||||
- **Steam**
|
- **Steam**
|
||||||
- **streamcloud.eu**
|
- **streamcloud.eu**
|
||||||
- **StreamCZ**
|
- **StreamCZ**
|
||||||
- **StreetVoice**
|
- **StreetVoice**
|
||||||
- **SunPorno**
|
- **SunPorno**
|
||||||
- **SVTPlay**
|
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||||
- **SWRMediathek**
|
- **SWRMediathek**
|
||||||
- **Syfy**
|
- **Syfy**
|
||||||
- **SztvHu**
|
- **SztvHu**
|
||||||
@@ -447,6 +485,7 @@
|
|||||||
- **Turbo**
|
- **Turbo**
|
||||||
- **Tutv**
|
- **Tutv**
|
||||||
- **tv.dfb.de**
|
- **tv.dfb.de**
|
||||||
|
- **TV4**: tv4.se and tv4play.se
|
||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvp.pl**
|
- **tvp.pl**
|
||||||
- **tvp.pl:Series**
|
- **tvp.pl:Series**
|
||||||
@@ -462,13 +501,16 @@
|
|||||||
- **Ubu**
|
- **Ubu**
|
||||||
- **udemy**
|
- **udemy**
|
||||||
- **udemy:course**
|
- **udemy:course**
|
||||||
|
- **Ultimedia**
|
||||||
- **Unistra**
|
- **Unistra**
|
||||||
- **Urort**: NRK P3 Urørt
|
- **Urort**: NRK P3 Urørt
|
||||||
- **ustream**
|
- **ustream**
|
||||||
- **ustream:channel**
|
- **ustream:channel**
|
||||||
|
- **Varzesh3**
|
||||||
- **Vbox7**
|
- **Vbox7**
|
||||||
- **VeeHD**
|
- **VeeHD**
|
||||||
- **Veoh**
|
- **Veoh**
|
||||||
|
- **Vessel**
|
||||||
- **Vesti**: Вести.Ru
|
- **Vesti**: Вести.Ru
|
||||||
- **Vevo**
|
- **Vevo**
|
||||||
- **VGTV**
|
- **VGTV**
|
||||||
@@ -489,6 +531,7 @@
|
|||||||
- **Vidzi**
|
- **Vidzi**
|
||||||
- **vier**
|
- **vier**
|
||||||
- **vier:videos**
|
- **vier:videos**
|
||||||
|
- **Viewster**
|
||||||
- **viki**
|
- **viki**
|
||||||
- **vimeo**
|
- **vimeo**
|
||||||
- **vimeo:album**
|
- **vimeo:album**
|
||||||
@@ -535,6 +578,9 @@
|
|||||||
- **XXXYMovies**
|
- **XXXYMovies**
|
||||||
- **Yahoo**: Yahoo screen and movies
|
- **Yahoo**: Yahoo screen and movies
|
||||||
- **Yam**
|
- **Yam**
|
||||||
|
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||||
|
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||||
|
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||||
- **YesJapan**
|
- **YesJapan**
|
||||||
- **Ynet**
|
- **Ynet**
|
||||||
- **YouJizz**
|
- **YouJizz**
|
||||||
@@ -553,7 +599,8 @@
|
|||||||
- **youtube:show**: YouTube.com (multi-season) shows
|
- **youtube:show**: YouTube.com (multi-season) shows
|
||||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||||
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||||
|
- **Zapiks**
|
||||||
- **ZDF**
|
- **ZDF**
|
||||||
- **ZDFChannel**
|
- **ZDFChannel**
|
||||||
- **zingmp3:album**: mp3.zing.vn albums
|
- **zingmp3:album**: mp3.zing.vn albums
|
||||||
|
@@ -28,7 +28,7 @@
|
|||||||
"retries": 10,
|
"retries": 10,
|
||||||
"simulate": false,
|
"simulate": false,
|
||||||
"subtitleslang": null,
|
"subtitleslang": null,
|
||||||
"subtitlesformat": "srt",
|
"subtitlesformat": "best",
|
||||||
"test": true,
|
"test": true,
|
||||||
"updatetime": true,
|
"updatetime": true,
|
||||||
"usenetrc": false,
|
"usenetrc": false,
|
||||||
|
@@ -14,6 +14,9 @@ from test.helper import FakeYDL, assertRegexpMatches
|
|||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
from youtube_dl.postprocessor.common import PostProcessor
|
from youtube_dl.postprocessor.common import PostProcessor
|
||||||
|
from youtube_dl.utils import match_filter_func
|
||||||
|
|
||||||
|
TEST_URL = 'http://localhost/sample.mp4'
|
||||||
|
|
||||||
|
|
||||||
class YDL(FakeYDL):
|
class YDL(FakeYDL):
|
||||||
@@ -46,8 +49,8 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
ydl = YDL()
|
ydl = YDL()
|
||||||
ydl.params['prefer_free_formats'] = True
|
ydl.params['prefer_free_formats'] = True
|
||||||
formats = [
|
formats = [
|
||||||
{'ext': 'webm', 'height': 460, 'url': 'x'},
|
{'ext': 'webm', 'height': 460, 'url': TEST_URL},
|
||||||
{'ext': 'mp4', 'height': 460, 'url': 'y'},
|
{'ext': 'mp4', 'height': 460, 'url': TEST_URL},
|
||||||
]
|
]
|
||||||
info_dict = _make_result(formats)
|
info_dict = _make_result(formats)
|
||||||
yie = YoutubeIE(ydl)
|
yie = YoutubeIE(ydl)
|
||||||
@@ -60,8 +63,8 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
ydl = YDL()
|
ydl = YDL()
|
||||||
ydl.params['prefer_free_formats'] = True
|
ydl.params['prefer_free_formats'] = True
|
||||||
formats = [
|
formats = [
|
||||||
{'ext': 'webm', 'height': 720, 'url': 'a'},
|
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
|
||||||
{'ext': 'mp4', 'height': 1080, 'url': 'b'},
|
{'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
|
||||||
]
|
]
|
||||||
info_dict['formats'] = formats
|
info_dict['formats'] = formats
|
||||||
yie = YoutubeIE(ydl)
|
yie = YoutubeIE(ydl)
|
||||||
@@ -74,9 +77,9 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
ydl = YDL()
|
ydl = YDL()
|
||||||
ydl.params['prefer_free_formats'] = False
|
ydl.params['prefer_free_formats'] = False
|
||||||
formats = [
|
formats = [
|
||||||
{'ext': 'webm', 'height': 720, 'url': '_'},
|
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
|
||||||
{'ext': 'mp4', 'height': 720, 'url': '_'},
|
{'ext': 'mp4', 'height': 720, 'url': TEST_URL},
|
||||||
{'ext': 'flv', 'height': 720, 'url': '_'},
|
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
|
||||||
]
|
]
|
||||||
info_dict['formats'] = formats
|
info_dict['formats'] = formats
|
||||||
yie = YoutubeIE(ydl)
|
yie = YoutubeIE(ydl)
|
||||||
@@ -88,8 +91,8 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
ydl = YDL()
|
ydl = YDL()
|
||||||
ydl.params['prefer_free_formats'] = False
|
ydl.params['prefer_free_formats'] = False
|
||||||
formats = [
|
formats = [
|
||||||
{'ext': 'flv', 'height': 720, 'url': '_'},
|
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
|
||||||
{'ext': 'webm', 'height': 720, 'url': '_'},
|
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
|
||||||
]
|
]
|
||||||
info_dict['formats'] = formats
|
info_dict['formats'] = formats
|
||||||
yie = YoutubeIE(ydl)
|
yie = YoutubeIE(ydl)
|
||||||
@@ -133,10 +136,10 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
|
|
||||||
def test_format_selection(self):
|
def test_format_selection(self):
|
||||||
formats = [
|
formats = [
|
||||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
|
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
||||||
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
|
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
|
||||||
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
|
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
|
||||||
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
|
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
|
||||||
]
|
]
|
||||||
info_dict = _make_result(formats)
|
info_dict = _make_result(formats)
|
||||||
|
|
||||||
@@ -167,10 +170,10 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
|
|
||||||
def test_format_selection_audio(self):
|
def test_format_selection_audio(self):
|
||||||
formats = [
|
formats = [
|
||||||
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
|
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||||
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
|
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
|
||||||
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
|
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
|
||||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
|
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
|
||||||
]
|
]
|
||||||
info_dict = _make_result(formats)
|
info_dict = _make_result(formats)
|
||||||
|
|
||||||
@@ -185,8 +188,8 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
self.assertEqual(downloaded['format_id'], 'audio-low')
|
self.assertEqual(downloaded['format_id'], 'audio-low')
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
|
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
||||||
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
|
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
|
||||||
]
|
]
|
||||||
info_dict = _make_result(formats)
|
info_dict = _make_result(formats)
|
||||||
|
|
||||||
@@ -228,9 +231,9 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
|
|
||||||
def test_format_selection_video(self):
|
def test_format_selection_video(self):
|
||||||
formats = [
|
formats = [
|
||||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
|
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
|
||||||
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
|
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
|
||||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
|
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
|
||||||
]
|
]
|
||||||
info_dict = _make_result(formats)
|
info_dict = _make_result(formats)
|
||||||
|
|
||||||
@@ -337,6 +340,67 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], 'G')
|
self.assertEqual(downloaded['format_id'], 'G')
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeDL(unittest.TestCase):
|
||||||
|
def test_subtitles(self):
|
||||||
|
def s_formats(lang, autocaption=False):
|
||||||
|
return [{
|
||||||
|
'ext': ext,
|
||||||
|
'url': 'http://localhost/video.%s.%s' % (lang, ext),
|
||||||
|
'_auto': autocaption,
|
||||||
|
} for ext in ['vtt', 'srt', 'ass']]
|
||||||
|
subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
|
||||||
|
auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
|
||||||
|
info_dict = {
|
||||||
|
'id': 'test',
|
||||||
|
'title': 'Test',
|
||||||
|
'url': 'http://localhost/video.mp4',
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'automatic_captions': auto_captions,
|
||||||
|
'extractor': 'TEST',
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_info(params={}):
|
||||||
|
params.setdefault('simulate', True)
|
||||||
|
ydl = YDL(params)
|
||||||
|
ydl.report_warning = lambda *args, **kargs: None
|
||||||
|
return ydl.process_video_result(info_dict, download=False)
|
||||||
|
|
||||||
|
result = get_info()
|
||||||
|
self.assertFalse(result.get('requested_subtitles'))
|
||||||
|
self.assertEqual(result['subtitles'], subtitles)
|
||||||
|
self.assertEqual(result['automatic_captions'], auto_captions)
|
||||||
|
|
||||||
|
result = get_info({'writesubtitles': True})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertTrue(subs)
|
||||||
|
self.assertEqual(set(subs.keys()), set(['en']))
|
||||||
|
self.assertTrue(subs['en'].get('data') is None)
|
||||||
|
self.assertEqual(subs['en']['ext'], 'ass')
|
||||||
|
|
||||||
|
result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertEqual(subs['en']['ext'], 'srt')
|
||||||
|
|
||||||
|
result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertTrue(subs)
|
||||||
|
self.assertEqual(set(subs.keys()), set(['es', 'fr']))
|
||||||
|
|
||||||
|
result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertTrue(subs)
|
||||||
|
self.assertEqual(set(subs.keys()), set(['es', 'pt']))
|
||||||
|
self.assertFalse(subs['es']['_auto'])
|
||||||
|
self.assertTrue(subs['pt']['_auto'])
|
||||||
|
|
||||||
|
result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertTrue(subs)
|
||||||
|
self.assertEqual(set(subs.keys()), set(['es', 'pt']))
|
||||||
|
self.assertTrue(subs['es']['_auto'])
|
||||||
|
self.assertTrue(subs['pt']['_auto'])
|
||||||
|
|
||||||
def test_add_extra_info(self):
|
def test_add_extra_info(self):
|
||||||
test_dict = {
|
test_dict = {
|
||||||
'extractor': 'Foo',
|
'extractor': 'Foo',
|
||||||
@@ -400,6 +464,73 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
||||||
os.unlink(audiofile)
|
os.unlink(audiofile)
|
||||||
|
|
||||||
|
def test_match_filter(self):
|
||||||
|
class FilterYDL(YDL):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(FilterYDL, self).__init__(*args, **kwargs)
|
||||||
|
self.params['simulate'] = True
|
||||||
|
|
||||||
|
def process_info(self, info_dict):
|
||||||
|
super(YDL, self).process_info(info_dict)
|
||||||
|
|
||||||
|
def _match_entry(self, info_dict, incomplete):
|
||||||
|
res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
|
||||||
|
if res is None:
|
||||||
|
self.downloaded_info_dicts.append(info_dict)
|
||||||
|
return res
|
||||||
|
|
||||||
|
first = {
|
||||||
|
'id': '1',
|
||||||
|
'url': TEST_URL,
|
||||||
|
'title': 'one',
|
||||||
|
'extractor': 'TEST',
|
||||||
|
'duration': 30,
|
||||||
|
'filesize': 10 * 1024,
|
||||||
|
}
|
||||||
|
second = {
|
||||||
|
'id': '2',
|
||||||
|
'url': TEST_URL,
|
||||||
|
'title': 'two',
|
||||||
|
'extractor': 'TEST',
|
||||||
|
'duration': 10,
|
||||||
|
'description': 'foo',
|
||||||
|
'filesize': 5 * 1024,
|
||||||
|
}
|
||||||
|
videos = [first, second]
|
||||||
|
|
||||||
|
def get_videos(filter_=None):
|
||||||
|
ydl = FilterYDL({'match_filter': filter_})
|
||||||
|
for v in videos:
|
||||||
|
ydl.process_ie_result(v, download=True)
|
||||||
|
return [v['id'] for v in ydl.downloaded_info_dicts]
|
||||||
|
|
||||||
|
res = get_videos()
|
||||||
|
self.assertEqual(res, ['1', '2'])
|
||||||
|
|
||||||
|
def f(v):
|
||||||
|
if v['id'] == '1':
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return 'Video id is not 1'
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, ['1'])
|
||||||
|
|
||||||
|
f = match_filter_func('duration < 30')
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, ['2'])
|
||||||
|
|
||||||
|
f = match_filter_func('description = foo')
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, ['2'])
|
||||||
|
|
||||||
|
f = match_filter_func('description =? foo')
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, ['1', '2'])
|
||||||
|
|
||||||
|
f = match_filter_func('filesize > 5KiB')
|
||||||
|
res = get_videos(f)
|
||||||
|
self.assertEqual(res, ['1'])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
55
test/test_aes.py
Normal file
55
test/test_aes.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
|
||||||
|
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
|
||||||
|
import base64
|
||||||
|
|
||||||
|
# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py'
|
||||||
|
|
||||||
|
|
||||||
|
class TestAES(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.key = self.iv = [0x20, 0x15] + 14 * [0]
|
||||||
|
self.secret_msg = b'Secret message goes here'
|
||||||
|
|
||||||
|
def test_encrypt(self):
|
||||||
|
msg = b'message'
|
||||||
|
key = list(range(16))
|
||||||
|
encrypted = aes_encrypt(bytes_to_intlist(msg), key)
|
||||||
|
decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
|
||||||
|
self.assertEqual(decrypted, msg)
|
||||||
|
|
||||||
|
def test_cbc_decrypt(self):
|
||||||
|
data = bytes_to_intlist(
|
||||||
|
b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
|
||||||
|
)
|
||||||
|
decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
|
||||||
|
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||||
|
|
||||||
|
def test_decrypt_text(self):
|
||||||
|
password = intlist_to_bytes(self.key).decode('utf-8')
|
||||||
|
encrypted = base64.b64encode(
|
||||||
|
intlist_to_bytes(self.iv[:8]) +
|
||||||
|
b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
|
||||||
|
)
|
||||||
|
decrypted = (aes_decrypt_text(encrypted, password, 16))
|
||||||
|
self.assertEqual(decrypted, self.secret_msg)
|
||||||
|
|
||||||
|
password = intlist_to_bytes(self.key).decode('utf-8')
|
||||||
|
encrypted = base64.b64encode(
|
||||||
|
intlist_to_bytes(self.iv[:8]) +
|
||||||
|
b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
|
||||||
|
)
|
||||||
|
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
||||||
|
self.assertEqual(decrypted, self.secret_msg)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@@ -59,7 +59,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||||
|
|
||||||
def test_youtube_feeds(self):
|
def test_youtube_feeds(self):
|
||||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
|
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
||||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
||||||
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
||||||
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
|
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
|
||||||
@@ -104,11 +104,11 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||||
|
|
||||||
def test_vimeo_matching(self):
|
def test_vimeo_matching(self):
|
||||||
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
|
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||||
self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
|
self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
|
||||||
self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
|
self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
|
||||||
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
|
self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
|
||||||
self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
|
self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
|
||||||
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
|
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
|
||||||
|
|
||||||
# https://github.com/rg3/youtube-dl/issues/1930
|
# https://github.com/rg3/youtube-dl/issues/1930
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
@@ -27,5 +29,12 @@ class TestExecution(unittest.TestCase):
|
|||||||
def test_main_exec(self):
|
def test_main_exec(self):
|
||||||
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
|
|
||||||
|
def test_cmdline_umlauts(self):
|
||||||
|
p = subprocess.Popen(
|
||||||
|
[sys.executable, 'youtube_dl/__main__.py', 'ä', '--version'],
|
||||||
|
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
|
||||||
|
_, stderr = p.communicate()
|
||||||
|
self.assertFalse(stderr)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -8,7 +8,7 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_http_server
|
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||||
import ssl
|
import ssl
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
@@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase):
|
|||||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_proxy_handler(name):
|
||||||
|
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
|
proxy_name = name
|
||||||
|
|
||||||
|
def log_message(self, format, *args):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def do_GET(self):
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
|
||||||
|
return HTTPTestRequestHandler
|
||||||
|
|
||||||
|
|
||||||
|
class TestProxy(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.proxy = compat_http_server.HTTPServer(
|
||||||
|
('localhost', 0), _build_proxy_handler('normal'))
|
||||||
|
self.port = self.proxy.socket.getsockname()[1]
|
||||||
|
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
|
||||||
|
self.proxy_thread.daemon = True
|
||||||
|
self.proxy_thread.start()
|
||||||
|
|
||||||
|
self.cn_proxy = compat_http_server.HTTPServer(
|
||||||
|
('localhost', 0), _build_proxy_handler('cn'))
|
||||||
|
self.cn_port = self.cn_proxy.socket.getsockname()[1]
|
||||||
|
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
|
||||||
|
self.cn_proxy_thread.daemon = True
|
||||||
|
self.cn_proxy_thread.start()
|
||||||
|
|
||||||
|
def test_proxy(self):
|
||||||
|
cn_proxy = 'localhost:{0}'.format(self.cn_port)
|
||||||
|
ydl = YoutubeDL({
|
||||||
|
'proxy': 'localhost:{0}'.format(self.port),
|
||||||
|
'cn_verification_proxy': cn_proxy,
|
||||||
|
})
|
||||||
|
url = 'http://foo.com/bar'
|
||||||
|
response = ydl.urlopen(url).read().decode('utf-8')
|
||||||
|
self.assertEqual(response, 'normal: {0}'.format(url))
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Ytdl-request-proxy', cn_proxy)
|
||||||
|
response = ydl.urlopen(req).read().decode('utf-8')
|
||||||
|
self.assertEqual(response, 'cn: {0}'.format(url))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
26
test/test_netrc.py
Normal file
26
test/test_netrc.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
from youtube_dl.extractor import (
|
||||||
|
gen_extractors,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestNetRc(unittest.TestCase):
|
||||||
|
def test_netrc_present(self):
|
||||||
|
for ie in gen_extractors():
|
||||||
|
if not hasattr(ie, '_login'):
|
||||||
|
continue
|
||||||
|
self.assertTrue(
|
||||||
|
hasattr(ie, '_NETRC_MACHINE'),
|
||||||
|
'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
17
test/test_postprocessors.py
Normal file
17
test/test_postprocessors.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.postprocessor import MetadataFromTitlePP
|
||||||
|
|
||||||
|
|
||||||
|
class TestMetadataFromTitle(unittest.TestCase):
|
||||||
|
def test_format_to_regex(self):
|
||||||
|
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||||
|
self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
|
@@ -18,6 +18,15 @@ from youtube_dl.extractor import (
|
|||||||
VimeoIE,
|
VimeoIE,
|
||||||
WallaIE,
|
WallaIE,
|
||||||
CeskaTelevizeIE,
|
CeskaTelevizeIE,
|
||||||
|
LyndaIE,
|
||||||
|
NPOIE,
|
||||||
|
ComedyCentralIE,
|
||||||
|
NRKTVIE,
|
||||||
|
RaiIE,
|
||||||
|
VikiIE,
|
||||||
|
ThePlatformIE,
|
||||||
|
RTVEALaCartaIE,
|
||||||
|
FunnyOrDieIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -27,42 +36,38 @@ class BaseTestSubtitles(unittest.TestCase):
|
|||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.DL = FakeYDL()
|
self.DL = FakeYDL()
|
||||||
self.ie = self.IE(self.DL)
|
self.ie = self.IE()
|
||||||
|
self.DL.add_info_extractor(self.ie)
|
||||||
|
|
||||||
def getInfoDict(self):
|
def getInfoDict(self):
|
||||||
info_dict = self.ie.extract(self.url)
|
info_dict = self.DL.extract_info(self.url, download=False)
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
def getSubtitles(self):
|
def getSubtitles(self):
|
||||||
info_dict = self.getInfoDict()
|
info_dict = self.getInfoDict()
|
||||||
return info_dict['subtitles']
|
subtitles = info_dict['requested_subtitles']
|
||||||
|
if not subtitles:
|
||||||
|
return subtitles
|
||||||
|
for sub_info in subtitles.values():
|
||||||
|
if sub_info.get('data') is None:
|
||||||
|
uf = self.DL.urlopen(sub_info['url'])
|
||||||
|
sub_info['data'] = uf.read().decode('utf-8')
|
||||||
|
return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
|
||||||
|
|
||||||
|
|
||||||
class TestYoutubeSubtitles(BaseTestSubtitles):
|
class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||||
url = 'QRS8MkLhQmM'
|
url = 'QRS8MkLhQmM'
|
||||||
IE = YoutubeIE
|
IE = YoutubeIE
|
||||||
|
|
||||||
def test_youtube_no_writesubtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = False
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
|
|
||||||
def test_youtube_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
|
||||||
|
|
||||||
def test_youtube_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['it']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
|
||||||
|
|
||||||
def test_youtube_allsubtitles(self):
|
def test_youtube_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles.keys()), 13)
|
self.assertEqual(len(subtitles.keys()), 13)
|
||||||
|
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||||
|
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||||
|
for lang in ['it', 'fr', 'de']:
|
||||||
|
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
def test_youtube_subtitles_sbv_format(self):
|
def test_youtube_subtitles_sbv_format(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
@@ -76,12 +81,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
|||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||||
|
|
||||||
def test_youtube_list_subtitles(self):
|
|
||||||
self.DL.expect_warning('Video doesn\'t have automatic captions')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_youtube_automatic_captions(self):
|
def test_youtube_automatic_captions(self):
|
||||||
self.url = '8YoUxe5ncPo'
|
self.url = '8YoUxe5ncPo'
|
||||||
self.DL.params['writeautomaticsub'] = True
|
self.DL.params['writeautomaticsub'] = True
|
||||||
@@ -103,55 +102,22 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
def test_youtube_multiple_langs(self):
|
|
||||||
self.url = 'QRS8MkLhQmM'
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['it', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
|
|
||||||
class TestDailymotionSubtitles(BaseTestSubtitles):
|
class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.dailymotion.com/video/xczg00'
|
url = 'http://www.dailymotion.com/video/xczg00'
|
||||||
IE = DailymotionIE
|
IE = DailymotionIE
|
||||||
|
|
||||||
def test_no_writesubtitles(self):
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
|
|
||||||
def test_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
|
||||||
|
|
||||||
def test_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['fr']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertTrue(len(subtitles.keys()) >= 6)
|
self.assertTrue(len(subtitles.keys()) >= 6)
|
||||||
|
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
||||||
def test_list_subtitles(self):
|
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
for lang in ['es', 'fr', 'de']:
|
||||||
self.DL.params['listsubtitles'] = True
|
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_automatic_captions(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslang'] = ['en']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(len(subtitles.keys()) == 0)
|
|
||||||
|
|
||||||
def test_nosubtitles(self):
|
def test_nosubtitles(self):
|
||||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||||
@@ -159,61 +125,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
def test_multiple_langs(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['es', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
|
|
||||||
class TestTedSubtitles(BaseTestSubtitles):
|
class TestTedSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
||||||
IE = TEDIE
|
IE = TEDIE
|
||||||
|
|
||||||
def test_no_writesubtitles(self):
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
|
|
||||||
def test_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
|
|
||||||
|
|
||||||
def test_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['fr']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertTrue(len(subtitles.keys()) >= 28)
|
self.assertTrue(len(subtitles.keys()) >= 28)
|
||||||
|
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
|
||||||
def test_list_subtitles(self):
|
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
for lang in ['es', 'fr', 'de']:
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_automatic_captions(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslang'] = ['en']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(len(subtitles.keys()) == 0)
|
|
||||||
|
|
||||||
def test_multiple_langs(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['es', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
@@ -221,14 +147,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
|
|||||||
url = 'http://blip.tv/a/a-6603250'
|
url = 'http://blip.tv/a/a-6603250'
|
||||||
IE = BlipTVIE
|
IE = BlipTVIE
|
||||||
|
|
||||||
def test_list_subtitles(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
@@ -240,39 +159,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
|||||||
url = 'http://vimeo.com/76979871'
|
url = 'http://vimeo.com/76979871'
|
||||||
IE = VimeoIE
|
IE = VimeoIE
|
||||||
|
|
||||||
def test_no_writesubtitles(self):
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
|
|
||||||
def test_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
|
||||||
|
|
||||||
def test_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['fr']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||||
def test_list_subtitles(self):
|
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_automatic_captions(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslang'] = ['en']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(len(subtitles.keys()) == 0)
|
|
||||||
|
|
||||||
def test_nosubtitles(self):
|
def test_nosubtitles(self):
|
||||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||||
@@ -280,27 +173,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
def test_multiple_langs(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['es', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
|
|
||||||
class TestWallaSubtitles(BaseTestSubtitles):
|
class TestWallaSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
||||||
IE = WallaIE
|
IE = WallaIE
|
||||||
|
|
||||||
def test_list_subtitles(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
@@ -315,19 +194,13 @@ class TestWallaSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
|
|
||||||
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||||
IE = CeskaTelevizeIE
|
IE = CeskaTelevizeIE
|
||||||
|
|
||||||
def test_list_subtitles(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
@@ -342,7 +215,122 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
|
|
||||||
|
class TestLyndaSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
|
||||||
|
IE = LyndaIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
|
||||||
|
|
||||||
|
|
||||||
|
class TestNPOSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
|
||||||
|
IE = NPOIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['nl']))
|
||||||
|
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
|
||||||
|
|
||||||
|
|
||||||
|
class TestMTVSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
|
||||||
|
IE = ComedyCentralIE
|
||||||
|
|
||||||
|
def getInfoDict(self):
|
||||||
|
return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
|
||||||
|
|
||||||
|
|
||||||
|
class TestNRKSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
|
||||||
|
IE = NRKTVIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['no']))
|
||||||
|
self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
|
||||||
|
|
||||||
|
|
||||||
|
class TestRaiSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||||
|
IE = RaiIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['it']))
|
||||||
|
self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
|
||||||
|
|
||||||
|
|
||||||
|
class TestVikiSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
|
||||||
|
IE = VikiIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
|
||||||
|
|
||||||
|
|
||||||
|
class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||||
|
# from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
|
||||||
|
# (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
|
||||||
|
url = 'theplatform:JFUjUE1_ehvq'
|
||||||
|
IE = ThePlatformIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||||
|
|
||||||
|
|
||||||
|
class TestRtveSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
|
||||||
|
IE = RTVEALaCartaIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
print('Skipping, only available from Spain')
|
||||||
|
return
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['es']))
|
||||||
|
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||||
|
|
||||||
|
|
||||||
|
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
|
||||||
|
IE = FunnyOrDieIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@@ -34,8 +34,8 @@ def _make_testfunc(testfile):
|
|||||||
def test_func(self):
|
def test_func(self):
|
||||||
as_file = os.path.join(TEST_DIR, testfile)
|
as_file = os.path.join(TEST_DIR, testfile)
|
||||||
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
|
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
|
||||||
if ((not os.path.exists(swf_file))
|
if ((not os.path.exists(swf_file)) or
|
||||||
or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
|
os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
|
||||||
# Recompile
|
# Recompile
|
||||||
try:
|
try:
|
||||||
subprocess.check_call([
|
subprocess.check_call([
|
||||||
|
@@ -17,13 +17,22 @@ IGNORED_FILES = [
|
|||||||
'buildserver.py',
|
'buildserver.py',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
IGNORED_DIRS = [
|
||||||
|
'.git',
|
||||||
|
'.tox',
|
||||||
|
]
|
||||||
|
|
||||||
from test.helper import assertRegexpMatches
|
from test.helper import assertRegexpMatches
|
||||||
|
|
||||||
|
|
||||||
class TestUnicodeLiterals(unittest.TestCase):
|
class TestUnicodeLiterals(unittest.TestCase):
|
||||||
def test_all_files(self):
|
def test_all_files(self):
|
||||||
for dirpath, _, filenames in os.walk(rootDir):
|
for dirpath, dirnames, filenames in os.walk(rootDir):
|
||||||
|
for ignore_dir in IGNORED_DIRS:
|
||||||
|
if ignore_dir in dirnames:
|
||||||
|
# If we remove the directory from dirnames os.walk won't
|
||||||
|
# recurse into it
|
||||||
|
dirnames.remove(ignore_dir)
|
||||||
for basename in filenames:
|
for basename in filenames:
|
||||||
if not basename.endswith('.py'):
|
if not basename.endswith('.py'):
|
||||||
continue
|
continue
|
||||||
|
@@ -24,6 +24,7 @@ from youtube_dl.utils import (
|
|||||||
encodeFilename,
|
encodeFilename,
|
||||||
escape_rfc3986,
|
escape_rfc3986,
|
||||||
escape_url,
|
escape_url,
|
||||||
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
@@ -38,6 +39,8 @@ from youtube_dl.utils import (
|
|||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
read_batch_urls,
|
read_batch_urls,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
|
sanitize_path,
|
||||||
|
sanitize_url_path_consecutive_slashes,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
@@ -52,6 +55,7 @@ from youtube_dl.utils import (
|
|||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
version_tuple,
|
version_tuple,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
xpath_text,
|
||||||
render_table,
|
render_table,
|
||||||
match_str,
|
match_str,
|
||||||
)
|
)
|
||||||
@@ -86,6 +90,11 @@ class TestUtil(unittest.TestCase):
|
|||||||
sanitize_filename('New World record at 0:12:34'),
|
sanitize_filename('New World record at 0:12:34'),
|
||||||
'New World record at 0_12_34')
|
'New World record at 0_12_34')
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
|
||||||
|
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
|
||||||
|
self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
|
||||||
|
self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
|
||||||
|
|
||||||
forbidden = '"\0\\/'
|
forbidden = '"\0\\/'
|
||||||
for fc in forbidden:
|
for fc in forbidden:
|
||||||
for fbc in forbidden:
|
for fbc in forbidden:
|
||||||
@@ -126,6 +135,62 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
|
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
|
||||||
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
|
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
|
||||||
|
|
||||||
|
def test_sanitize_path(self):
|
||||||
|
if sys.platform != 'win32':
|
||||||
|
return
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_path('abc'), 'abc')
|
||||||
|
self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
|
||||||
|
self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
|
||||||
|
self.assertEqual(sanitize_path('abc|def'), 'abc#def')
|
||||||
|
self.assertEqual(sanitize_path('<>:"|?*'), '#######')
|
||||||
|
self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
|
||||||
|
self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
|
||||||
|
self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
|
||||||
|
self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
|
||||||
|
self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
|
||||||
|
self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
|
||||||
|
'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
|
||||||
|
'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
|
||||||
|
self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
|
||||||
|
self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
|
||||||
|
self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_path('../abc'), '..\\abc')
|
||||||
|
self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
|
||||||
|
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||||
|
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||||
|
|
||||||
|
def test_sanitize_url_path_consecutive_slashes(self):
|
||||||
|
self.assertEqual(
|
||||||
|
sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'),
|
||||||
|
'http://hostname/foo/bar/filename.html')
|
||||||
|
self.assertEqual(
|
||||||
|
sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'),
|
||||||
|
'http://hostname/foo/bar/filename.html')
|
||||||
|
self.assertEqual(
|
||||||
|
sanitize_url_path_consecutive_slashes('http://hostname//'),
|
||||||
|
'http://hostname/')
|
||||||
|
self.assertEqual(
|
||||||
|
sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'),
|
||||||
|
'http://hostname/foo/bar/filename.html')
|
||||||
|
self.assertEqual(
|
||||||
|
sanitize_url_path_consecutive_slashes('http://hostname/'),
|
||||||
|
'http://hostname/')
|
||||||
|
self.assertEqual(
|
||||||
|
sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
|
||||||
|
'http://hostname/abc/')
|
||||||
|
|
||||||
def test_ordered_set(self):
|
def test_ordered_set(self):
|
||||||
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
|
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
|
||||||
self.assertEqual(orderedSet([]), [])
|
self.assertEqual(orderedSet([]), [])
|
||||||
@@ -135,6 +200,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
|
|
||||||
def test_unescape_html(self):
|
def test_unescape_html(self):
|
||||||
self.assertEqual(unescapeHTML('%20;'), '%20;')
|
self.assertEqual(unescapeHTML('%20;'), '%20;')
|
||||||
|
self.assertEqual(unescapeHTML('/'), '/')
|
||||||
|
self.assertEqual(unescapeHTML('/'), '/')
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
unescapeHTML('é'), 'é')
|
unescapeHTML('é'), 'é')
|
||||||
|
|
||||||
@@ -187,6 +254,17 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
||||||
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
||||||
|
|
||||||
|
def test_xpath_text(self):
|
||||||
|
testxml = '''<root>
|
||||||
|
<div>
|
||||||
|
<p>Foo</p>
|
||||||
|
</div>
|
||||||
|
</root>'''
|
||||||
|
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||||
|
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
|
||||||
|
self.assertTrue(xpath_text(doc, 'div/bar') is None)
|
||||||
|
self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
|
||||||
|
|
||||||
def test_smuggle_url(self):
|
def test_smuggle_url(self):
|
||||||
data = {"ö": "ö", "abc": [3]}
|
data = {"ö": "ö", "abc": [3]}
|
||||||
url = 'https://foo.bar/baz?x=y#a'
|
url = 'https://foo.bar/baz?x=y#a'
|
||||||
@@ -244,6 +322,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
||||||
self.assertEqual(parse_duration('02:03:04'), 7384)
|
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||||
|
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||||
|
|
||||||
def test_fix_xml_ampersands(self):
|
def test_fix_xml_ampersands(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
7
tox.ini
7
tox.ini
@@ -1,8 +1,11 @@
|
|||||||
[tox]
|
[tox]
|
||||||
envlist = py26,py27,py33
|
envlist = py26,py27,py33,py34
|
||||||
[testenv]
|
[testenv]
|
||||||
deps =
|
deps =
|
||||||
nose
|
nose
|
||||||
coverage
|
coverage
|
||||||
commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html
|
defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
|
||||||
|
--exclude test_subtitles.py --exclude test_write_annotations.py
|
||||||
|
--exclude test_youtube_lists.py
|
||||||
|
commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
|
||||||
# test.test_download:TestDownload.test_NowVideo
|
# test.test_download:TestDownload.test_NowVideo
|
||||||
|
@@ -4,8 +4,10 @@
|
|||||||
from __future__ import absolute_import, unicode_literals
|
from __future__ import absolute_import, unicode_literals
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
import contextlib
|
||||||
import datetime
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
|
import fileinput
|
||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
@@ -28,6 +30,7 @@ from .compat import (
|
|||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
|
compat_get_terminal_size,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
compat_str,
|
compat_str,
|
||||||
@@ -46,18 +49,19 @@ from .utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
get_term_width,
|
|
||||||
locked_file,
|
locked_file,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
PagedList,
|
PagedList,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
|
PerRequestProxyHandler,
|
||||||
PostProcessingError,
|
PostProcessingError,
|
||||||
platform_name,
|
platform_name,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
render_table,
|
render_table,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
|
sanitize_path,
|
||||||
std_headers,
|
std_headers,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
takewhile_inclusive,
|
takewhile_inclusive,
|
||||||
@@ -154,7 +158,7 @@ class YoutubeDL(object):
|
|||||||
allsubtitles: Downloads all the subtitles of the video
|
allsubtitles: Downloads all the subtitles of the video
|
||||||
(requires writesubtitles or writeautomaticsub)
|
(requires writesubtitles or writeautomaticsub)
|
||||||
listsubtitles: Lists all available subtitles for the video
|
listsubtitles: Lists all available subtitles for the video
|
||||||
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
|
subtitlesformat: The format code for subtitles
|
||||||
subtitleslangs: List of languages of the subtitles to download
|
subtitleslangs: List of languages of the subtitles to download
|
||||||
keepvideo: Keep the video file after post-processing
|
keepvideo: Keep the video file after post-processing
|
||||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||||
@@ -181,6 +185,8 @@ class YoutubeDL(object):
|
|||||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||||
At the moment, this is only supported by YouTube.
|
At the moment, this is only supported by YouTube.
|
||||||
proxy: URL of the proxy server to use
|
proxy: URL of the proxy server to use
|
||||||
|
cn_verification_proxy: URL of the proxy to use for IP address verification
|
||||||
|
on Chinese sites. (Experimental)
|
||||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||||
support, using fridibi
|
support, using fridibi
|
||||||
@@ -247,10 +253,10 @@ class YoutubeDL(object):
|
|||||||
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
|
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the downloader (see youtube_dl/downloader/common.py):
|
||||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||||
xattr_set_filesize.
|
xattr_set_filesize, external_downloader_args.
|
||||||
|
|
||||||
The following options are used by the post processors:
|
The following options are used by the post processors:
|
||||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||||
@@ -284,7 +290,7 @@ class YoutubeDL(object):
|
|||||||
try:
|
try:
|
||||||
import pty
|
import pty
|
||||||
master, slave = pty.openpty()
|
master, slave = pty.openpty()
|
||||||
width = get_term_width()
|
width = compat_get_terminal_size().columns
|
||||||
if width is None:
|
if width is None:
|
||||||
width_args = []
|
width_args = []
|
||||||
else:
|
else:
|
||||||
@@ -308,8 +314,8 @@ class YoutubeDL(object):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
|
||||||
and not params.get('restrictfilenames', False)):
|
not params.get('restrictfilenames', False)):
|
||||||
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Assuming --restrict-filenames since file system encoding '
|
'Assuming --restrict-filenames since file system encoding '
|
||||||
@@ -317,8 +323,10 @@ class YoutubeDL(object):
|
|||||||
'Set the LC_ALL environment variable to fix this.')
|
'Set the LC_ALL environment variable to fix this.')
|
||||||
self.params['restrictfilenames'] = True
|
self.params['restrictfilenames'] = True
|
||||||
|
|
||||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
if isinstance(params.get('outtmpl'), bytes):
|
||||||
self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
self.report_warning(
|
||||||
|
'Parameter outtmpl is bytes, but should be a unicode string. '
|
||||||
|
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
|
||||||
|
|
||||||
self._setup_opener()
|
self._setup_opener()
|
||||||
|
|
||||||
@@ -557,7 +565,7 @@ class YoutubeDL(object):
|
|||||||
if v is not None)
|
if v is not None)
|
||||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||||
|
|
||||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
|
||||||
tmpl = compat_expanduser(outtmpl)
|
tmpl = compat_expanduser(outtmpl)
|
||||||
filename = tmpl % template_dict
|
filename = tmpl % template_dict
|
||||||
# Temporary fix for #4787
|
# Temporary fix for #4787
|
||||||
@@ -624,7 +632,7 @@ class YoutubeDL(object):
|
|||||||
Returns a list with a dictionary for each video we find.
|
Returns a list with a dictionary for each video we find.
|
||||||
If 'download', also downloads the videos.
|
If 'download', also downloads the videos.
|
||||||
extra_info is a dict containing the extra values to add to each result
|
extra_info is a dict containing the extra values to add to each result
|
||||||
'''
|
'''
|
||||||
|
|
||||||
if ie_key:
|
if ie_key:
|
||||||
ies = [self.get_info_extractor(ie_key)]
|
ies = [self.get_info_extractor(ie_key)]
|
||||||
@@ -1008,6 +1016,15 @@ class YoutubeDL(object):
|
|||||||
info_dict['timestamp'])
|
info_dict['timestamp'])
|
||||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||||
|
|
||||||
|
if self.params.get('listsubtitles', False):
|
||||||
|
if 'automatic_captions' in info_dict:
|
||||||
|
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
|
||||||
|
self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
|
||||||
|
return
|
||||||
|
info_dict['requested_subtitles'] = self.process_subtitles(
|
||||||
|
info_dict['id'], info_dict.get('subtitles'),
|
||||||
|
info_dict.get('automatic_captions'))
|
||||||
|
|
||||||
# This extractors handle format selection themselves
|
# This extractors handle format selection themselves
|
||||||
if info_dict['extractor'] in ['Youku']:
|
if info_dict['extractor'] in ['Youku']:
|
||||||
if download:
|
if download:
|
||||||
@@ -1071,8 +1088,7 @@ class YoutubeDL(object):
|
|||||||
if req_format is None:
|
if req_format is None:
|
||||||
req_format = 'best'
|
req_format = 'best'
|
||||||
formats_to_download = []
|
formats_to_download = []
|
||||||
# The -1 is for supporting YoutubeIE
|
if req_format == 'all':
|
||||||
if req_format in ('-1', 'all'):
|
|
||||||
formats_to_download = formats
|
formats_to_download = formats
|
||||||
else:
|
else:
|
||||||
for rfstr in req_format.split(','):
|
for rfstr in req_format.split(','):
|
||||||
@@ -1136,6 +1152,55 @@ class YoutubeDL(object):
|
|||||||
info_dict.update(formats_to_download[-1])
|
info_dict.update(formats_to_download[-1])
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
|
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
|
||||||
|
"""Select the requested subtitles and their format"""
|
||||||
|
available_subs = {}
|
||||||
|
if normal_subtitles and self.params.get('writesubtitles'):
|
||||||
|
available_subs.update(normal_subtitles)
|
||||||
|
if automatic_captions and self.params.get('writeautomaticsub'):
|
||||||
|
for lang, cap_info in automatic_captions.items():
|
||||||
|
if lang not in available_subs:
|
||||||
|
available_subs[lang] = cap_info
|
||||||
|
|
||||||
|
if (not self.params.get('writesubtitles') and not
|
||||||
|
self.params.get('writeautomaticsub') or not
|
||||||
|
available_subs):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if self.params.get('allsubtitles', False):
|
||||||
|
requested_langs = available_subs.keys()
|
||||||
|
else:
|
||||||
|
if self.params.get('subtitleslangs', False):
|
||||||
|
requested_langs = self.params.get('subtitleslangs')
|
||||||
|
elif 'en' in available_subs:
|
||||||
|
requested_langs = ['en']
|
||||||
|
else:
|
||||||
|
requested_langs = [list(available_subs.keys())[0]]
|
||||||
|
|
||||||
|
formats_query = self.params.get('subtitlesformat', 'best')
|
||||||
|
formats_preference = formats_query.split('/') if formats_query else []
|
||||||
|
subs = {}
|
||||||
|
for lang in requested_langs:
|
||||||
|
formats = available_subs.get(lang)
|
||||||
|
if formats is None:
|
||||||
|
self.report_warning('%s subtitles not available for %s' % (lang, video_id))
|
||||||
|
continue
|
||||||
|
for ext in formats_preference:
|
||||||
|
if ext == 'best':
|
||||||
|
f = formats[-1]
|
||||||
|
break
|
||||||
|
matches = list(filter(lambda f: f['ext'] == ext, formats))
|
||||||
|
if matches:
|
||||||
|
f = matches[-1]
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
f = formats[-1]
|
||||||
|
self.report_warning(
|
||||||
|
'No subtitle format found matching "%s" for language %s, '
|
||||||
|
'using %s' % (formats_query, lang, f['ext']))
|
||||||
|
subs[lang] = f
|
||||||
|
return subs
|
||||||
|
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
"""Process a single resolved IE result."""
|
"""Process a single resolved IE result."""
|
||||||
|
|
||||||
@@ -1150,9 +1215,6 @@ class YoutubeDL(object):
|
|||||||
if len(info_dict['title']) > 200:
|
if len(info_dict['title']) > 200:
|
||||||
info_dict['title'] = info_dict['title'][:197] + '...'
|
info_dict['title'] = info_dict['title'][:197] + '...'
|
||||||
|
|
||||||
# Keep for backwards compatibility
|
|
||||||
info_dict['stitle'] = info_dict['title']
|
|
||||||
|
|
||||||
if 'format' not in info_dict:
|
if 'format' not in info_dict:
|
||||||
info_dict['format'] = info_dict['ext']
|
info_dict['format'] = info_dict['ext']
|
||||||
|
|
||||||
@@ -1198,7 +1260,7 @@ class YoutubeDL(object):
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
dn = os.path.dirname(encodeFilename(filename))
|
dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
|
||||||
if dn and not os.path.exists(dn):
|
if dn and not os.path.exists(dn):
|
||||||
os.makedirs(dn)
|
os.makedirs(dn)
|
||||||
except (OSError, IOError) as err:
|
except (OSError, IOError) as err:
|
||||||
@@ -1238,15 +1300,23 @@ class YoutubeDL(object):
|
|||||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||||
self.params.get('writeautomaticsub')])
|
self.params.get('writeautomaticsub')])
|
||||||
|
|
||||||
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
|
if subtitles_are_requested and info_dict.get('requested_subtitles'):
|
||||||
# subtitles download errors are already managed as troubles in relevant IE
|
# subtitles download errors are already managed as troubles in relevant IE
|
||||||
# that way it will silently go on when used with unsupporting IE
|
# that way it will silently go on when used with unsupporting IE
|
||||||
subtitles = info_dict['subtitles']
|
subtitles = info_dict['requested_subtitles']
|
||||||
sub_format = self.params.get('subtitlesformat', 'srt')
|
ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||||
for sub_lang in subtitles.keys():
|
for sub_lang, sub_info in subtitles.items():
|
||||||
sub = subtitles[sub_lang]
|
sub_format = sub_info['ext']
|
||||||
if sub is None:
|
if sub_info.get('data') is not None:
|
||||||
continue
|
sub_data = sub_info['data']
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
sub_data = ie._download_webpage(
|
||||||
|
sub_info['url'], info_dict['id'], note=False)
|
||||||
|
except ExtractorError as err:
|
||||||
|
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||||
|
(sub_lang, compat_str(err.cause)))
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||||
@@ -1254,7 +1324,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||||
subfile.write(sub)
|
subfile.write(sub_data)
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||||
return
|
return
|
||||||
@@ -1366,8 +1436,8 @@ class YoutubeDL(object):
|
|||||||
"""Download a given list of URLs."""
|
"""Download a given list of URLs."""
|
||||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||||
if (len(url_list) > 1 and
|
if (len(url_list) > 1 and
|
||||||
'%' not in outtmpl
|
'%' not in outtmpl and
|
||||||
and self.params.get('max_downloads') != 1):
|
self.params.get('max_downloads') != 1):
|
||||||
raise SameFileError(outtmpl)
|
raise SameFileError(outtmpl)
|
||||||
|
|
||||||
for url in url_list:
|
for url in url_list:
|
||||||
@@ -1386,8 +1456,11 @@ class YoutubeDL(object):
|
|||||||
return self._download_retcode
|
return self._download_retcode
|
||||||
|
|
||||||
def download_with_info_file(self, info_filename):
|
def download_with_info_file(self, info_filename):
|
||||||
with io.open(info_filename, 'r', encoding='utf-8') as f:
|
with contextlib.closing(fileinput.FileInput(
|
||||||
info = json.load(f)
|
[info_filename], mode='r',
|
||||||
|
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
||||||
|
# FileInput doesn't have a read method, we can't call json.load
|
||||||
|
info = json.loads('\n'.join(f))
|
||||||
try:
|
try:
|
||||||
self.process_ie_result(info, download=True)
|
self.process_ie_result(info, download=True)
|
||||||
except DownloadError:
|
except DownloadError:
|
||||||
@@ -1564,6 +1637,17 @@ class YoutubeDL(object):
|
|||||||
['ID', 'width', 'height', 'URL'],
|
['ID', 'width', 'height', 'URL'],
|
||||||
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
|
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
|
||||||
|
|
||||||
|
def list_subtitles(self, video_id, subtitles, name='subtitles'):
|
||||||
|
if not subtitles:
|
||||||
|
self.to_screen('%s has no %s' % (video_id, name))
|
||||||
|
return
|
||||||
|
self.to_screen(
|
||||||
|
'Available %s for %s:' % (name, video_id))
|
||||||
|
self.to_screen(render_table(
|
||||||
|
['Language', 'formats'],
|
||||||
|
[[lang, ', '.join(f['ext'] for f in reversed(formats))]
|
||||||
|
for lang, formats in subtitles.items()]))
|
||||||
|
|
||||||
def urlopen(self, req):
|
def urlopen(self, req):
|
||||||
""" Start an HTTP download """
|
""" Start an HTTP download """
|
||||||
|
|
||||||
@@ -1617,10 +1701,10 @@ class YoutubeDL(object):
|
|||||||
out = out.decode().strip()
|
out = out.decode().strip()
|
||||||
if re.match('[0-9a-f]+', out):
|
if re.match('[0-9a-f]+', out):
|
||||||
self._write_string('[debug] Git HEAD: ' + out + '\n')
|
self._write_string('[debug] Git HEAD: ' + out + '\n')
|
||||||
except:
|
except Exception:
|
||||||
try:
|
try:
|
||||||
sys.exc_clear()
|
sys.exc_clear()
|
||||||
except:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
self._write_string('[debug] Python version %s - %s\n' % (
|
self._write_string('[debug] Python version %s - %s\n' % (
|
||||||
platform.python_version(), platform_name()))
|
platform.python_version(), platform_name()))
|
||||||
@@ -1680,13 +1764,22 @@ class YoutubeDL(object):
|
|||||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||||
if 'http' in proxies and 'https' not in proxies:
|
if 'http' in proxies and 'https' not in proxies:
|
||||||
proxies['https'] = proxies['http']
|
proxies['https'] = proxies['http']
|
||||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
proxy_handler = PerRequestProxyHandler(proxies)
|
||||||
|
|
||||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||||
|
# The ssl context is only available in python 2.7.9 and 3.x
|
||||||
|
if hasattr(https_handler, '_context'):
|
||||||
|
ctx = https_handler._context
|
||||||
|
# get_ca_certs is unavailable prior to python 3.4
|
||||||
|
if hasattr(ctx, 'get_ca_certs') and len(ctx.get_ca_certs()) == 0:
|
||||||
|
self.report_warning(
|
||||||
|
'No ssl certificates were loaded, urls that use https '
|
||||||
|
'won\'t work')
|
||||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||||
opener = compat_urllib_request.build_opener(
|
opener = compat_urllib_request.build_opener(
|
||||||
https_handler, proxy_handler, cookie_processor, ydlh)
|
proxy_handler, https_handler, cookie_processor, ydlh)
|
||||||
|
|
||||||
# Delete the default user-agent header, which would otherwise apply in
|
# Delete the default user-agent header, which would otherwise apply in
|
||||||
# cases where our custom HTTP handler doesn't come into play
|
# cases where our custom HTTP handler doesn't come into play
|
||||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||||
|
@@ -9,6 +9,7 @@ import codecs
|
|||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
import shlex
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
@@ -170,6 +171,9 @@ def _real_main(argv=None):
|
|||||||
if opts.recodevideo is not None:
|
if opts.recodevideo is not None:
|
||||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
||||||
parser.error('invalid video recode format specified')
|
parser.error('invalid video recode format specified')
|
||||||
|
if opts.convertsubtitles is not None:
|
||||||
|
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
|
||||||
|
parser.error('invalid subtitle format specified')
|
||||||
|
|
||||||
if opts.date is not None:
|
if opts.date is not None:
|
||||||
date = DateRange.day(opts.date)
|
date = DateRange.day(opts.date)
|
||||||
@@ -189,14 +193,14 @@ def _real_main(argv=None):
|
|||||||
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
||||||
if opts.outtmpl is not None:
|
if opts.outtmpl is not None:
|
||||||
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
|
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
|
||||||
outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
|
outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
|
||||||
or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
|
(opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
|
||||||
or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
|
(opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
|
||||||
or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
|
(opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
|
||||||
or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
|
(opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
|
||||||
or (opts.useid and '%(id)s.%(ext)s')
|
(opts.useid and '%(id)s.%(ext)s') or
|
||||||
or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
|
(opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
|
||||||
or DEFAULT_OUTTMPL)
|
DEFAULT_OUTTMPL)
|
||||||
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
||||||
parser.error('Cannot download a video and extract audio into the same'
|
parser.error('Cannot download a video and extract audio into the same'
|
||||||
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||||
@@ -209,6 +213,11 @@ def _real_main(argv=None):
|
|||||||
# PostProcessors
|
# PostProcessors
|
||||||
postprocessors = []
|
postprocessors = []
|
||||||
# Add the metadata pp first, the other pps will copy it
|
# Add the metadata pp first, the other pps will copy it
|
||||||
|
if opts.metafromtitle:
|
||||||
|
postprocessors.append({
|
||||||
|
'key': 'MetadataFromTitle',
|
||||||
|
'titleformat': opts.metafromtitle
|
||||||
|
})
|
||||||
if opts.addmetadata:
|
if opts.addmetadata:
|
||||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
@@ -223,10 +232,14 @@ def _real_main(argv=None):
|
|||||||
'key': 'FFmpegVideoConvertor',
|
'key': 'FFmpegVideoConvertor',
|
||||||
'preferedformat': opts.recodevideo,
|
'preferedformat': opts.recodevideo,
|
||||||
})
|
})
|
||||||
|
if opts.convertsubtitles:
|
||||||
|
postprocessors.append({
|
||||||
|
'key': 'FFmpegSubtitlesConvertor',
|
||||||
|
'format': opts.convertsubtitles,
|
||||||
|
})
|
||||||
if opts.embedsubtitles:
|
if opts.embedsubtitles:
|
||||||
postprocessors.append({
|
postprocessors.append({
|
||||||
'key': 'FFmpegEmbedSubtitle',
|
'key': 'FFmpegEmbedSubtitle',
|
||||||
'subtitlesformat': opts.subtitlesformat,
|
|
||||||
})
|
})
|
||||||
if opts.xattrs:
|
if opts.xattrs:
|
||||||
postprocessors.append({'key': 'XAttrMetadata'})
|
postprocessors.append({'key': 'XAttrMetadata'})
|
||||||
@@ -248,6 +261,9 @@ def _real_main(argv=None):
|
|||||||
xattr # Confuse flake8
|
xattr # Confuse flake8
|
||||||
except ImportError:
|
except ImportError:
|
||||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||||
|
external_downloader_args = None
|
||||||
|
if opts.external_downloader_args:
|
||||||
|
external_downloader_args = shlex.split(opts.external_downloader_args)
|
||||||
match_filter = (
|
match_filter = (
|
||||||
None if opts.match_filter is None
|
None if opts.match_filter is None
|
||||||
else match_filter_func(opts.match_filter))
|
else match_filter_func(opts.match_filter))
|
||||||
@@ -352,6 +368,8 @@ def _real_main(argv=None):
|
|||||||
'no_color': opts.no_color,
|
'no_color': opts.no_color,
|
||||||
'ffmpeg_location': opts.ffmpeg_location,
|
'ffmpeg_location': opts.ffmpeg_location,
|
||||||
'hls_prefer_native': opts.hls_prefer_native,
|
'hls_prefer_native': opts.hls_prefer_native,
|
||||||
|
'external_downloader_args': external_downloader_args,
|
||||||
|
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
|
@@ -1,9 +1,11 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import collections
|
||||||
import getpass
|
import getpass
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
@@ -364,6 +366,33 @@ def workaround_optparse_bug9161():
|
|||||||
return real_add_option(self, *bargs, **bkwargs)
|
return real_add_option(self, *bargs, **bkwargs)
|
||||||
optparse.OptionGroup.add_option = _compat_add_option
|
optparse.OptionGroup.add_option = _compat_add_option
|
||||||
|
|
||||||
|
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
|
||||||
|
compat_get_terminal_size = shutil.get_terminal_size
|
||||||
|
else:
|
||||||
|
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
|
||||||
|
|
||||||
|
def compat_get_terminal_size():
|
||||||
|
columns = compat_getenv('COLUMNS', None)
|
||||||
|
if columns:
|
||||||
|
columns = int(columns)
|
||||||
|
else:
|
||||||
|
columns = None
|
||||||
|
lines = compat_getenv('LINES', None)
|
||||||
|
if lines:
|
||||||
|
lines = int(lines)
|
||||||
|
else:
|
||||||
|
lines = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
sp = subprocess.Popen(
|
||||||
|
['stty', 'size'],
|
||||||
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
out, err = sp.communicate()
|
||||||
|
lines, columns = map(int, out.split())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return _terminal_size(columns, lines)
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'compat_HTTPError',
|
'compat_HTTPError',
|
||||||
@@ -371,6 +400,7 @@ __all__ = [
|
|||||||
'compat_chr',
|
'compat_chr',
|
||||||
'compat_cookiejar',
|
'compat_cookiejar',
|
||||||
'compat_expanduser',
|
'compat_expanduser',
|
||||||
|
'compat_get_terminal_size',
|
||||||
'compat_getenv',
|
'compat_getenv',
|
||||||
'compat_getpass',
|
'compat_getpass',
|
||||||
'compat_html_entities',
|
'compat_html_entities',
|
||||||
|
@@ -42,6 +42,8 @@ class FileDownloader(object):
|
|||||||
max_filesize: Skip files larger than this size
|
max_filesize: Skip files larger than this size
|
||||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||||
(experimenatal)
|
(experimenatal)
|
||||||
|
external_downloader_args: A list of additional command-line arguments for the
|
||||||
|
external downloader.
|
||||||
|
|
||||||
Subclasses of this one must re-define the real_download method.
|
Subclasses of this one must re-define the real_download method.
|
||||||
"""
|
"""
|
||||||
@@ -202,7 +204,7 @@ class FileDownloader(object):
|
|||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
os.utime(filename, (time.time(), filetime))
|
os.utime(filename, (time.time(), filetime))
|
||||||
except:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return filetime
|
return filetime
|
||||||
|
|
||||||
@@ -311,14 +313,14 @@ class FileDownloader(object):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
nooverwrites_and_exists = (
|
nooverwrites_and_exists = (
|
||||||
self.params.get('nooverwrites', False)
|
self.params.get('nooverwrites', False) and
|
||||||
and os.path.exists(encodeFilename(filename))
|
os.path.exists(encodeFilename(filename))
|
||||||
)
|
)
|
||||||
|
|
||||||
continuedl_and_exists = (
|
continuedl_and_exists = (
|
||||||
self.params.get('continuedl', False)
|
self.params.get('continuedl', True) and
|
||||||
and os.path.isfile(encodeFilename(filename))
|
os.path.isfile(encodeFilename(filename)) and
|
||||||
and not self.params.get('nopart', False)
|
not self.params.get('nopart', False)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check file already present
|
# Check file already present
|
||||||
|
@@ -51,6 +51,13 @@ class ExternalFD(FileDownloader):
|
|||||||
return []
|
return []
|
||||||
return [command_option, source_address]
|
return [command_option, source_address]
|
||||||
|
|
||||||
|
def _configuration_args(self, default=[]):
|
||||||
|
ex_args = self.params.get('external_downloader_args')
|
||||||
|
if ex_args is None:
|
||||||
|
return default
|
||||||
|
assert isinstance(ex_args, list)
|
||||||
|
return ex_args
|
||||||
|
|
||||||
def _call_downloader(self, tmpfilename, info_dict):
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
""" Either overwrite this or implement _make_cmd """
|
""" Either overwrite this or implement _make_cmd """
|
||||||
cmd = self._make_cmd(tmpfilename, info_dict)
|
cmd = self._make_cmd(tmpfilename, info_dict)
|
||||||
@@ -79,6 +86,7 @@ class CurlFD(ExternalFD):
|
|||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._source_address('--interface')
|
cmd += self._source_address('--interface')
|
||||||
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
@@ -89,15 +97,16 @@ class WgetFD(ExternalFD):
|
|||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._source_address('--bind-address')
|
cmd += self._source_address('--bind-address')
|
||||||
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
class Aria2cFD(ExternalFD):
|
class Aria2cFD(ExternalFD):
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [
|
cmd = [self.exe, '-c']
|
||||||
self.exe, '-c',
|
cmd += self._configuration_args([
|
||||||
'--min-split-size', '1M', '--max-connection-per-server', '4']
|
'--min-split-size', '1M', '--max-connection-per-server', '4'])
|
||||||
dn = os.path.dirname(tmpfilename)
|
dn = os.path.dirname(tmpfilename)
|
||||||
if dn:
|
if dn:
|
||||||
cmd += ['--dir', dn]
|
cmd += ['--dir', dn]
|
||||||
|
@@ -11,6 +11,7 @@ from .common import FileDownloader
|
|||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
compat_urllib_error,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_pack,
|
struct_pack,
|
||||||
@@ -121,7 +122,8 @@ class FlvReader(io.BytesIO):
|
|||||||
|
|
||||||
self.read_unsigned_int() # BootstrapinfoVersion
|
self.read_unsigned_int() # BootstrapinfoVersion
|
||||||
# Profile,Live,Update,Reserved
|
# Profile,Live,Update,Reserved
|
||||||
self.read(1)
|
flags = self.read_unsigned_char()
|
||||||
|
live = flags & 0x20 != 0
|
||||||
# time scale
|
# time scale
|
||||||
self.read_unsigned_int()
|
self.read_unsigned_int()
|
||||||
# CurrentMediaTime
|
# CurrentMediaTime
|
||||||
@@ -160,6 +162,7 @@ class FlvReader(io.BytesIO):
|
|||||||
return {
|
return {
|
||||||
'segments': segments,
|
'segments': segments,
|
||||||
'fragments': fragments,
|
'fragments': fragments,
|
||||||
|
'live': live,
|
||||||
}
|
}
|
||||||
|
|
||||||
def read_bootstrap_info(self):
|
def read_bootstrap_info(self):
|
||||||
@@ -182,6 +185,10 @@ def build_fragments_list(boot_info):
|
|||||||
for segment, fragments_count in segment_run_table['segment_run']:
|
for segment, fragments_count in segment_run_table['segment_run']:
|
||||||
for _ in range(fragments_count):
|
for _ in range(fragments_count):
|
||||||
res.append((segment, next(fragments_counter)))
|
res.append((segment, next(fragments_counter)))
|
||||||
|
|
||||||
|
if boot_info['live']:
|
||||||
|
res = res[-2:]
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
@@ -246,6 +253,38 @@ class F4mFD(FileDownloader):
|
|||||||
self.report_error('Unsupported DRM')
|
self.report_error('Unsupported DRM')
|
||||||
return media
|
return media
|
||||||
|
|
||||||
|
def _get_bootstrap_from_url(self, bootstrap_url):
|
||||||
|
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
||||||
|
return read_bootstrap_info(bootstrap)
|
||||||
|
|
||||||
|
def _update_live_fragments(self, bootstrap_url, latest_fragment):
|
||||||
|
fragments_list = []
|
||||||
|
retries = 30
|
||||||
|
while (not fragments_list) and (retries > 0):
|
||||||
|
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||||
|
fragments_list = build_fragments_list(boot_info)
|
||||||
|
fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
|
||||||
|
if not fragments_list:
|
||||||
|
# Retry after a while
|
||||||
|
time.sleep(5.0)
|
||||||
|
retries -= 1
|
||||||
|
|
||||||
|
if not fragments_list:
|
||||||
|
self.report_error('Failed to update fragments')
|
||||||
|
|
||||||
|
return fragments_list
|
||||||
|
|
||||||
|
def _parse_bootstrap_node(self, node, base_url):
|
||||||
|
if node.text is None:
|
||||||
|
bootstrap_url = compat_urlparse.urljoin(
|
||||||
|
base_url, node.attrib['url'])
|
||||||
|
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||||
|
else:
|
||||||
|
bootstrap_url = None
|
||||||
|
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||||
|
boot_info = read_bootstrap_info(bootstrap)
|
||||||
|
return (boot_info, bootstrap_url)
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
man_url = info_dict['url']
|
man_url = info_dict['url']
|
||||||
requested_bitrate = info_dict.get('tbr')
|
requested_bitrate = info_dict.get('tbr')
|
||||||
@@ -265,18 +304,13 @@ class F4mFD(FileDownloader):
|
|||||||
|
|
||||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||||
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
||||||
if bootstrap_node.text is None:
|
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
|
||||||
bootstrap_url = compat_urlparse.urljoin(
|
live = boot_info['live']
|
||||||
base_url, bootstrap_node.attrib['url'])
|
|
||||||
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
|
||||||
else:
|
|
||||||
bootstrap = base64.b64decode(bootstrap_node.text)
|
|
||||||
metadata_node = media.find(_add_ns('metadata'))
|
metadata_node = media.find(_add_ns('metadata'))
|
||||||
if metadata_node is not None:
|
if metadata_node is not None:
|
||||||
metadata = base64.b64decode(metadata_node.text)
|
metadata = base64.b64decode(metadata_node.text.encode('ascii'))
|
||||||
else:
|
else:
|
||||||
metadata = None
|
metadata = None
|
||||||
boot_info = read_bootstrap_info(bootstrap)
|
|
||||||
|
|
||||||
fragments_list = build_fragments_list(boot_info)
|
fragments_list = build_fragments_list(boot_info)
|
||||||
if self.params.get('test', False):
|
if self.params.get('test', False):
|
||||||
@@ -301,7 +335,8 @@ class F4mFD(FileDownloader):
|
|||||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||||
|
|
||||||
write_flv_header(dest_stream)
|
write_flv_header(dest_stream)
|
||||||
write_metadata_tag(dest_stream, metadata)
|
if not live:
|
||||||
|
write_metadata_tag(dest_stream, metadata)
|
||||||
|
|
||||||
# This dict stores the download progress, it's updated by the progress
|
# This dict stores the download progress, it's updated by the progress
|
||||||
# hook
|
# hook
|
||||||
@@ -325,8 +360,8 @@ class F4mFD(FileDownloader):
|
|||||||
state['frag_index'] += 1
|
state['frag_index'] += 1
|
||||||
|
|
||||||
estimated_size = (
|
estimated_size = (
|
||||||
(state['downloaded_bytes'] + frag_total_bytes)
|
(state['downloaded_bytes'] + frag_total_bytes) /
|
||||||
/ (state['frag_index'] + 1) * total_frags)
|
(state['frag_index'] + 1) * total_frags)
|
||||||
time_now = time.time()
|
time_now = time.time()
|
||||||
state['total_bytes_estimate'] = estimated_size
|
state['total_bytes_estimate'] = estimated_size
|
||||||
state['elapsed'] = time_now - start
|
state['elapsed'] = time_now - start
|
||||||
@@ -348,24 +383,45 @@ class F4mFD(FileDownloader):
|
|||||||
http_dl.add_progress_hook(frag_progress_hook)
|
http_dl.add_progress_hook(frag_progress_hook)
|
||||||
|
|
||||||
frags_filenames = []
|
frags_filenames = []
|
||||||
for (seg_i, frag_i) in fragments_list:
|
while fragments_list:
|
||||||
|
seg_i, frag_i = fragments_list.pop(0)
|
||||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||||
url = base_url + name
|
url = base_url + name
|
||||||
if akamai_pv:
|
if akamai_pv:
|
||||||
url += '?' + akamai_pv.strip(';')
|
url += '?' + akamai_pv.strip(';')
|
||||||
frag_filename = '%s-%s' % (tmpfilename, name)
|
frag_filename = '%s-%s' % (tmpfilename, name)
|
||||||
success = http_dl.download(frag_filename, {'url': url})
|
try:
|
||||||
if not success:
|
success = http_dl.download(frag_filename, {'url': url})
|
||||||
return False
|
if not success:
|
||||||
with open(frag_filename, 'rb') as down:
|
return False
|
||||||
down_data = down.read()
|
with open(frag_filename, 'rb') as down:
|
||||||
reader = FlvReader(down_data)
|
down_data = down.read()
|
||||||
while True:
|
reader = FlvReader(down_data)
|
||||||
_, box_type, box_data = reader.read_box_info()
|
while True:
|
||||||
if box_type == b'mdat':
|
_, box_type, box_data = reader.read_box_info()
|
||||||
dest_stream.write(box_data)
|
if box_type == b'mdat':
|
||||||
break
|
dest_stream.write(box_data)
|
||||||
frags_filenames.append(frag_filename)
|
break
|
||||||
|
if live:
|
||||||
|
os.remove(frag_filename)
|
||||||
|
else:
|
||||||
|
frags_filenames.append(frag_filename)
|
||||||
|
except (compat_urllib_error.HTTPError, ) as err:
|
||||||
|
if live and (err.code == 404 or err.code == 410):
|
||||||
|
# We didn't keep up with the live window. Continue
|
||||||
|
# with the next available fragment.
|
||||||
|
msg = 'Fragment %d unavailable' % frag_i
|
||||||
|
self.report_warning(msg)
|
||||||
|
fragments_list = []
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
|
if not fragments_list and live and bootstrap_url:
|
||||||
|
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
||||||
|
total_frags += len(fragments_list)
|
||||||
|
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
||||||
|
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
|
||||||
|
self.report_warning(msg)
|
||||||
|
|
||||||
dest_stream.close()
|
dest_stream.close()
|
||||||
|
|
||||||
|
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
|
|||||||
|
|
||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
if resume_len != 0:
|
if resume_len != 0:
|
||||||
if self.params.get('continuedl', False):
|
if self.params.get('continuedl', True):
|
||||||
self.report_resuming_byte(resume_len)
|
self.report_resuming_byte(resume_len)
|
||||||
request.add_header('Range', 'bytes=%d-' % resume_len)
|
request.add_header('Range', 'bytes=%d-' % resume_len)
|
||||||
open_mode = 'ab'
|
open_mode = 'ab'
|
||||||
@@ -92,6 +92,8 @@ class HttpFD(FileDownloader):
|
|||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
|
'downloaded_bytes': resume_len,
|
||||||
|
'total_bytes': resume_len,
|
||||||
})
|
})
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
@@ -218,12 +220,6 @@ class HttpFD(FileDownloader):
|
|||||||
if tmpfilename != '-':
|
if tmpfilename != '-':
|
||||||
stream.close()
|
stream.close()
|
||||||
|
|
||||||
self._hook_progress({
|
|
||||||
'downloaded_bytes': byte_counter,
|
|
||||||
'total_bytes': data_len,
|
|
||||||
'tmpfilename': tmpfilename,
|
|
||||||
'status': 'error',
|
|
||||||
})
|
|
||||||
if data_len is not None and byte_counter != data_len:
|
if data_len is not None and byte_counter != data_len:
|
||||||
raise ContentTooShortError(byte_counter, int(data_len))
|
raise ContentTooShortError(byte_counter, int(data_len))
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
|
@@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):
|
|||||||
protocol = info_dict.get('rtmp_protocol', None)
|
protocol = info_dict.get('rtmp_protocol', None)
|
||||||
real_time = info_dict.get('rtmp_real_time', False)
|
real_time = info_dict.get('rtmp_real_time', False)
|
||||||
no_resume = info_dict.get('no_resume', False)
|
no_resume = info_dict.get('no_resume', False)
|
||||||
continue_dl = info_dict.get('continuedl', False)
|
continue_dl = info_dict.get('continuedl', True)
|
||||||
|
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
@@ -119,7 +119,9 @@ class RtmpFD(FileDownloader):
|
|||||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||||
# the connection was interrumpted and resuming appears to be
|
# the connection was interrumpted and resuming appears to be
|
||||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||||
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
basic_args = [
|
||||||
|
'rtmpdump', '--verbose', '-r', url,
|
||||||
|
'-o', encodeFilename(tmpfilename, True)]
|
||||||
if player_url is not None:
|
if player_url is not None:
|
||||||
basic_args += ['--swfVfy', player_url]
|
basic_args += ['--swfVfy', player_url]
|
||||||
if page_url is not None:
|
if page_url is not None:
|
||||||
|
@@ -8,6 +8,7 @@ from .adobetv import AdobeTVIE
|
|||||||
from .adultswim import AdultSwimIE
|
from .adultswim import AdultSwimIE
|
||||||
from .aftenposten import AftenpostenIE
|
from .aftenposten import AftenpostenIE
|
||||||
from .aftonbladet import AftonbladetIE
|
from .aftonbladet import AftonbladetIE
|
||||||
|
from .airmozilla import AirMozillaIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
from .anitube import AnitubeIE
|
from .anitube import AnitubeIE
|
||||||
@@ -36,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
|
|||||||
from .bbccouk import BBCCoUkIE
|
from .bbccouk import BBCCoUkIE
|
||||||
from .beeg import BeegIE
|
from .beeg import BeegIE
|
||||||
from .behindkink import BehindKinkIE
|
from .behindkink import BehindKinkIE
|
||||||
|
from .beatportpro import BeatportProIE
|
||||||
from .bet import BetIE
|
from .bet import BetIE
|
||||||
from .bild import BildIE
|
from .bild import BildIE
|
||||||
from .bilibili import BiliBiliIE
|
from .bilibili import BiliBiliIE
|
||||||
@@ -58,10 +60,15 @@ from .canalplus import CanalplusIE
|
|||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbsnews import CBSNewsIE
|
from .cbsnews import CBSNewsIE
|
||||||
|
from .cbssports import CBSSportsIE
|
||||||
from .ccc import CCCIE
|
from .ccc import CCCIE
|
||||||
from .ceskatelevize import CeskaTelevizeIE
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
from .chilloutzone import ChilloutzoneIE
|
from .chilloutzone import ChilloutzoneIE
|
||||||
|
from .chirbit import (
|
||||||
|
ChirbitIE,
|
||||||
|
ChirbitProfileIE,
|
||||||
|
)
|
||||||
from .cinchcast import CinchcastIE
|
from .cinchcast import CinchcastIE
|
||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
@@ -99,17 +106,21 @@ from .dbtv import DBTVIE
|
|||||||
from .dctp import DctpTvIE
|
from .dctp import DctpTvIE
|
||||||
from .deezer import DeezerPlaylistIE
|
from .deezer import DeezerPlaylistIE
|
||||||
from .dfb import DFBIE
|
from .dfb import DFBIE
|
||||||
|
from .dhm import DHMIE
|
||||||
from .dotsub import DotsubIE
|
from .dotsub import DotsubIE
|
||||||
|
from .douyutv import DouyuTVIE
|
||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .drbonanza import DRBonanzaIE
|
from .drbonanza import DRBonanzaIE
|
||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
from .drtv import DRTVIE
|
from .drtv import DRTVIE
|
||||||
from .dvtv import DVTVIE
|
from .dvtv import DVTVIE
|
||||||
from .dump import DumpIE
|
from .dump import DumpIE
|
||||||
|
from .dumpert import DumpertIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
from .discovery import DiscoveryIE
|
from .discovery import DiscoveryIE
|
||||||
from .divxstage import DivxStageIE
|
from .divxstage import DivxStageIE
|
||||||
from .dropbox import DropboxIE
|
from .dropbox import DropboxIE
|
||||||
|
from .eagleplatform import EaglePlatformIE
|
||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
from .echomsk import EchoMskIE
|
from .echomsk import EchoMskIE
|
||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
@@ -144,6 +155,7 @@ from .fktv import (
|
|||||||
)
|
)
|
||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
from .folketinget import FolketingetIE
|
from .folketinget import FolketingetIE
|
||||||
|
from .footyroom import FootyRoomIE
|
||||||
from .fourtube import FourTubeIE
|
from .fourtube import FourTubeIE
|
||||||
from .foxgay import FoxgayIE
|
from .foxgay import FoxgayIE
|
||||||
from .foxnews import FoxNewsIE
|
from .foxnews import FoxNewsIE
|
||||||
@@ -168,6 +180,7 @@ from .gameone import (
|
|||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gamestar import GameStarIE
|
from .gamestar import GameStarIE
|
||||||
from .gametrailers import GametrailersIE
|
from .gametrailers import GametrailersIE
|
||||||
|
from .gazeta import GazetaIE
|
||||||
from .gdcvault import GDCVaultIE
|
from .gdcvault import GDCVaultIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
from .giantbomb import GiantBombIE
|
from .giantbomb import GiantBombIE
|
||||||
@@ -205,6 +218,7 @@ from .imdb import (
|
|||||||
ImdbIE,
|
ImdbIE,
|
||||||
ImdbListIE
|
ImdbListIE
|
||||||
)
|
)
|
||||||
|
from .imgur import ImgurIE
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE, InstagramUserIE
|
from .instagram import InstagramIE, InstagramUserIE
|
||||||
@@ -220,6 +234,8 @@ from .jeuxvideo import JeuxVideoIE
|
|||||||
from .jove import JoveIE
|
from .jove import JoveIE
|
||||||
from .jukebox import JukeboxIE
|
from .jukebox import JukeboxIE
|
||||||
from .jpopsukitv import JpopsukiIE
|
from .jpopsukitv import JpopsukiIE
|
||||||
|
from .kaltura import KalturaIE
|
||||||
|
from .kanalplay import KanalPlayIE
|
||||||
from .kankan import KankanIE
|
from .kankan import KankanIE
|
||||||
from .karaoketv import KaraoketvIE
|
from .karaoketv import KaraoketvIE
|
||||||
from .keezmovies import KeezMoviesIE
|
from .keezmovies import KeezMoviesIE
|
||||||
@@ -231,6 +247,12 @@ from .krasview import KrasViewIE
|
|||||||
from .ku6 import Ku6IE
|
from .ku6 import Ku6IE
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
from .laola1tv import Laola1TvIE
|
from .laola1tv import Laola1TvIE
|
||||||
|
from .letv import (
|
||||||
|
LetvIE,
|
||||||
|
LetvTvIE,
|
||||||
|
LetvPlaylistIE
|
||||||
|
)
|
||||||
|
from .libsyn import LibsynIE
|
||||||
from .lifenews import LifeNewsIE
|
from .lifenews import LifeNewsIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import (
|
from .livestream import (
|
||||||
@@ -254,6 +276,7 @@ from .metacritic import MetacriticIE
|
|||||||
from .mgoon import MgoonIE
|
from .mgoon import MgoonIE
|
||||||
from .minhateca import MinhatecaIE
|
from .minhateca import MinhatecaIE
|
||||||
from .ministrygrid import MinistryGridIE
|
from .ministrygrid import MinistryGridIE
|
||||||
|
from .miomio import MioMioIE
|
||||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||||
from .mitele import MiTeleIE
|
from .mitele import MiTeleIE
|
||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
@@ -283,11 +306,14 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
|
|||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .myvidster import MyVidsterIE
|
from .myvidster import MyVidsterIE
|
||||||
|
from .nationalgeographic import NationalGeographicIE
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import (
|
from .nbc import (
|
||||||
NBCIE,
|
NBCIE,
|
||||||
NBCNewsIE,
|
NBCNewsIE,
|
||||||
|
NBCSportsIE,
|
||||||
|
NBCSportsVPlayerIE,
|
||||||
)
|
)
|
||||||
from .ndr import NDRIE
|
from .ndr import NDRIE
|
||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
@@ -326,12 +352,14 @@ from .npo import (
|
|||||||
)
|
)
|
||||||
from .nrk import (
|
from .nrk import (
|
||||||
NRKIE,
|
NRKIE,
|
||||||
|
NRKPlaylistIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
)
|
)
|
||||||
from .ntvde import NTVDeIE
|
from .ntvde import NTVDeIE
|
||||||
from .ntvru import NTVRuIE
|
from .ntvru import NTVRuIE
|
||||||
from .nytimes import NYTimesIE
|
from .nytimes import NYTimesIE
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
|
from .odnoklassniki import OdnoklassnikiIE
|
||||||
from .oktoberfesttv import OktoberfestTVIE
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .openfilm import OpenFilmIE
|
from .openfilm import OpenFilmIE
|
||||||
@@ -339,6 +367,7 @@ from .orf import (
|
|||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
ORFOE1IE,
|
ORFOE1IE,
|
||||||
ORFFM4IE,
|
ORFFM4IE,
|
||||||
|
ORFIPTVIE,
|
||||||
)
|
)
|
||||||
from .parliamentliveuk import ParliamentLiveUKIE
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
@@ -346,18 +375,26 @@ from .pbs import PBSIE
|
|||||||
from .phoenix import PhoenixIE
|
from .phoenix import PhoenixIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
from .planetaplay import PlanetaPlayIE
|
from .planetaplay import PlanetaPlayIE
|
||||||
|
from .pladform import PladformIE
|
||||||
from .played import PlayedIE
|
from .played import PlayedIE
|
||||||
from .playfm import PlayFMIE
|
from .playfm import PlayFMIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
|
from .playwire import PlaywireIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import (
|
||||||
|
PornHubIE,
|
||||||
|
PornHubPlaylistIE,
|
||||||
|
)
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .pornoxo import PornoXOIE
|
from .pornoxo import PornoXOIE
|
||||||
|
from .primesharetv import PrimeShareTVIE
|
||||||
from .promptfile import PromptFileIE
|
from .promptfile import PromptFileIE
|
||||||
from .prosiebensat1 import ProSiebenSat1IE
|
from .prosiebensat1 import ProSiebenSat1IE
|
||||||
|
from .puls4 import Puls4IE
|
||||||
from .pyvideo import PyvideoIE
|
from .pyvideo import PyvideoIE
|
||||||
from .quickvid import QuickVidIE
|
from .quickvid import QuickVidIE
|
||||||
|
from .r7 import R7IE
|
||||||
from .radiode import RadioDeIE
|
from .radiode import RadioDeIE
|
||||||
from .radiobremen import RadioBremenIE
|
from .radiobremen import RadioBremenIE
|
||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
@@ -377,7 +414,7 @@ from .rtlnow import RTLnowIE
|
|||||||
from .rtl2 import RTL2IE
|
from .rtl2 import RTL2IE
|
||||||
from .rtp import RTPIE
|
from .rtp import RTPIE
|
||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
@@ -388,6 +425,10 @@ from .rutube import (
|
|||||||
)
|
)
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .sandia import SandiaIE
|
from .sandia import SandiaIE
|
||||||
|
from .safari import (
|
||||||
|
SafariIE,
|
||||||
|
SafariCourseIE,
|
||||||
|
)
|
||||||
from .sapo import SapoIE
|
from .sapo import SapoIE
|
||||||
from .savefrom import SaveFromIE
|
from .savefrom import SaveFromIE
|
||||||
from .sbs import SBSIE
|
from .sbs import SBSIE
|
||||||
@@ -418,7 +459,10 @@ from .soundcloud import (
|
|||||||
SoundcloudUserIE,
|
SoundcloudUserIE,
|
||||||
SoundcloudPlaylistIE
|
SoundcloudPlaylistIE
|
||||||
)
|
)
|
||||||
from .soundgasm import SoundgasmIE
|
from .soundgasm import (
|
||||||
|
SoundgasmIE,
|
||||||
|
SoundgasmProfileIE
|
||||||
|
)
|
||||||
from .southpark import (
|
from .southpark import (
|
||||||
SouthParkIE,
|
SouthParkIE,
|
||||||
SouthparkDeIE,
|
SouthparkDeIE,
|
||||||
@@ -432,6 +476,7 @@ from .sport5 import Sport5IE
|
|||||||
from .sportbox import SportBoxIE
|
from .sportbox import SportBoxIE
|
||||||
from .sportdeutschland import SportDeutschlandIE
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
from .srmediathek import SRMediathekIE
|
from .srmediathek import SRMediathekIE
|
||||||
|
from .ssa import SSAIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
from .streamcloud import StreamcloudIE
|
from .streamcloud import StreamcloudIE
|
||||||
@@ -484,11 +529,16 @@ from .tumblr import TumblrIE
|
|||||||
from .tunein import TuneInIE
|
from .tunein import TuneInIE
|
||||||
from .turbo import TurboIE
|
from .turbo import TurboIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
|
from .tv4 import TV4IE
|
||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvp import TvpIE, TvpSeriesIE
|
from .tvp import TvpIE, TvpSeriesIE
|
||||||
from .tvplay import TVPlayIE
|
from .tvplay import TVPlayIE
|
||||||
from .tweakers import TweakersIE
|
from .tweakers import TweakersIE
|
||||||
from .twentyfourvideo import TwentyFourVideoIE
|
from .twentyfourvideo import TwentyFourVideoIE
|
||||||
|
from .twentytwotracks import (
|
||||||
|
TwentyTwoTracksIE,
|
||||||
|
TwentyTwoTracksGenreIE
|
||||||
|
)
|
||||||
from .twitch import (
|
from .twitch import (
|
||||||
TwitchVideoIE,
|
TwitchVideoIE,
|
||||||
TwitchChapterIE,
|
TwitchChapterIE,
|
||||||
@@ -503,12 +553,15 @@ from .udemy import (
|
|||||||
UdemyIE,
|
UdemyIE,
|
||||||
UdemyCourseIE
|
UdemyCourseIE
|
||||||
)
|
)
|
||||||
|
from .ultimedia import UltimediaIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .urort import UrortIE
|
from .urort import UrortIE
|
||||||
from .ustream import UstreamIE, UstreamChannelIE
|
from .ustream import UstreamIE, UstreamChannelIE
|
||||||
|
from .varzesh3 import Varzesh3IE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
from .veehd import VeeHDIE
|
from .veehd import VeeHDIE
|
||||||
from .veoh import VeohIE
|
from .veoh import VeohIE
|
||||||
|
from .vessel import VesselIE
|
||||||
from .vesti import VestiIE
|
from .vesti import VestiIE
|
||||||
from .vevo import VevoIE
|
from .vevo import VevoIE
|
||||||
from .vgtv import VGTVIE
|
from .vgtv import VGTVIE
|
||||||
@@ -526,6 +579,7 @@ from .videoweed import VideoWeedIE
|
|||||||
from .vidme import VidmeIE
|
from .vidme import VidmeIE
|
||||||
from .vidzi import VidziIE
|
from .vidzi import VidziIE
|
||||||
from .vier import VierIE, VierVideosIE
|
from .vier import VierIE, VierVideosIE
|
||||||
|
from .viewster import ViewsterIE
|
||||||
from .vimeo import (
|
from .vimeo import (
|
||||||
VimeoIE,
|
VimeoIE,
|
||||||
VimeoAlbumIE,
|
VimeoAlbumIE,
|
||||||
@@ -582,6 +636,11 @@ from .yahoo import (
|
|||||||
YahooSearchIE,
|
YahooSearchIE,
|
||||||
)
|
)
|
||||||
from .yam import YamIE
|
from .yam import YamIE
|
||||||
|
from .yandexmusic import (
|
||||||
|
YandexMusicTrackIE,
|
||||||
|
YandexMusicAlbumIE,
|
||||||
|
YandexMusicPlaylistIE,
|
||||||
|
)
|
||||||
from .yesjapan import YesJapanIE
|
from .yesjapan import YesJapanIE
|
||||||
from .ynet import YnetIE
|
from .ynet import YnetIE
|
||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
@@ -605,6 +664,7 @@ from .youtube import (
|
|||||||
YoutubeUserIE,
|
YoutubeUserIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
)
|
)
|
||||||
|
from .zapiks import ZapiksIE
|
||||||
from .zdf import ZDFIE, ZDFChannelIE
|
from .zdf import ZDFIE, ZDFChannelIE
|
||||||
from .zingmp3 import (
|
from .zingmp3 import (
|
||||||
ZingMp3SongIE,
|
ZingMp3SongIE,
|
||||||
|
@@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
player = self._parse_json(
|
player = self._parse_json(
|
||||||
@@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):
|
|||||||
self._html_search_meta('datepublished', webpage, 'upload date'))
|
self._html_search_meta('datepublished', webpage, 'upload date'))
|
||||||
|
|
||||||
duration = parse_duration(
|
duration = parse_duration(
|
||||||
self._html_search_meta('duration', webpage, 'duration')
|
self._html_search_meta('duration', webpage, 'duration') or
|
||||||
or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
|
self._search_regex(
|
||||||
|
r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
|
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
|
||||||
|
@@ -2,13 +2,12 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
xpath_text,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
'title': 'American Dad - Putting Francine Out of Business',
|
'title': 'American Dad - Putting Francine Out of Business',
|
||||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
|
||||||
|
'playlist': [
|
||||||
|
{
|
||||||
|
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||||
|
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
|
||||||
|
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||||
|
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
for video in collection.get('videos'):
|
for video in collection.get('videos'):
|
||||||
if video.get('slug') == slug:
|
if video.get('slug') == slug:
|
||||||
return collection, video
|
return collection, video
|
||||||
|
return None, None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, episode_path)
|
webpage = self._download_webpage(url, episode_path)
|
||||||
|
|
||||||
# Extract the value of `bootstrappedData` from the Javascript in the page.
|
# Extract the value of `bootstrappedData` from the Javascript in the page.
|
||||||
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
|
bootstrapped_data = self._parse_json(self._search_regex(
|
||||||
|
r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
|
||||||
try:
|
|
||||||
bootstrappedData = json.loads(bootstrappedDataJS)
|
|
||||||
except ValueError as ve:
|
|
||||||
errmsg = '%s: Failed to parse JSON ' % episode_path
|
|
||||||
raise ExtractorError(errmsg, cause=ve)
|
|
||||||
|
|
||||||
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
|
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
|
||||||
# NOTE: We are only downloading one video (the current one) not the playlist
|
# NOTE: We are only downloading one video (the current one) not the playlist
|
||||||
if is_playlist:
|
if is_playlist:
|
||||||
collections = bootstrappedData['playlists']['collections']
|
collections = bootstrapped_data['playlists']['collections']
|
||||||
collection = self.find_collection_by_linkURL(collections, show_path)
|
collection = self.find_collection_by_linkURL(collections, show_path)
|
||||||
video_info = self.find_video_info(collection, episode_path)
|
video_info = self.find_video_info(collection, episode_path)
|
||||||
|
|
||||||
show_title = video_info['showTitle']
|
show_title = video_info['showTitle']
|
||||||
segment_ids = [video_info['videoPlaybackID']]
|
segment_ids = [video_info['videoPlaybackID']]
|
||||||
else:
|
else:
|
||||||
collections = bootstrappedData['show']['collections']
|
collections = bootstrapped_data['show']['collections']
|
||||||
collection, video_info = self.find_collection_containing_video(collections, episode_path)
|
collection, video_info = self.find_collection_containing_video(collections, episode_path)
|
||||||
|
|
||||||
show = bootstrappedData['show']
|
# Video wasn't found in the collections, let's try `slugged_video`.
|
||||||
|
if video_info is None:
|
||||||
|
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||||
|
video_info = bootstrapped_data['slugged_video']
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unable to find video info')
|
||||||
|
|
||||||
|
show = bootstrapped_data['show']
|
||||||
show_title = show['title']
|
show_title = show['title']
|
||||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||||
|
|
||||||
|
@@ -14,10 +14,10 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class AftenpostenIE(InfoExtractor):
|
class AftenpostenIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
|
_VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=§ion=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
|
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
||||||
'md5': 'fd828cd29774a729bf4d4425fe192972',
|
'md5': 'fd828cd29774a729bf4d4425fe192972',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '21039',
|
'id': '21039',
|
||||||
@@ -30,12 +30,7 @@ class AftenpostenIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
video_id = self._html_search_regex(
|
|
||||||
r'data-xs-id="(\d+)"', webpage, 'video id')
|
|
||||||
|
|
||||||
data = self._download_xml(
|
data = self._download_xml(
|
||||||
'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
|
'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
|
||||||
|
74
youtube_dl/extractor/airmozilla.py
Normal file
74
youtube_dl/extractor/airmozilla.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AirMozillaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
|
||||||
|
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6x4q2w',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
|
||||||
|
'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
|
||||||
|
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
|
||||||
|
'timestamp': 1422487800,
|
||||||
|
'upload_date': '20150128',
|
||||||
|
'location': 'SFO Commons',
|
||||||
|
'duration': 3780,
|
||||||
|
'view_count': int,
|
||||||
|
'categories': ['Main'],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
|
||||||
|
|
||||||
|
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
|
||||||
|
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
|
||||||
|
metadata = self._parse_json(jwconfig, video_id)
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': source['file'],
|
||||||
|
'ext': source['type'],
|
||||||
|
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
|
||||||
|
'format': source['label'],
|
||||||
|
'height': int(source['label'].rstrip('p')),
|
||||||
|
} for source in metadata['playlist'][0]['sources']]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
view_count = int_or_none(self._html_search_regex(
|
||||||
|
r'Views since archived: ([0-9]+)',
|
||||||
|
webpage, 'view count', fatal=False))
|
||||||
|
timestamp = parse_iso8601(self._html_search_regex(
|
||||||
|
r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
|
||||||
|
duration = parse_duration(self._search_regex(
|
||||||
|
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'formats': formats,
|
||||||
|
'url': self._og_search_url(webpage),
|
||||||
|
'display_id': display_id,
|
||||||
|
'thumbnail': metadata['playlist'][0].get('image'),
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
|
||||||
|
}
|
@@ -11,8 +11,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class AppleTrailersIE(InfoExtractor):
|
class AppleTrailersIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'manofsteel',
|
'id': 'manofsteel',
|
||||||
@@ -63,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_JSON_RE = r'iTunes.playURL\((.*?)\);'
|
_JSON_RE = r'iTunes.playURL\((.*?)\);'
|
||||||
|
|
||||||
|
@@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||||
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
|
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
|
||||||
|
|
||||||
|
if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
|
||||||
|
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
|
||||||
|
|
||||||
if re.search(r'[\?&]rss($|[=&])', url):
|
if re.search(r'[\?&]rss($|[=&])', url):
|
||||||
doc = parse_xml(webpage)
|
doc = parse_xml(webpage)
|
||||||
if doc.tag == 'rss':
|
if doc.tag == 'rss':
|
||||||
|
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
|
|
||||||
formats.append(format)
|
formats.append(format)
|
||||||
|
|
||||||
|
self._check_formats(formats, video_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info_dict['formats'] = formats
|
info_dict['formats'] = formats
|
||||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import time
|
import time
|
||||||
import hmac
|
import hmac
|
||||||
|
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
@@ -17,8 +17,9 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AtresPlayerIE(SubtitlesInfoExtractor):
|
class AtresPlayerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||||
|
_NETRC_MACHINE = 'atresplayer'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
|
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
|
||||||
@@ -144,13 +145,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
|
|||||||
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
||||||
if subtitle:
|
if subtitle_url:
|
||||||
subtitles['es'] = subtitle
|
subtitles['es'] = [{
|
||||||
|
'ext': 'srt',
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
'url': subtitle_url,
|
||||||
self._list_available_subtitles(video_id, subtitles)
|
}]
|
||||||
return
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -159,5 +159,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(video_id, subtitles),
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
@@ -2,12 +2,12 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import ExtractorError
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
|
|
||||||
|
|
||||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
class BBCCoUkIE(InfoExtractor):
|
||||||
IE_NAME = 'bbc.co.uk'
|
IE_NAME = 'bbc.co.uk'
|
||||||
IE_DESC = 'BBC iPlayer'
|
IE_DESC = 'BBC iPlayer'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||||
@@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||||||
formats.extend(conn_formats)
|
formats.extend(conn_formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_captions(self, media, programme_id):
|
def _get_subtitles(self, media, programme_id):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for connection in self._extract_connections(media):
|
for connection in self._extract_connections(media):
|
||||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
||||||
srt = ''
|
srt = ''
|
||||||
|
|
||||||
|
def _extract_text(p):
|
||||||
|
if p.text is not None:
|
||||||
|
stripped_text = p.text.strip()
|
||||||
|
if stripped_text:
|
||||||
|
return stripped_text
|
||||||
|
return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
|
||||||
for pos, p in enumerate(ps):
|
for pos, p in enumerate(ps):
|
||||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
|
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
|
||||||
p.text.strip() if p.text is not None else '')
|
subtitles[lang] = [
|
||||||
subtitles[lang] = srt
|
{
|
||||||
|
'url': connection.get('href'),
|
||||||
|
'ext': 'ttml',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'data': srt,
|
||||||
|
'ext': 'srt',
|
||||||
|
},
|
||||||
|
]
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _download_media_selector(self, programme_id):
|
def _download_media_selector(self, programme_id):
|
||||||
@@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||||||
elif kind == 'video':
|
elif kind == 'video':
|
||||||
formats.extend(self._extract_video(media, programme_id))
|
formats.extend(self._extract_video(media, programme_id))
|
||||||
elif kind == 'captions':
|
elif kind == 'captions':
|
||||||
subtitles = self._extract_captions(media, programme_id)
|
subtitles = self.extract_subtitles(media, programme_id)
|
||||||
|
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
|
|
||||||
@@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||||
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(programme_id, subtitles)
|
|
||||||
return
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
103
youtube_dl/extractor/beatportpro.py
Normal file
103
youtube_dl/extractor/beatportpro.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class BeatportProIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
|
||||||
|
'md5': 'b3c34d8639a2f6a7f734382358478887',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5379371',
|
||||||
|
'display_id': 'synesthesia-original-mix',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Froxic - Synesthesia (Original Mix)',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
|
||||||
|
'md5': 'e44c3025dfa38c6577fbaeb43da43514',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3756896',
|
||||||
|
'display_id': 'love-and-war-original-mix',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Wolfgang Gartner - Love & War (Original Mix)',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
|
||||||
|
'md5': 'a1fd8e8046de3950fd039304c186c05f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4991738',
|
||||||
|
'display_id': 'birds-original-mix',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
track_id = mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
playables = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'window\.Playables\s*=\s*({.+?});', webpage,
|
||||||
|
'playables info', flags=re.DOTALL),
|
||||||
|
track_id)
|
||||||
|
|
||||||
|
track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
|
||||||
|
|
||||||
|
title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
|
||||||
|
if track['mix']:
|
||||||
|
title += ' (' + track['mix'] + ')'
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for ext, info in track['preview'].items():
|
||||||
|
if not info['url']:
|
||||||
|
continue
|
||||||
|
fmt = {
|
||||||
|
'url': info['url'],
|
||||||
|
'ext': ext,
|
||||||
|
'format_id': ext,
|
||||||
|
'vcodec': 'none',
|
||||||
|
}
|
||||||
|
if ext == 'mp3':
|
||||||
|
fmt['preference'] = 0
|
||||||
|
fmt['acodec'] = 'mp3'
|
||||||
|
fmt['abr'] = 96
|
||||||
|
fmt['asr'] = 44100
|
||||||
|
elif ext == 'mp4':
|
||||||
|
fmt['preference'] = 1
|
||||||
|
fmt['acodec'] = 'aac'
|
||||||
|
fmt['abr'] = 96
|
||||||
|
fmt['asr'] = 44100
|
||||||
|
formats.append(fmt)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
images = []
|
||||||
|
for name, info in track['images'].items():
|
||||||
|
image_url = info.get('url')
|
||||||
|
if name == 'dynamic' or not image_url:
|
||||||
|
continue
|
||||||
|
image = {
|
||||||
|
'id': name,
|
||||||
|
'url': image_url,
|
||||||
|
'height': int_or_none(info.get('height')),
|
||||||
|
'width': int_or_none(info.get('width')),
|
||||||
|
}
|
||||||
|
images.append(image)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': compat_str(track.get('id')) or track_id,
|
||||||
|
'display_id': track.get('slug') or display_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnails': images,
|
||||||
|
}
|
@@ -1,40 +1,35 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import remove_start
|
from ..utils import (
|
||||||
|
remove_start,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class BlinkxIE(InfoExtractor):
|
class BlinkxIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||||
IE_NAME = 'blinkx'
|
IE_NAME = 'blinkx'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
|
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
|
||||||
'md5': '2e9a07364af40163a908edbf10bb2492',
|
'md5': '337cf7a344663ec79bf93a526a2e06c7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8aQUy7GV',
|
'id': 'Da0Gw3xc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Police Car Rolls Away',
|
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
|
||||||
'uploader': 'stupidvideos.com',
|
'uploader': 'IGN News',
|
||||||
'upload_date': '20131215',
|
'upload_date': '20150217',
|
||||||
'timestamp': 1387068000,
|
'timestamp': 1424215740,
|
||||||
'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
|
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
|
||||||
'duration': 14.886,
|
'duration': 47.743333,
|
||||||
'thumbnails': [{
|
|
||||||
'width': 100,
|
|
||||||
'height': 76,
|
|
||||||
'resolution': '100x76',
|
|
||||||
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
|
|
||||||
}],
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, rl):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, rl)
|
video_id = self._match_id(url)
|
||||||
video_id = m.group('id')
|
|
||||||
display_id = video_id[:8]
|
display_id = video_id[:8]
|
||||||
|
|
||||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
|
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
|
||||||
@@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor):
|
|||||||
elif m['type'] in ('flv', 'mp4'):
|
elif m['type'] in ('flv', 'mp4'):
|
||||||
vcodec = remove_start(m['vcodec'], 'ff')
|
vcodec = remove_start(m['vcodec'], 'ff')
|
||||||
acodec = remove_start(m['acodec'], 'ff')
|
acodec = remove_start(m['acodec'], 'ff')
|
||||||
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
|
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
|
||||||
|
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
|
||||||
|
tbr = vbr + abr if vbr and abr else None
|
||||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': m['link'],
|
'url': m['link'],
|
||||||
'vcodec': vcodec,
|
'vcodec': vcodec,
|
||||||
'acodec': acodec,
|
'acodec': acodec,
|
||||||
'abr': int(m['abr']) // 1000,
|
'abr': abr,
|
||||||
'vbr': int(m['vbr']) // 1000,
|
'vbr': vbr,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'width': int(m['w']),
|
'width': int_or_none(m.get('w')),
|
||||||
'height': int(m['h']),
|
'height': int_or_none(m.get('h')),
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
@@ -18,7 +17,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BlipTVIE(SubtitlesInfoExtractor):
|
class BlipTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
|
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
|||||||
categories = [category.text for category in item.findall('category')]
|
categories = [category.text for category in item.findall('category')]
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles_urls = {}
|
||||||
|
|
||||||
media_group = item.find(media('group'))
|
media_group = item.find(media('group'))
|
||||||
for media_content in media_group.findall(media('content')):
|
for media_content in media_group.findall(media('content')):
|
||||||
@@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
|||||||
}
|
}
|
||||||
lang = role.rpartition('-')[-1].strip().lower()
|
lang = role.rpartition('-')[-1].strip().lower()
|
||||||
langcode = LANGS.get(lang, lang)
|
langcode = LANGS.get(lang, lang)
|
||||||
subtitles[langcode] = url
|
subtitles_urls[langcode] = url
|
||||||
elif media_type.startswith('video/'):
|
elif media_type.startswith('video/'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': real_url,
|
'url': real_url,
|
||||||
@@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
# subtitles
|
subtitles = self.extract_subtitles(video_id, subtitles_urls)
|
||||||
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(video_id, subtitles)
|
|
||||||
return
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': video_subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _download_subtitle_url(self, sub_lang, url):
|
def _get_subtitles(self, video_id, subtitles_urls):
|
||||||
# For some weird reason, blip.tv serves a video instead of subtitles
|
subtitles = {}
|
||||||
# when we request with a common UA
|
for lang, url in subtitles_urls.items():
|
||||||
req = compat_urllib_request.Request(url)
|
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||||
req.add_header('User-Agent', 'youtube-dl')
|
# when we request with a common UA
|
||||||
return self._download_webpage(req, None, note=False)
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('User-Agent', 'youtube-dl')
|
||||||
|
subtitles[lang] = [{
|
||||||
|
# The extension is 'srt' but it's actually an 'ass' file
|
||||||
|
'ext': 'ass',
|
||||||
|
'data': self._download_webpage(req, None, note=False),
|
||||||
|
}]
|
||||||
|
return subtitles
|
||||||
|
|
||||||
|
|
||||||
class BlipTVUserIE(InfoExtractor):
|
class BlipTVUserIE(InfoExtractor):
|
||||||
|
@@ -6,7 +6,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class BloombergIE(InfoExtractor):
|
class BloombergIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||||
@@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
name = self._match_id(url)
|
||||||
name = mobj.group('name')
|
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
|
|
||||||
f4m_url = self._search_regex(
|
f4m_url = self._search_regex(
|
||||||
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
|
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
|
||||||
'f4m url')
|
'f4m url')
|
||||||
|
@@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
|
|||||||
'tbr': media['bitRate'],
|
'tbr': media['bitRate'],
|
||||||
'width': media['width'],
|
'width': media['width'],
|
||||||
'height': media['height'],
|
'height': media['height'],
|
||||||
} for media in info['media']]
|
} for media in info['media'] if media.get('mediaPurpose') == 'play']
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
formats.append({
|
formats.append({
|
||||||
|
@@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
real_id = self._search_regex(
|
real_id = self._search_regex(
|
||||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||||
|
30
youtube_dl/extractor/cbssports.py
Normal file
30
youtube_dl/extractor/cbssports.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class CBSSportsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '_d5_GbO8p1sT',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'US Open flashbacks: 1990s',
|
||||||
|
'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
section = mobj.group('section')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
all_videos = self._download_json(
|
||||||
|
'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
|
||||||
|
video_id)
|
||||||
|
# The json file contains the info of all the videos in the section
|
||||||
|
video_info = next(v for v in all_videos if v['pcid'] == video_id)
|
||||||
|
return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
@@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
class CeskaTelevizeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
|||||||
subtitles = {}
|
subtitles = {}
|
||||||
subs = item.get('subtitles')
|
subs = item.get('subtitles')
|
||||||
if subs:
|
if subs:
|
||||||
subtitles['cs'] = subs[0]['url']
|
subtitles = self.extract_subtitles(episode_id, subs)
|
||||||
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(video_id, subtitles)
|
|
||||||
return
|
|
||||||
|
|
||||||
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': episode_id,
|
'id': episode_id,
|
||||||
@@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _get_subtitles(self, episode_id, subs):
|
||||||
|
original_subtitles = self._download_webpage(
|
||||||
|
subs[0]['url'], episode_id, 'Downloading subtitles')
|
||||||
|
srt_subs = self._fix_subtitles(original_subtitles)
|
||||||
|
return {
|
||||||
|
'cs': [{
|
||||||
|
'ext': 'srt',
|
||||||
|
'data': srt_subs,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _fix_subtitles(subtitles):
|
def _fix_subtitles(subtitles):
|
||||||
""" Convert millisecond-based subtitles to SRT """
|
""" Convert millisecond-based subtitles to SRT """
|
||||||
if subtitles is None:
|
|
||||||
return subtitles # subtitles not requested
|
|
||||||
|
|
||||||
def _msectotimecode(msec):
|
def _msectotimecode(msec):
|
||||||
""" Helper utility to convert milliseconds to timecode """
|
""" Helper utility to convert milliseconds to timecode """
|
||||||
@@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
yield line
|
yield line
|
||||||
|
|
||||||
fixed_subtitles = {}
|
return "\r\n".join(_fix_subtitle(subtitles))
|
||||||
for k, v in subtitles.items():
|
|
||||||
fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
|
|
||||||
return fixed_subtitles
|
|
||||||
|
84
youtube_dl/extractor/chirbit.py
Normal file
84
youtube_dl/extractor/chirbit.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_duration,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ChirbitIE(InfoExtractor):
|
||||||
|
IE_NAME = 'chirbit'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://chirb.it/PrIPv5',
|
||||||
|
'md5': '9847b0dad6ac3e074568bf2cfb197de8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'PrIPv5',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Фасадстрой',
|
||||||
|
'duration': 52,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
audio_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://chirb.it/%s' % audio_id, audio_id)
|
||||||
|
|
||||||
|
audio_url = self._search_regex(
|
||||||
|
r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'itemprop="name">([^<]+)', webpage, 'title')
|
||||||
|
duration = parse_duration(self._html_search_meta(
|
||||||
|
'duration', webpage, 'duration', fatal=False))
|
||||||
|
view_count = int_or_none(self._search_regex(
|
||||||
|
r'itemprop="playCount"\s*>(\d+)', webpage,
|
||||||
|
'listen count', fatal=False))
|
||||||
|
comment_count = int_or_none(self._search_regex(
|
||||||
|
r'>(\d+) Comments?:', webpage,
|
||||||
|
'comment count', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': audio_id,
|
||||||
|
'url': audio_url,
|
||||||
|
'title': title,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ChirbitProfileIE(InfoExtractor):
|
||||||
|
IE_NAME = 'chirbit:profile'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://chirbit.com/ScarletBeauty',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ScarletBeauty',
|
||||||
|
'title': 'Chirbits by ScarletBeauty',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
profile_id = self._match_id(url)
|
||||||
|
|
||||||
|
rss = self._download_xml(
|
||||||
|
'http://chirbit.com/rss/%s' % profile_id, profile_id)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result(audio_url.text, 'Chirbit')
|
||||||
|
for audio_url in rss.findall('./channel/item/link')]
|
||||||
|
|
||||||
|
title = rss.find('./channel/title').text
|
||||||
|
|
||||||
|
return self.playlist_result(entries, profile_id, title)
|
@@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
file_key = self._search_regex(
|
file_key = self._search_regex(
|
||||||
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
|
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
||||||
|
webpage, 'file_key')
|
||||||
|
|
||||||
return self._extract_video(video_host, video_id, file_key)
|
return self._extract_video(video_host, video_id, file_key)
|
||||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class CNNIE(InfoExtractor):
|
class CNNIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
||||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
|
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln|ktvk)(?:-ap)?|(?=&)))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||||
@@ -45,6 +45,9 @@ class CNNIE(InfoExtractor):
|
|||||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||||
'upload_date': '20141222',
|
'upload_date': '20141222',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -250,6 +250,8 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = self._extract_subtitles(cdoc, guid)
|
||||||
|
|
||||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': guid,
|
'id': guid,
|
||||||
@@ -260,6 +262,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'subtitles': subtitles,
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@@ -150,8 +150,14 @@ class InfoExtractor(object):
|
|||||||
If not explicitly set, calculated from timestamp.
|
If not explicitly set, calculated from timestamp.
|
||||||
uploader_id: Nickname or id of the video uploader.
|
uploader_id: Nickname or id of the video uploader.
|
||||||
location: Physical location where the video was filmed.
|
location: Physical location where the video was filmed.
|
||||||
subtitles: The subtitle file contents as a dictionary in the format
|
subtitles: The available subtitles as a dictionary in the format
|
||||||
{language: subtitles}.
|
{language: subformats}. "subformats" is a list sorted from
|
||||||
|
lower to higher preference, each element is a dictionary
|
||||||
|
with the "ext" entry and one of:
|
||||||
|
* "data": The subtitles file contents
|
||||||
|
* "url": A url pointing to the subtitles file
|
||||||
|
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
||||||
|
automatically generated captions
|
||||||
duration: Length of the video in seconds, as an integer.
|
duration: Length of the video in seconds, as an integer.
|
||||||
view_count: How many users have watched the video on the platform.
|
view_count: How many users have watched the video on the platform.
|
||||||
like_count: Number of positive ratings of the video
|
like_count: Number of positive ratings of the video
|
||||||
@@ -391,6 +397,16 @@ class InfoExtractor(object):
|
|||||||
if blocked_iframe:
|
if blocked_iframe:
|
||||||
msg += ' Visit %s for more details' % blocked_iframe
|
msg += ' Visit %s for more details' % blocked_iframe
|
||||||
raise ExtractorError(msg, expected=True)
|
raise ExtractorError(msg, expected=True)
|
||||||
|
if '<title>The URL you requested has been blocked</title>' in content[:512]:
|
||||||
|
msg = (
|
||||||
|
'Access to this webpage has been blocked by Indian censorship. '
|
||||||
|
'Use a VPN or proxy server (with --proxy) to route around it.')
|
||||||
|
block_msg = self._html_search_regex(
|
||||||
|
r'</h1><p>(.*?)</p>',
|
||||||
|
content, 'block message', default=None)
|
||||||
|
if block_msg:
|
||||||
|
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
|
||||||
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
@@ -751,6 +767,10 @@ class InfoExtractor(object):
|
|||||||
formats)
|
formats)
|
||||||
|
|
||||||
def _is_valid_url(self, url, video_id, item='video'):
|
def _is_valid_url(self, url, video_id, item='video'):
|
||||||
|
url = self._proto_relative_url(url, scheme='http:')
|
||||||
|
# For now assume non HTTP(S) URLs always valid
|
||||||
|
if not (url.startswith('http://') or url.startswith('https://')):
|
||||||
|
return True
|
||||||
try:
|
try:
|
||||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||||
return True
|
return True
|
||||||
@@ -798,8 +818,8 @@ class InfoExtractor(object):
|
|||||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||||
for i, media_el in enumerate(media_nodes):
|
for i, media_el in enumerate(media_nodes):
|
||||||
if manifest_version == '2.0':
|
if manifest_version == '2.0':
|
||||||
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
|
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
|
||||||
+ (media_el.attrib.get('href') or media_el.attrib.get('url')))
|
(media_el.attrib.get('href') or media_el.attrib.get('url')))
|
||||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
|
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
|
||||||
@@ -819,11 +839,11 @@ class InfoExtractor(object):
|
|||||||
m3u8_id=None):
|
m3u8_id=None):
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
|
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'protocol': 'm3u8',
|
'protocol': 'm3u8',
|
||||||
'preference': -1,
|
'preference': preference - 1 if preference else -1,
|
||||||
'resolution': 'multiple',
|
'resolution': 'multiple',
|
||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}]
|
}]
|
||||||
@@ -863,8 +883,13 @@ class InfoExtractor(object):
|
|||||||
formats.append({'url': format_url(line)})
|
formats.append({'url': format_url(line)})
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||||
|
format_id = []
|
||||||
|
if m3u8_id:
|
||||||
|
format_id.append(m3u8_id)
|
||||||
|
last_media_name = last_media.get('NAME') if last_media else None
|
||||||
|
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
||||||
f = {
|
f = {
|
||||||
'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
|
'format_id': '-'.join(format_id),
|
||||||
'url': format_url(line.strip()),
|
'url': format_url(line.strip()),
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
@@ -905,39 +930,57 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
rtmp_count = 0
|
rtmp_count = 0
|
||||||
for video in smil.findall('./body/switch/video'):
|
if smil.findall('./body/seq/video'):
|
||||||
src = video.get('src')
|
video = smil.findall('./body/seq/video')[0]
|
||||||
if not src:
|
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||||
continue
|
formats.extend(fmts)
|
||||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
else:
|
||||||
width = int_or_none(video.get('width'))
|
for video in smil.findall('./body/switch/video'):
|
||||||
height = int_or_none(video.get('height'))
|
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||||
proto = video.get('proto')
|
formats.extend(fmts)
|
||||||
if not proto:
|
|
||||||
if base:
|
|
||||||
if base.startswith('rtmp'):
|
|
||||||
proto = 'rtmp'
|
|
||||||
elif base.startswith('http'):
|
|
||||||
proto = 'http'
|
|
||||||
ext = video.get('ext')
|
|
||||||
if proto == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
|
|
||||||
elif proto == 'rtmp':
|
|
||||||
rtmp_count += 1
|
|
||||||
streamer = video.get('streamer') or base
|
|
||||||
formats.append({
|
|
||||||
'url': streamer,
|
|
||||||
'play_path': src,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
|
||||||
'tbr': bitrate,
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _parse_smil_video(self, video, video_id, base, rtmp_count):
|
||||||
|
src = video.get('src')
|
||||||
|
if not src:
|
||||||
|
return ([], rtmp_count)
|
||||||
|
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||||
|
width = int_or_none(video.get('width'))
|
||||||
|
height = int_or_none(video.get('height'))
|
||||||
|
proto = video.get('proto')
|
||||||
|
if not proto:
|
||||||
|
if base:
|
||||||
|
if base.startswith('rtmp'):
|
||||||
|
proto = 'rtmp'
|
||||||
|
elif base.startswith('http'):
|
||||||
|
proto = 'http'
|
||||||
|
ext = video.get('ext')
|
||||||
|
if proto == 'm3u8':
|
||||||
|
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
|
||||||
|
elif proto == 'rtmp':
|
||||||
|
rtmp_count += 1
|
||||||
|
streamer = video.get('streamer') or base
|
||||||
|
return ([{
|
||||||
|
'url': streamer,
|
||||||
|
'play_path': src,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||||
|
'tbr': bitrate,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
}], rtmp_count)
|
||||||
|
elif proto.startswith('http'):
|
||||||
|
return ([{
|
||||||
|
'url': base + src,
|
||||||
|
'ext': ext or 'flv',
|
||||||
|
'tbr': bitrate,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
}], rtmp_count)
|
||||||
|
|
||||||
def _live_title(self, name):
|
def _live_title(self, name):
|
||||||
""" Generate the title for a live video """
|
""" Generate the title for a live video """
|
||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
@@ -1001,6 +1044,27 @@ class InfoExtractor(object):
|
|||||||
any_restricted = any_restricted or is_restricted
|
any_restricted = any_restricted or is_restricted
|
||||||
return not any_restricted
|
return not any_restricted
|
||||||
|
|
||||||
|
def extract_subtitles(self, *args, **kwargs):
|
||||||
|
if (self._downloader.params.get('writesubtitles', False) or
|
||||||
|
self._downloader.params.get('listsubtitles')):
|
||||||
|
return self._get_subtitles(*args, **kwargs)
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _get_subtitles(self, *args, **kwargs):
|
||||||
|
raise NotImplementedError("This method must be implemented by subclasses")
|
||||||
|
|
||||||
|
def extract_automatic_captions(self, *args, **kwargs):
|
||||||
|
if (self._downloader.params.get('writeautomaticsub', False) or
|
||||||
|
self._downloader.params.get('listsubtitles')):
|
||||||
|
return self._get_automatic_captions(*args, **kwargs)
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _get_automatic_captions(self, *args, **kwargs):
|
||||||
|
raise NotImplementedError("This method must be implemented by subclasses")
|
||||||
|
|
||||||
|
def _subtitles_timecode(self, seconds):
|
||||||
|
return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
@@ -9,7 +9,7 @@ import xml.etree.ElementTree
|
|||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from math import pow, sqrt, floor
|
from math import pow, sqrt, floor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
@@ -23,13 +23,12 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_cbc_decrypt,
|
aes_cbc_decrypt,
|
||||||
inc,
|
|
||||||
)
|
)
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(SubtitlesInfoExtractor):
|
class CrunchyrollIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
|
_NETRC_MACHINE = 'crunchyroll'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -102,13 +101,6 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
key = obfuscate_key(id)
|
key = obfuscate_key(id)
|
||||||
|
|
||||||
class Counter:
|
|
||||||
__value = iv
|
|
||||||
|
|
||||||
def next_value(self):
|
|
||||||
temp = self.__value
|
|
||||||
self.__value = inc(self.__value)
|
|
||||||
return temp
|
|
||||||
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
||||||
return zlib.decompress(decrypted_data)
|
return zlib.decompress(decrypted_data)
|
||||||
|
|
||||||
@@ -187,6 +179,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def _get_subtitles(self, video_id, webpage):
|
||||||
|
subtitles = {}
|
||||||
|
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||||
|
sub_page = self._download_webpage(
|
||||||
|
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
||||||
|
video_id, note='Downloading subtitles for ' + sub_name)
|
||||||
|
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
||||||
|
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
||||||
|
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
||||||
|
if not id or not iv or not data:
|
||||||
|
continue
|
||||||
|
id = int(id)
|
||||||
|
iv = base64.b64decode(iv)
|
||||||
|
data = base64.b64decode(data)
|
||||||
|
|
||||||
|
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
||||||
|
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||||
|
if not lang_code:
|
||||||
|
continue
|
||||||
|
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||||
|
subtitles[lang_code] = [
|
||||||
|
{
|
||||||
|
'ext': 'srt',
|
||||||
|
'data': self._convert_subtitles_to_srt(sub_root),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'ext': 'ass',
|
||||||
|
'data': self._convert_subtitles_to_ass(sub_root),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
return subtitles
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
@@ -249,34 +273,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'format_id': video_format,
|
'format_id': video_format,
|
||||||
})
|
})
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
|
||||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
|
||||||
sub_page = self._download_webpage(
|
|
||||||
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
|
||||||
video_id, note='Downloading subtitles for ' + sub_name)
|
|
||||||
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
|
||||||
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
|
||||||
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
|
||||||
if not id or not iv or not data:
|
|
||||||
continue
|
|
||||||
id = int(id)
|
|
||||||
iv = base64.b64decode(iv)
|
|
||||||
data = base64.b64decode(data)
|
|
||||||
|
|
||||||
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
|
||||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
|
||||||
if not lang_code:
|
|
||||||
continue
|
|
||||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
|
||||||
if sub_format == 'ass':
|
|
||||||
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
|
|
||||||
else:
|
|
||||||
subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
|
|
||||||
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(video_id, subtitles)
|
|
||||||
return
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -6,7 +6,6 @@ import json
|
|||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
@@ -26,12 +25,11 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||||||
def _build_request(url):
|
def _build_request(url):
|
||||||
"""Build a request with the family filter disabled"""
|
"""Build a request with the family filter disabled"""
|
||||||
request = compat_urllib_request.Request(url)
|
request = compat_urllib_request.Request(url)
|
||||||
request.add_header('Cookie', 'family_filter=off')
|
request.add_header('Cookie', 'family_filter=off; ff=off')
|
||||||
request.add_header('Cookie', 'ff=off')
|
|
||||||
return request
|
return request
|
||||||
|
|
||||||
|
|
||||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
"""Information Extractor for Dailymotion"""
|
"""Information Extractor for Dailymotion"""
|
||||||
|
|
||||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||||
@@ -47,13 +45,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
|
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||||
'md5': '392c4b85a60a90dc4792da41ce3144eb',
|
'md5': '2137c41a8e78554bb09225b8eb322406',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'x33vw9',
|
'id': 'x2iuewm',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader': 'Amphora Alex and Van .',
|
'uploader': 'IGN',
|
||||||
'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
|
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# Vevo video
|
# Vevo video
|
||||||
@@ -113,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
|
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
|
||||||
|
|
||||||
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
|
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
|
||||||
embed_page = self._download_webpage(embed_url, video_id,
|
embed_request = self._build_request(embed_url)
|
||||||
'Downloading embed page')
|
embed_page = self._download_webpage(
|
||||||
|
embed_request, video_id, 'Downloading embed page')
|
||||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
||||||
'video info', flags=re.MULTILINE)
|
'video info', flags=re.MULTILINE)
|
||||||
info = json.loads(info)
|
info = json.loads(info)
|
||||||
@@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(video_id, webpage)
|
|
||||||
return
|
|
||||||
|
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
||||||
@@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
sub_list = self._download_webpage(
|
sub_list = self._download_webpage(
|
||||||
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
||||||
@@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
return {}
|
return {}
|
||||||
info = json.loads(sub_list)
|
info = json.loads(sub_list)
|
||||||
if (info['total'] > 0):
|
if (info['total'] > 0):
|
||||||
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
|
sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
|
||||||
return sub_lang_list
|
return sub_lang_list
|
||||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||||
return {}
|
return {}
|
||||||
|
@@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):
|
|||||||
r"flashvars.pvg_id=\"(\d+)\";",
|
r"flashvars.pvg_id=\"(\d+)\";",
|
||||||
webpage, 'ID')
|
webpage, 'ID')
|
||||||
|
|
||||||
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
json_url = (
|
||||||
+ video_id)
|
'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
|
||||||
|
video_id)
|
||||||
info = self._download_json(json_url, title, 'Downloading JSON config')
|
info = self._download_json(json_url, title, 'Downloading JSON config')
|
||||||
video_url = info['renditions'][0]['url']
|
video_url = info['renditions'][0]['url']
|
||||||
|
|
||||||
|
73
youtube_dl/extractor/dhm.py
Normal file
73
youtube_dl/extractor/dhm.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    xpath_text,
    parse_duration,
)


class DHMIE(InfoExtractor):
    """Extractor for the film archive of the Deutsches Historisches Museum."""
    IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
    _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
        'md5': '11c475f670209bf6acca0b2b7ef51827',
        'info_dict': {
            'id': 'the-marshallplan-at-work-in-west-germany',
            'ext': 'flv',
            'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
            'description': 'md5:1fabd480c153f97b07add61c44407c82',
            'duration': 660,
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
        'md5': '09890226332476a3e3f6f2cb74734aa5',
        'info_dict': {
            'id': 'rolle-1',
            'ext': 'flv',
            'title': 'ROLLE 1',
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The embedded player configuration points at an XSPF playlist
        # that carries the actual media location.
        xspf_url = self._search_regex(
            r"file\s*:\s*'([^']+)'", webpage, 'playlist url')

        xspf = self._download_xml(xspf_url, video_id)

        first_track = xspf.find(
            './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')

        video_url = xpath_text(
            first_track, './{http://xspf.org/ns/0/}location',
            'video url', fatal=True)
        thumbnail = xpath_text(
            first_track, './{http://xspf.org/ns/0/}image',
            'thumbnail')

        # Remaining metadata is scraped from the HTML page itself.
        title = self._search_regex(
            [r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
            webpage, 'title').strip()
        description = self._html_search_regex(
            r'<p><strong>Description:</strong>(.+?)</p>',
            webpage, 'description', default=None)
        duration = parse_duration(self._search_regex(
            r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
            webpage, 'duration', default=None))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
        }
|
112
youtube_dl/extractor/douyutv.py
Normal file
112
youtube_dl/extractor/douyutv.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import time
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (ExtractorError, unescapeHTML)
|
||||||
|
from ..compat import (compat_str, compat_basestring)
|
||||||
|
|
||||||
|
|
||||||
|
class DouyuTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.douyutv.com/iseven',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '17732',
|
||||||
|
'display_id': 'iseven',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader': '7师傅',
|
||||||
|
'uploader_id': '431925',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.douyutv.com/85982',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '85982',
|
||||||
|
'display_id': '85982',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'douyu小漠',
|
||||||
|
'uploader_id': '3769985',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
if video_id.isdigit():
|
||||||
|
room_id = video_id
|
||||||
|
else:
|
||||||
|
page = self._download_webpage(url, video_id)
|
||||||
|
room_id = self._html_search_regex(
|
||||||
|
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||||
|
|
||||||
|
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
|
||||||
|
room_id, int(time.time()))
|
||||||
|
|
||||||
|
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
|
||||||
|
config = self._download_json(
|
||||||
|
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
data = config['data']
|
||||||
|
|
||||||
|
error_code = config.get('error', 0)
|
||||||
|
if error_code is not 0:
|
||||||
|
error_desc = 'Server reported error %i' % error_code
|
||||||
|
if isinstance(data, (compat_str, compat_basestring)):
|
||||||
|
error_desc += ': ' + data
|
||||||
|
raise ExtractorError(error_desc, expected=True)
|
||||||
|
|
||||||
|
show_status = data.get('show_status')
|
||||||
|
# 1 = live, 2 = offline
|
||||||
|
if show_status == '2':
|
||||||
|
raise ExtractorError(
|
||||||
|
'Live stream is offline', expected=True)
|
||||||
|
|
||||||
|
base_url = data['rtmp_url']
|
||||||
|
live_path = data['rtmp_live']
|
||||||
|
|
||||||
|
title = self._live_title(unescapeHTML(data['room_name']))
|
||||||
|
description = data.get('show_details')
|
||||||
|
thumbnail = data.get('room_src')
|
||||||
|
|
||||||
|
uploader = data.get('nickname')
|
||||||
|
uploader_id = data.get('owner_uid')
|
||||||
|
|
||||||
|
multi_formats = data.get('rtmp_multi_bitrate')
|
||||||
|
if not isinstance(multi_formats, dict):
|
||||||
|
multi_formats = {}
|
||||||
|
multi_formats['live'] = live_path
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': '%s/%s' % (base_url, format_path),
|
||||||
|
'format_id': format_id,
|
||||||
|
'preference': 1 if format_id == 'live' else 0,
|
||||||
|
} for format_id, format_path in multi_formats.items()]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': room_id,
|
||||||
|
'display_id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'formats': formats,
|
||||||
|
'is_live': True,
|
||||||
|
}
|
@@ -1,11 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .common import InfoExtractor, ExtractorError
|
||||||
from .common import ExtractorError
|
|
||||||
from ..utils import parse_iso8601
|
from ..utils import parse_iso8601
|
||||||
|
|
||||||
|
|
||||||
class DRTVIE(SubtitlesInfoExtractor):
|
class DRTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@@ -76,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor):
|
|||||||
}
|
}
|
||||||
for subs in subtitles_list:
|
for subs in subtitles_list:
|
||||||
lang = subs['Language']
|
lang = subs['Language']
|
||||||
subtitles[LANGS.get(lang, lang)] = subs['Uri']
|
subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
|
||||||
|
|
||||||
if not formats and restricted_to_denmark:
|
if not formats and restricted_to_denmark:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
@@ -84,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(video_id, subtitles)
|
|
||||||
return
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@@ -96,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor):
|
|||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(video_id, subtitles),
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
56
youtube_dl/extractor/dumpert.py
Normal file
56
youtube_dl/extractor/dumpert.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import qualities
|
||||||
|
|
||||||
|
|
||||||
|
class DumpertIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
||||||
|
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6646981/951bc60f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ik heb nieuws voor je',
|
||||||
|
'description': 'Niet schrikken hoor',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
files_base64 = self._search_regex(
|
||||||
|
r'data-files="([^"]+)"', webpage, 'data files')
|
||||||
|
|
||||||
|
files = self._parse_json(
|
||||||
|
base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'quality': quality(format_id),
|
||||||
|
} for format_id, video_url in files.items() if format_id != 'still']
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
'title', webpage) or self._og_search_title(webpage)
|
||||||
|
description = self._html_search_meta(
|
||||||
|
'description', webpage) or self._og_search_description(webpage)
|
||||||
|
thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'formats': formats
|
||||||
|
}
|
98
youtube_dl/extractor/eagleplatform.py
Normal file
98
youtube_dl/extractor/eagleplatform.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EaglePlatformIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
eagleplatform:(?P<custom_host>[^/]+):|
|
||||||
|
https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
|
||||||
|
)
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
# http://lenta.ru/news/2015/03/06/navalny/
|
||||||
|
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
|
||||||
|
'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '227304',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Навальный вышел на свободу',
|
||||||
|
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 87,
|
||||||
|
'view_count': int,
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# http://muz-tv.ru/play/7129/
|
||||||
|
# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
|
||||||
|
'url': 'eagleplatform:media.clipyou.ru:12820',
|
||||||
|
'md5': '6c2ebeab03b739597ce8d86339d5a905',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12820',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "'O Sole Mio",
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 216,
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _handle_error(self, response):
|
||||||
|
status = int_or_none(response.get('status', 200))
|
||||||
|
if status != 200:
|
||||||
|
raise ExtractorError(' '.join(response['errors']), expected=True)
|
||||||
|
|
||||||
|
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||||
|
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
|
||||||
|
self._handle_error(response)
|
||||||
|
return response
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
|
||||||
|
|
||||||
|
player_data = self._download_json(
|
||||||
|
'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
|
||||||
|
|
||||||
|
media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
|
||||||
|
|
||||||
|
title = media['title']
|
||||||
|
description = media.get('description')
|
||||||
|
thumbnail = media.get('snapshot')
|
||||||
|
duration = int_or_none(media.get('duration'))
|
||||||
|
view_count = int_or_none(media.get('views'))
|
||||||
|
|
||||||
|
age_restriction = media.get('age_restriction')
|
||||||
|
age_limit = None
|
||||||
|
if age_restriction:
|
||||||
|
age_limit = 0 if age_restriction == 'allow_all' else 18
|
||||||
|
|
||||||
|
m3u8_data = self._download_json(
|
||||||
|
media['sources']['secure_m3u8']['auto'],
|
||||||
|
video_id, 'Downloading m3u8 JSON')
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_data['data'][0], video_id,
|
||||||
|
'mp4', entry_protocol='m3u8_native')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@@ -103,20 +102,23 @@ class EightTracksIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
playlist_id = self._match_id(url)
|
||||||
playlist_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
json_like = self._search_regex(
|
data = self._parse_json(
|
||||||
r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
|
self._search_regex(
|
||||||
data = json.loads(json_like)
|
r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
|
||||||
|
playlist_id)
|
||||||
|
|
||||||
session = str(random.randint(0, 1000000000))
|
session = str(random.randint(0, 1000000000))
|
||||||
mix_id = data['id']
|
mix_id = data['id']
|
||||||
track_count = data['tracks_count']
|
track_count = data['tracks_count']
|
||||||
duration = data['duration']
|
duration = data['duration']
|
||||||
avg_song_duration = float(duration) / track_count
|
avg_song_duration = float(duration) / track_count
|
||||||
|
# duration is sometimes negative, use predefined avg duration
|
||||||
|
if avg_song_duration <= 0:
|
||||||
|
avg_song_duration = 300
|
||||||
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||||
next_url = first_url
|
next_url = first_url
|
||||||
entries = []
|
entries = []
|
||||||
|
@@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
|
|||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>(.*?) - EPORNER', webpage, 'title')
|
r'<title>(.*?) - EPORNER', webpage, 'title')
|
||||||
|
|
||||||
redirect_code = self._html_search_regex(
|
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
|
||||||
r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
|
|
||||||
webpage, 'redirect_code')
|
|
||||||
redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
|
|
||||||
player_code = self._download_webpage(
|
player_code = self._download_webpage(
|
||||||
redirect_url, display_id, note='Downloading player config')
|
redirect_url, display_id, note='Downloading player config')
|
||||||
|
|
||||||
@@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': self._rta_search(webpage),
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
@@ -1,11 +1,17 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class EroProfileIE(InfoExtractor):
|
class EroProfileIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
|
||||||
_TEST = {
|
_LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
|
||||||
|
_NETRC_MACHINE = 'eroprofile'
|
||||||
|
_TESTS = [{
|
||||||
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
|
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
|
||||||
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
|
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor):
|
|||||||
'thumbnail': 're:https?://.*\.jpg',
|
'thumbnail': 're:https?://.*\.jpg',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
|
||||||
|
'md5': '1baa9602ede46ce904c431f5418d8916',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1133519',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
|
||||||
|
'thumbnail': 're:https?://.*\.jpg',
|
||||||
|
'age_limit': 18,
|
||||||
|
},
|
||||||
|
'skip': 'Requires login',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
query = compat_urllib_parse.urlencode({
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
'url': 'http://www.eroprofile.com/',
|
||||||
|
})
|
||||||
|
login_url = self._LOGIN_URL + query
|
||||||
|
login_page = self._download_webpage(login_url, None, False)
|
||||||
|
|
||||||
|
m = re.search(r'Your username or password was incorrect\.', login_page)
|
||||||
|
if m:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Wrong username and/or password.', expected=True)
|
||||||
|
|
||||||
|
self.report_login()
|
||||||
|
redirect_url = self._search_regex(
|
||||||
|
r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
|
||||||
|
self._download_webpage(redirect_url, None, False)
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
m = re.search(r'You must be logged in to view this video\.', webpage)
|
||||||
|
if m:
|
||||||
|
raise ExtractorError(
|
||||||
|
'This video requires login. Please specify a username and password and try again.', expected=True)
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||||
webpage, 'video id', default=None)
|
webpage, 'video id', default=None)
|
||||||
|
@@ -3,15 +3,18 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
parse_duration,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class EscapistIE(InfoExtractor):
|
class EscapistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
||||||
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||||
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
||||||
@@ -22,46 +25,68 @@ class EscapistIE(InfoExtractor):
|
|||||||
'uploader_id': 'the-escapist-presents',
|
'uploader_id': 'the-escapist-presents',
|
||||||
'uploader': 'The Escapist Presents',
|
'uploader': 'The Escapist Presents',
|
||||||
'title': "Breaking Down Baldur's Gate",
|
'title': "Breaking Down Baldur's Gate",
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 264,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage_req = compat_urllib_request.Request(url)
|
||||||
|
webpage_req.add_header('User-Agent', self._USER_AGENT)
|
||||||
|
webpage = self._download_webpage(webpage_req, video_id)
|
||||||
|
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r"<h1 class='headline'><a href='/videos/view/(.*?)'",
|
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
|
||||||
webpage, 'uploader ID', fatal=False)
|
webpage, 'uploader ID', fatal=False)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r"<h1 class='headline'>(.*?)</a>",
|
r"<h1\s+class='headline'>(.*?)</a>",
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
|
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||||
|
|
||||||
raw_title = self._html_search_meta('title', webpage, fatal=True)
|
raw_title = self._html_search_meta('title', webpage, fatal=True)
|
||||||
title = raw_title.partition(' : ')[2]
|
title = raw_title.partition(' : ')[2]
|
||||||
|
|
||||||
player_url = self._og_search_video_url(webpage, name='player URL')
|
config_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||||
config_url = compat_urllib_parse.unquote(self._search_regex(
|
r'''(?x)
|
||||||
r'config=(.*)$', player_url, 'config URL'))
|
(?:
|
||||||
|
<param\s+name="flashvars".*?\s+value="config=|
|
||||||
|
flashvars="config=
|
||||||
|
)
|
||||||
|
(https?://[^"&]+)
|
||||||
|
''',
|
||||||
|
webpage, 'config URL'))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
ad_formats = []
|
||||||
|
|
||||||
def _add_format(name, cfgurl, quality):
|
def _add_format(name, cfg_url, quality):
|
||||||
|
cfg_req = compat_urllib_request.Request(cfg_url)
|
||||||
|
cfg_req.add_header('User-Agent', self._USER_AGENT)
|
||||||
config = self._download_json(
|
config = self._download_json(
|
||||||
cfgurl, video_id,
|
cfg_req, video_id,
|
||||||
'Downloading ' + name + ' configuration',
|
'Downloading ' + name + ' configuration',
|
||||||
'Unable to download ' + name + ' configuration',
|
'Unable to download ' + name + ' configuration',
|
||||||
transform_source=js_to_json)
|
transform_source=js_to_json)
|
||||||
|
|
||||||
playlist = config['playlist']
|
playlist = config['playlist']
|
||||||
video_url = next(
|
for p in playlist:
|
||||||
p['url'] for p in playlist
|
if p.get('eventCategory') == 'Video':
|
||||||
if p.get('eventCategory') == 'Video')
|
ar = formats
|
||||||
formats.append({
|
elif p.get('eventCategory') == 'Video Postroll':
|
||||||
'url': video_url,
|
ar = ad_formats
|
||||||
'format_id': name,
|
else:
|
||||||
'quality': quality,
|
continue
|
||||||
})
|
|
||||||
|
ar.append({
|
||||||
|
'url': p['url'],
|
||||||
|
'format_id': name,
|
||||||
|
'quality': quality,
|
||||||
|
'http_headers': {
|
||||||
|
'User-Agent': self._USER_AGENT,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
_add_format('normal', config_url, quality=0)
|
_add_format('normal', config_url, quality=0)
|
||||||
hq_url = (config_url +
|
hq_url = (config_url +
|
||||||
@@ -70,10 +95,12 @@ class EscapistIE(InfoExtractor):
|
|||||||
_add_format('hq', hq_url, quality=1)
|
_add_format('hq', hq_url, quality=1)
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
pass # That's fine, we'll just use normal quality
|
pass # That's fine, we'll just use normal quality
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
if '/escapist/sales-marketing/' in formats[-1]['url']:
|
||||||
|
raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
|
||||||
|
|
||||||
|
res = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
@@ -81,5 +108,21 @@ class EscapistIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': description,
|
'description': description,
|
||||||
'player_url': player_url,
|
'duration': duration,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if self._downloader.params.get('include_ads') and ad_formats:
|
||||||
|
self._sort_formats(ad_formats)
|
||||||
|
ad_res = {
|
||||||
|
'id': '%s-ad' % video_id,
|
||||||
|
'title': '%s (Postroll)' % title,
|
||||||
|
'formats': ad_formats,
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': [res, ad_res],
|
||||||
|
'title': title,
|
||||||
|
'id': video_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
return res
|
||||||
|
@@ -4,11 +4,11 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_parse_qs,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
qualities,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||||
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
|
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '652431',
|
'id': '652431',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -49,19 +49,27 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(
|
flash_vars = compat_parse_qs(self._search_regex(
|
||||||
r'video_url=(.+?)&', webpage, 'video_url'))
|
r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
|
||||||
format = path.split('/')[5].split('_')[:2]
|
formats = []
|
||||||
format = "-".join(format)
|
quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
|
||||||
|
for k, vals in flash_vars.items():
|
||||||
|
m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
|
||||||
|
if m is not None:
|
||||||
|
formats.append({
|
||||||
|
'format_id': m.group('quality'),
|
||||||
|
'quality': quality(m.group('quality')),
|
||||||
|
'url': vals[0],
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
|
'formats': formats,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'url': video_url,
|
|
||||||
'format': format,
|
|
||||||
'format_id': format,
|
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
@@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
|
|||||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||||
params = json.loads(params_raw)
|
params = json.loads(params_raw)
|
||||||
video_data = params['video_data'][0]
|
video_data = params['video_data'][0]
|
||||||
video_url = video_data.get('hd_src')
|
|
||||||
if not video_url:
|
formats = []
|
||||||
video_url = video_data['sd_src']
|
for quality in ['sd', 'hd']:
|
||||||
if not video_url:
|
src = video_data.get('%s_src' % quality)
|
||||||
raise ExtractorError('Cannot find video URL')
|
if src is not None:
|
||||||
|
formats.append({
|
||||||
|
'format_id': quality,
|
||||||
|
'url': src,
|
||||||
|
})
|
||||||
|
if not formats:
|
||||||
|
raise ExtractorError('Cannot find video formats')
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
|
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
|
||||||
@@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'duration': int_or_none(video_data.get('video_duration')),
|
'duration': int_or_none(video_data.get('video_duration')),
|
||||||
'thumbnail': video_data.get('thumbnail_src'),
|
'thumbnail': video_data.get('thumbnail_src'),
|
||||||
}
|
}
|
||||||
|
@@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
|
|||||||
IE_NAME = '5min'
|
IE_NAME = '5min'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
||||||
|
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
|
||||||
5min:)
|
5min:)
|
||||||
(?P<id>\d+)
|
(?P<id>\d+)
|
||||||
'''
|
'''
|
||||||
|
41
youtube_dl/extractor/footyroom.py
Normal file
41
youtube_dl/extractor/footyroom.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


class FootyRoomIE(InfoExtractor):
    """Extractor for match-highlight posts on footyroom.com."""
    _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
        'info_dict': {
            'id': 'schalke-04-0-2-real-madrid-2015-02',
            'title': 'Schalke 04 0 – 2 Real Madrid',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # Each post ships a JSON array of videos for a client-side
        # video selector widget.
        videos = self._parse_json(
            self._search_regex(
                r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
            playlist_id)

        entries = []
        for item in videos:
            payload = item.get('payload')
            if not payload:
                continue
            # Payloads embed Playwire player configs; defer extraction
            # to the Playwire extractor.
            playwire_url = self._search_regex(
                r'data-config="([^"]+)"', payload,
                'playwire url', default=None)
            if playwire_url:
                entries.append(self.url_result(playwire_url, 'Playwire'))

        return self.playlist_result(
            entries, playlist_id, self._og_search_title(webpage))
|
@@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
bitrates.sort()
|
bitrates.sort()
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for bitrate in bitrates:
|
for bitrate in bitrates:
|
||||||
for link in links:
|
for link in links:
|
||||||
formats.append({
|
formats.append({
|
||||||
@@ -59,6 +58,13 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
'vbr': bitrate,
|
'vbr': bitrate,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
|
||||||
|
subtitles[src_lang] = [{
|
||||||
|
'ext': src.split('/')[-1],
|
||||||
|
'url': 'http://www.funnyordie.com%s' % src,
|
||||||
|
}]
|
||||||
|
|
||||||
post_json = self._search_regex(
|
post_json = self._search_regex(
|
||||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||||
post = json.loads(post_json)
|
post = json.loads(post_json)
|
||||||
@@ -69,4 +75,5 @@ class FunnyOrDieIE(InfoExtractor):
|
|||||||
'description': post.get('description'),
|
'description': post.get('description'),
|
||||||
'thumbnail': post.get('picture'),
|
'thumbnail': post.get('picture'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
@@ -1,6 +1,8 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -31,7 +33,7 @@ class GameStarIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
og_title = self._og_search_title(webpage)
|
og_title = self._og_search_title(webpage)
|
||||||
title = og_title.replace(' - Video bei GameStar.de', '').strip()
|
title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
|
||||||
|
|
||||||
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
|
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
|
||||||
|
|
||||||
|
38
youtube_dl/extractor/gazeta.py
Normal file
38
youtube_dl/extractor/gazeta.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class GazetaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
|
||||||
|
'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '205566',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '«70–80 процентов гражданских в Донецке на грани голода»',
|
||||||
|
'description': 'md5:38617526050bd17b234728e7f9620a71',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
embed_url = '%s?p=embed' % mobj.group('url')
|
||||||
|
embed_page = self._download_webpage(
|
||||||
|
embed_url, display_id, 'Downloading embed page')
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
|
||||||
|
|
||||||
|
return self.url_result(
|
||||||
|
'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
|
@@ -7,10 +7,12 @@ from ..compat import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
|
from ..utils import remove_end
|
||||||
|
|
||||||
|
|
||||||
class GDCVaultIE(InfoExtractor):
|
class GDCVaultIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
|
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
|
||||||
|
_NETRC_MACHINE = 'gdcvault'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
|
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
|
||||||
@@ -65,10 +67,12 @@ class GDCVaultIE(InfoExtractor):
|
|||||||
|
|
||||||
def _parse_flv(self, xml_description):
|
def _parse_flv(self, xml_description):
|
||||||
video_formats = []
|
video_formats = []
|
||||||
akami_url = xml_description.find('./metadata/akamaiHost').text
|
akamai_url = xml_description.find('./metadata/akamaiHost').text
|
||||||
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
||||||
video_formats.append({
|
video_formats.append({
|
||||||
'url': 'rtmp://' + akami_url + '/' + slide_video_path,
|
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||||
|
'play_path': remove_end(slide_video_path, '.flv'),
|
||||||
|
'ext': 'flv',
|
||||||
'format_note': 'slide deck video',
|
'format_note': 'slide deck video',
|
||||||
'quality': -2,
|
'quality': -2,
|
||||||
'preference': -2,
|
'preference': -2,
|
||||||
@@ -76,7 +80,9 @@ class GDCVaultIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
||||||
video_formats.append({
|
video_formats.append({
|
||||||
'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
|
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||||
|
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||||
|
'ext': 'flv',
|
||||||
'format_note': 'speaker video',
|
'format_note': 'speaker video',
|
||||||
'quality': -1,
|
'quality': -1,
|
||||||
'preference': -1,
|
'preference': -1,
|
||||||
|
@@ -26,8 +26,10 @@ from ..utils import (
|
|||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
|
from .nbc import NBCSportsVPlayerIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .smotri import SmotriIE
|
from .smotri import SmotriIE
|
||||||
@@ -526,13 +528,24 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Viddler'],
|
'add_ie': ['Viddler'],
|
||||||
},
|
},
|
||||||
|
# Libsyn embed
|
||||||
|
{
|
||||||
|
'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3377616',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||||
|
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||||
|
'upload_date': '20150220',
|
||||||
|
},
|
||||||
|
},
|
||||||
# jwplayer YouTube
|
# jwplayer YouTube
|
||||||
{
|
{
|
||||||
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
|
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Mrj4DVp2zeA',
|
'id': 'Mrj4DVp2zeA',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20150204',
|
'upload_date': '20150212',
|
||||||
'uploader': 'The National Archives UK',
|
'uploader': 'The National Archives UK',
|
||||||
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
|
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
|
||||||
'uploader_id': 'NationalArchives08',
|
'uploader_id': 'NationalArchives08',
|
||||||
@@ -547,6 +560,96 @@ class GenericIE(InfoExtractor):
|
|||||||
'id': 'aanslagen-kopenhagen',
|
'id': 'aanslagen-kopenhagen',
|
||||||
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
|
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
# Zapiks embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '118046',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# Kaltura embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_eergr3h1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20150226',
|
||||||
|
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
|
||||||
|
'timestamp': int,
|
||||||
|
'title': 'John Carlson Postgame 2/25/15',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# Eagle.Platform embed (generic URL)
|
||||||
|
{
|
||||||
|
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '227304',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Навальный вышел на свободу',
|
||||||
|
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 87,
|
||||||
|
'view_count': int,
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# ClipYou (Eagle.Platform) embed (custom URL)
|
||||||
|
{
|
||||||
|
'url': 'http://muz-tv.ru/play/7129/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12820',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "'O Sole Mio",
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 216,
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# Pladform embed
|
||||||
|
{
|
||||||
|
'url': 'http://muz-tv.ru/kinozal/view/7400/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100183293',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
|
||||||
|
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 694,
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# 5min embed
|
||||||
|
{
|
||||||
|
'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
|
||||||
|
'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '518726732',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Facebook Creates "On This Day" | Crunch Report',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# RSS feed with enclosure
|
||||||
|
{
|
||||||
|
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'upload_date': '20150228',
|
||||||
|
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# NBC Sports vplayer embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ln7x1qSThw4k',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': "PFT Live: New leader in the 'new-look' defense",
|
||||||
|
'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -559,11 +662,24 @@ class GenericIE(InfoExtractor):
|
|||||||
playlist_desc_el = doc.find('./channel/description')
|
playlist_desc_el = doc.find('./channel/description')
|
||||||
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
||||||
|
|
||||||
entries = [{
|
entries = []
|
||||||
'_type': 'url',
|
for it in doc.findall('./channel/item'):
|
||||||
'url': e.find('link').text,
|
next_url = xpath_text(it, 'link', fatal=False)
|
||||||
'title': e.find('title').text,
|
if not next_url:
|
||||||
} for e in doc.findall('./channel/item')]
|
enclosure_nodes = it.findall('./enclosure')
|
||||||
|
for e in enclosure_nodes:
|
||||||
|
next_url = e.attrib.get('url')
|
||||||
|
if next_url:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not next_url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': next_url,
|
||||||
|
'title': it.find('title').text,
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
@@ -922,6 +1038,19 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
|
# Look for NYTimes player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
|
# Look for Libsyn player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for Ooyala videos
|
# Look for Ooyala videos
|
||||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||||
@@ -1098,6 +1227,47 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'Livestream')
|
return self.url_result(mobj.group('url'), 'Livestream')
|
||||||
|
|
||||||
|
# Look for Zapiks embed
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||||
|
|
||||||
|
# Look for Kaltura embeds
|
||||||
|
mobj = re.search(
|
||||||
|
r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
||||||
|
|
||||||
|
# Look for Eagle.Platform embeds
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'EaglePlatform')
|
||||||
|
|
||||||
|
# Look for ClipYou (uses Eagle.Platform) embeds
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
|
||||||
|
|
||||||
|
# Look for Pladform embeds
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'Pladform')
|
||||||
|
|
||||||
|
# Look for 5min embeds
|
||||||
|
mobj = re.search(
|
||||||
|
r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
|
||||||
|
|
||||||
|
# Look for NBC Sports VPlayer embeds
|
||||||
|
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
|
||||||
|
if nbc_sports_url:
|
||||||
|
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
@@ -1154,10 +1324,16 @@ class GenericIE(InfoExtractor):
|
|||||||
# HTML5 video
|
# HTML5 video
|
||||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
|
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||||
found = re.search(
|
found = re.search(
|
||||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
|
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
|
||||||
webpage)
|
webpage)
|
||||||
|
if not found:
|
||||||
|
# Look also in Refresh HTTP header
|
||||||
|
refresh_header = head_response.headers.get('Refresh')
|
||||||
|
if refresh_header:
|
||||||
|
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||||
if found:
|
if found:
|
||||||
new_url = found.group(1)
|
new_url = found.group(1)
|
||||||
self.report_following_redirect(new_url)
|
self.report_following_redirect(new_url)
|
||||||
@@ -1193,7 +1369,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return entries[0]
|
return entries[0]
|
||||||
else:
|
else:
|
||||||
for num, e in enumerate(entries, start=1):
|
for num, e in enumerate(entries, start=1):
|
||||||
e['title'] = '%s (%d)' % (e['title'], num)
|
# 'url' results don't have a title
|
||||||
|
if e.get('title') is not None:
|
||||||
|
e['title'] = '%s (%d)' % (e['title'], num)
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
|
@@ -20,7 +20,7 @@ class GloboIE(InfoExtractor):
|
|||||||
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
|
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
|
||||||
|
|
||||||
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
||||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
|
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
|
||||||
|
|
||||||
_VIDEOID_REGEXES = [
|
_VIDEOID_REGEXES = [
|
||||||
r'\bdata-video-id="(\d+)"',
|
r'\bdata-video-id="(\d+)"',
|
||||||
|
@@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor):
|
|||||||
|
|
||||||
if webpage is not None:
|
if webpage is not None:
|
||||||
o = GroovesharkHtmlParser.extract_object_tags(webpage)
|
o = GroovesharkHtmlParser.extract_object_tags(webpage)
|
||||||
return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
|
return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
|
||||||
|
|
||||||
return (webpage, None)
|
return webpage, None
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self.ts = int(time.time() * 1000) # timestamp in millis
|
self.ts = int(time.time() * 1000) # timestamp in millis
|
||||||
@@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor):
|
|||||||
swf_referer = None
|
swf_referer = None
|
||||||
if self.do_playerpage_request:
|
if self.do_playerpage_request:
|
||||||
(_, player_objs) = self._get_playerpage(url)
|
(_, player_objs) = self._get_playerpage(url)
|
||||||
if player_objs is not None:
|
if player_objs:
|
||||||
swf_referer = self._build_swf_referer(url, player_objs[0])
|
swf_referer = self._build_swf_referer(url, player_objs[0])
|
||||||
self.to_screen('SWF Referer: %s' % swf_referer)
|
self.to_screen('SWF Referer: %s' % swf_referer)
|
||||||
|
|
||||||
|
97
youtube_dl/extractor/imgur.py
Normal file
97
youtube_dl/extractor/imgur.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ImgurIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'A61SaA1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||||
|
'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://imgur.com/A61SaA1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'A61SaA1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||||
|
'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
width = int_or_none(self._search_regex(
|
||||||
|
r'<param name="width" value="([0-9]+)"',
|
||||||
|
webpage, 'width', fatal=False))
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'<param name="height" value="([0-9]+)"',
|
||||||
|
webpage, 'height', fatal=False))
|
||||||
|
|
||||||
|
video_elements = self._search_regex(
|
||||||
|
r'(?s)<div class="video-elements">(.*?)</div>',
|
||||||
|
webpage, 'video elements', default=None)
|
||||||
|
if not video_elements:
|
||||||
|
raise ExtractorError(
|
||||||
|
'No sources found for video %s. Maybe an image?' % video_id,
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
|
||||||
|
formats.append({
|
||||||
|
'format_id': m.group('type').partition('/')[2],
|
||||||
|
'url': self._proto_relative_url(m.group('src')),
|
||||||
|
'ext': mimetype2ext(m.group('type')),
|
||||||
|
'acodec': 'none',
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'http_headers': {
|
||||||
|
'User-Agent': 'youtube-dl (like wget)',
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
gif_json = self._search_regex(
|
||||||
|
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
|
||||||
|
webpage, 'GIF code', fatal=False)
|
||||||
|
if gif_json:
|
||||||
|
gifd = self._parse_json(
|
||||||
|
gif_json, video_id, transform_source=js_to_json)
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'gif',
|
||||||
|
'preference': -10,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'ext': 'gif',
|
||||||
|
'acodec': 'none',
|
||||||
|
'vcodec': 'gif',
|
||||||
|
'container': 'gif',
|
||||||
|
'url': self._proto_relative_url(gifd['gifUrl']),
|
||||||
|
'filesize': gifd.get('size'),
|
||||||
|
'http_headers': {
|
||||||
|
'User-Agent': 'youtube-dl (like wget)',
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
}
|
@@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -15,10 +14,10 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||||
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5182',
|
'id': '114765',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||||
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
|
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -26,26 +25,29 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
title = mobj.group(1)
|
title = mobj.group(1)
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
xml_link = self._html_search_regex(
|
title = self._html_search_meta('name', webpage)
|
||||||
r'<param name="flashvars" value="config=(.*?)" />',
|
config_url = self._html_search_regex(
|
||||||
|
r'data-src="(/contenu/medias/video.php.*?)"',
|
||||||
webpage, 'config URL')
|
webpage, 'config URL')
|
||||||
|
config_url = 'http://www.jeuxvideo.com' + config_url
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
r'id=(\d+)',
|
||||||
xml_link, 'video ID')
|
config_url, 'video ID')
|
||||||
|
|
||||||
config = self._download_xml(
|
config = self._download_json(
|
||||||
xml_link, title, 'Downloading XML config')
|
config_url, title, 'Downloading JSON config')
|
||||||
info_json = config.find('format.json').text
|
|
||||||
info = json.loads(info_json)['versions'][0]
|
|
||||||
|
|
||||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
formats = [{
|
||||||
|
'url': source['file'],
|
||||||
|
'format_id': source['label'],
|
||||||
|
'resolution': source['label'],
|
||||||
|
} for source in reversed(config['sources'])]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': config.find('titre_video').text,
|
'title': title,
|
||||||
'ext': 'mp4',
|
'formats': formats,
|
||||||
'url': video_url,
|
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': config.find('image').text,
|
'thumbnail': config.get('image'),
|
||||||
}
|
}
|
||||||
|
138
youtube_dl/extractor/kaltura.py
Normal file
138
youtube_dl/extractor/kaltura.py
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class KalturaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:kaltura:|
|
||||||
|
https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
|
||||||
|
)(?P<partner_id>\d+)
|
||||||
|
(?::|
|
||||||
|
/(?:[^/]+/)*?entry_id/
|
||||||
|
)(?P<id>[0-9a-z_]+)'''
|
||||||
|
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'kaltura:269692:1_1jc2y3e4',
|
||||||
|
'md5': '3adcbdb3dcc02d647539e53f284ba171',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_1jc2y3e4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Track 4',
|
||||||
|
'upload_date': '20131219',
|
||||||
|
'uploader_id': 'mlundberg@wolfgangsvault.com',
|
||||||
|
'description': 'The Allman Brothers Band, 12/16/1981',
|
||||||
|
'thumbnail': 're:^https?://.*/thumbnail/.*',
|
||||||
|
'timestamp': int,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
|
||||||
|
params = actions[0]
|
||||||
|
if len(actions) > 1:
|
||||||
|
for i, a in enumerate(actions[1:], start=1):
|
||||||
|
for k, v in a.items():
|
||||||
|
params['%d:%s' % (i, k)] = v
|
||||||
|
|
||||||
|
query = compat_urllib_parse.urlencode(params)
|
||||||
|
url = self._API_BASE + query
|
||||||
|
data = self._download_json(url, video_id, *args, **kwargs)
|
||||||
|
|
||||||
|
status = data if len(actions) == 1 else data[0]
|
||||||
|
if status.get('objectType') == 'KalturaAPIException':
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: %s' % (self.IE_NAME, status['message']))
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
def _get_kaltura_signature(self, video_id, partner_id):
|
||||||
|
actions = [{
|
||||||
|
'apiVersion': '3.1',
|
||||||
|
'expiry': 86400,
|
||||||
|
'format': 1,
|
||||||
|
'service': 'session',
|
||||||
|
'action': 'startWidgetSession',
|
||||||
|
'widgetId': '_%s' % partner_id,
|
||||||
|
}]
|
||||||
|
return self._kaltura_api_call(
|
||||||
|
video_id, actions, note='Downloading Kaltura signature')['ks']
|
||||||
|
|
||||||
|
def _get_video_info(self, video_id, partner_id):
    """Fetch base entry metadata plus flavor/context data for a video.

    Issues a single multirequest containing a 'baseentry:get' and a
    'baseentry:getContextData' call, authenticated with a fresh widget
    session signature.
    """
    signature = self._get_kaltura_signature(video_id, partner_id)

    multirequest_header = {
        'action': 'null',
        'apiVersion': '3.1.5',
        'clientTag': 'kdp:v3.8.5',
        'format': 1,  # JSON, 2 = XML, 3 = PHP
        'service': 'multirequest',
        'ks': signature,
    }
    get_entry = {
        'action': 'get',
        'entryId': video_id,
        'service': 'baseentry',
        'version': '-1',
    }
    get_context_data = {
        'action': 'getContextData',
        'contextDataParams:objectType': 'KalturaEntryContextDataParams',
        'contextDataParams:referrer': 'http://www.kaltura.com/',
        'contextDataParams:streamerType': 'http',
        'entryId': video_id,
        'service': 'baseentry',
    }

    return self._kaltura_api_call(
        video_id, [multirequest_header, get_entry, get_context_data],
        note='Downloading video info JSON')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract metadata and downloadable formats for a Kaltura video."""
    # The URL only needs to be parsed once: _match_id() would re-apply
    # the same _VALID_URL and return the same 'id' group as mobj does,
    # so the previous extra self._match_id(url) call was redundant.
    mobj = re.match(self._VALID_URL, url)
    partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')

    info, source_data = self._get_video_info(entry_id, partner_id)

    formats = [{
        'format_id': '%(fileExt)s-%(bitrate)s' % f,
        'ext': f['fileExt'],
        'tbr': f['bitrate'],
        'fps': f.get('frameRate'),
        # 'size' is reported in KB, hence invscale=1024.
        'filesize_approx': int_or_none(f.get('size'), invscale=1024),
        'container': f.get('containerFormat'),
        'vcodec': f.get('videoCodecId'),
        'height': f.get('height'),
        'width': f.get('width'),
        'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
    } for f in source_data['flavorAssets']]
    self._sort_formats(formats)

    return {
        'id': entry_id,
        'title': info['name'],
        'formats': formats,
        'description': info.get('description'),
        'thumbnail': info.get('thumbnailUrl'),
        'duration': info.get('duration'),
        'timestamp': info.get('createdAt'),
        'uploader_id': info.get('userId'),
        'view_count': info.get('plays'),
    }
|
96
youtube_dl/extractor/kanalplay.py
Normal file
96
youtube_dl/extractor/kanalplay.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class KanalPlayIE(InfoExtractor):
    """Extractor for the Swedish Kanal 5 / Kanal 9 / Kanal 11 'Play' sites."""
    IE_DESC = 'Kanal 5/9/11 Play'
    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
        'info_dict': {
            'id': '3270012277',
            'ext': 'flv',
            'title': 'Saknar både dusch och avlopp',
            'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
            'duration': 2636.36,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
        'only_matching': True,
    }, {
        'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
        'only_matching': True,
    }]

    def _fix_subtitles(self, subs):
        """Convert the site's JSON subtitle list into SRT-formatted text.

        Each item is expected to carry 'startMillis', 'endMillis' (both in
        milliseconds) and 'text'; cues are numbered starting from 1.
        """
        return '\r\n\r\n'.join(
            '%s\r\n%s --> %s\r\n%s'
            % (
                num,
                self._subtitles_timecode(item['startMillis'] / 1000.0),
                self._subtitles_timecode(item['endMillis'] / 1000.0),
                item['text'],
            ) for num, item in enumerate(subs, 1))

    def _get_subtitles(self, channel_id, video_id):
        # Best-effort download (fatal=False): return an empty mapping when
        # the subtitles JSON is unavailable or empty.
        subs = self._download_json(
            'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
            video_id, 'Downloading subtitles JSON', fatal=False)
        return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}

    def _real_extract(self, url):
        """Extract an RTMP video (and optional Swedish subtitles) from a Play page."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        channel_id = mobj.group('channel_id')

        video = self._download_json(
            'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
            video_id)

        # The API reports unavailability (geo/DRM restrictions etc.) as a
        # list of human-readable reasons.
        reasons_for_no_streams = video.get('reasonsForNoStreams')
        if reasons_for_no_streams:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
                expected=True)

        title = video['title']
        description = video.get('description')
        # 'length' is in milliseconds; convert to seconds.
        duration = float_or_none(video.get('length'), 1000)
        thumbnail = video.get('posterUrl')

        stream_base_url = video['streamBaseUrl']

        formats = [{
            'url': stream_base_url,
            'play_path': stream['source'],
            'ext': 'flv',
            # bitrate is in bps; convert to kbps for 'tbr'.
            'tbr': float_or_none(stream.get('bitrate'), 1000),
            'rtmp_real_time': True,
        } for stream in video['streams']]
        self._sort_formats(formats)

        subtitles = {}
        if video.get('hasSubtitle'):
            subtitles = self.extract_subtitles(channel_id, video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor):
|
|||||||
description = self._og_search_description(webpage, default=None)
|
description = self._og_search_description(webpage, default=None)
|
||||||
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
|
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
|
||||||
duration = int_or_none(flashvars.get('duration'))
|
duration = int_or_none(flashvars.get('duration'))
|
||||||
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
|
width = int_or_none(self._og_search_property(
|
||||||
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
|
'video:width', webpage, 'video width', default=None))
|
||||||
|
height = int_or_none(self._og_search_property(
|
||||||
|
'video:height', webpage, 'video height', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -1,31 +1,32 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Laola1TvIE(InfoExtractor):
|
class Laola1TvIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html',
|
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '250019',
|
'id': '227883',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Bitburger Open Grand Prix Gold - Court 1',
|
'title': 'Straubing Tigers - Kölner Haie',
|
||||||
'categories': ['Badminton'],
|
'categories': ['Eishockey'],
|
||||||
'uploader': 'BWF - Badminton World Federation',
|
'is_live': False,
|
||||||
'is_live': True,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_BROKEN = True # Not really - extractor works fine, but f4m downloader does not support live streams yet.
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
@@ -43,15 +44,22 @@ class Laola1TvIE(InfoExtractor):
|
|||||||
r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
|
r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
|
||||||
flashvars = dict((m[0], m[1]) for m in flashvars_m)
|
flashvars = dict((m[0], m[1]) for m in flashvars_m)
|
||||||
|
|
||||||
|
partner_id = self._search_regex(
|
||||||
|
r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
|
||||||
|
|
||||||
xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
|
xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
|
||||||
'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % (
|
'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
|
||||||
video_id, portal, lang))
|
video_id, partner_id, portal, lang))
|
||||||
hd_doc = self._download_xml(xml_url, video_id)
|
hd_doc = self._download_xml(xml_url, video_id)
|
||||||
|
|
||||||
title = hd_doc.find('.//video/title').text
|
title = xpath_text(hd_doc, './/video/title', fatal=True)
|
||||||
flash_url = hd_doc.find('.//video/url').text
|
flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
|
||||||
categories = hd_doc.find('.//video/meta_sports').text.split(',')
|
uploader = xpath_text(hd_doc, './/video/meta_organistation')
|
||||||
uploader = hd_doc.find('.//video/meta_organistation').text
|
is_live = xpath_text(hd_doc, './/video/islive') == 'true'
|
||||||
|
|
||||||
|
categories = xpath_text(hd_doc, './/video/meta_sports')
|
||||||
|
if categories:
|
||||||
|
categories = categories.split(',')
|
||||||
|
|
||||||
ident = random.randint(10000000, 99999999)
|
ident = random.randint(10000000, 99999999)
|
||||||
token_url = '%s&ident=%s&klub=0&unikey=0×tamp=%s&auth=%s' % (
|
token_url = '%s&ident=%s&klub=0&unikey=0×tamp=%s&auth=%s' % (
|
||||||
@@ -60,15 +68,16 @@ class Laola1TvIE(InfoExtractor):
|
|||||||
token_doc = self._download_xml(
|
token_doc = self._download_xml(
|
||||||
token_url, video_id, note='Downloading token')
|
token_url, video_id, note='Downloading token')
|
||||||
token_attrib = token_doc.find('.//token').attrib
|
token_attrib = token_doc.find('.//token').attrib
|
||||||
if token_attrib.get('auth') == 'blocked':
|
if token_attrib.get('auth') in ('blocked', 'restricted'):
|
||||||
raise ExtractorError('Token error: ' % token_attrib.get('comment'))
|
raise ExtractorError(
|
||||||
|
'Token error: %s' % token_attrib.get('comment'), expected=True)
|
||||||
|
|
||||||
video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
|
video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
|
||||||
token_attrib['url'], token_attrib['auth'])
|
token_attrib['url'], token_attrib['auth'])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'is_live': True,
|
'is_live': is_live,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
207
youtube_dl/extractor/letv.py
Normal file
207
youtube_dl/extractor/letv.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LetvIE(InfoExtractor):
    """Extractor for single videos on www.letv.com (LeTV / 乐视网)."""
    # The dot before 'html' was previously unescaped ('.html'), which made
    # the pattern accept any character there; escape it for correctness.
    _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+)\.html'

    _TESTS = [{
        'url': 'http://www.letv.com/ptv/vplay/22005890.html',
        'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
        'info_dict': {
            'id': '22005890',
            'ext': 'mp4',
            'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
            'timestamp': 1424747397,
            'upload_date': '20150224',
            'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
        }
    }, {
        'url': 'http://www.letv.com/ptv/vplay/1415246.html',
        'info_dict': {
            'id': '1415246',
            'ext': 'mp4',
            'title': '美人天下01',
            'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
        },
    }, {
        'note': 'This video is available only in Mainland China, thus a proxy is needed',
        'url': 'http://www.letv.com/ptv/vplay/1118082.html',
        'md5': 'f80936fbe20fb2f58648e81386ff7927',
        'info_dict': {
            'id': '1118082',
            'ext': 'mp4',
            'title': '与龙共舞 完整版',
            'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
        },
        'params': {
            'cn_verification_proxy': 'http://proxy.uku.im:8888'
        },
    }]

    @staticmethod
    def urshift(val, n):
        """Unsigned (logical) 32-bit right shift, like ActionScript's '>>>'.

        Python's '>>' is arithmetic, so negative values need the 2**32 bias.
        """
        return val >> n if val >= 0 else (val + 0x100000000) >> n

    # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
    def ror(self, param1, param2):
        """Rotate the 32-bit integer param1 right by param2 bits."""
        _loc3_ = 0
        while _loc3_ < param2:
            param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
            _loc3_ += 1
        return param1

    def calc_time_key(self, param1):
        """Compute the obfuscated time-based 'tkey' required by the playJson API."""
        _loc2_ = 773625421
        _loc3_ = self.ror(param1, _loc2_ % 13)
        _loc3_ = _loc3_ ^ _loc2_
        _loc3_ = self.ror(_loc3_, _loc2_ % 17)
        return _loc3_

    def _real_extract(self, url):
        """Extract title, thumbnail, formats and timestamp for a LeTV video."""
        media_id = self._match_id(url)
        page = self._download_webpage(url, media_id)
        params = {
            'id': media_id,
            'platid': 1,
            'splatid': 101,
            'format': 1,
            'tkey': self.calc_time_key(int(time.time())),
            'domain': 'www.letv.com'
        }
        play_json_req = compat_urllib_request.Request(
            'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
        )
        # Optionally route the API request through a Mainland-China proxy,
        # since many videos are geo-restricted.
        cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
        if cn_verification_proxy:
            play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)

        play_json = self._download_json(
            play_json_req,
            media_id, 'Downloading playJson data')

        # Check for errors reported by the API (status == 0 means failure).
        playstatus = play_json['playstatus']
        if playstatus['status'] == 0:
            flag = playstatus['flag']
            if flag == 1:
                msg = 'Country %s auth error' % playstatus['country']
            else:
                msg = 'Generic error. flag = %d' % flag
            raise ExtractorError(msg, expected=True)

        playurl = play_json['playurl']

        # Candidate quality labels in ascending order of quality.
        format_ids = ['350', '1000', '1300', '720p', '1080p']
        dispatch = playurl['dispatch']

        formats = []
        for format_id in format_ids:
            if format_id not in dispatch:
                continue
            media_url = playurl['domain'][0] + dispatch[format_id][0]

            # Mimic what flvxz.com do
            url_parts = list(compat_urlparse.urlparse(media_url))
            qs = dict(compat_urlparse.parse_qs(url_parts[4]))
            qs.update({
                'platid': '14',
                'splatid': '1401',
                'tss': 'no',
                'retry': 1
            })
            url_parts[4] = compat_urllib_parse.urlencode(qs)
            media_url = compat_urlparse.urlunparse(url_parts)

            url_info_dict = {
                'url': media_url,
                'ext': determine_ext(dispatch[format_id][1]),
                'format_id': format_id,
            }

            if format_id[-1:] == 'p':
                # '720p'/'1080p' encode the frame height; store it as an int
                # (it was previously kept as a string, which breaks numeric
                # format sorting downstream).
                url_info_dict['height'] = int(format_id[:-1])

            formats.append(url_info_dict)

        # Publish time is given in China Standard Time (UTC+8).
        publish_time = parse_iso8601(self._html_search_regex(
            r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
            delimiter=' ', timezone=datetime.timedelta(hours=8))
        description = self._html_search_meta('description', page, fatal=False)

        return {
            'id': media_id,
            'formats': formats,
            'title': playurl['title'],
            'thumbnail': playurl['pic'],
            'description': description,
            'timestamp': publish_time,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class LetvTvIE(InfoExtractor):
    """Playlist extractor for LeTV series pages (www.letv.com/tv/<id>.html)."""
    # Dots in the host and '.html' suffix are now escaped (they previously
    # matched any character).
    _VALID_URL = r'http://www\.letv\.com/tv/(?P<id>\d+)\.html'
    _TESTS = [{
        'url': 'http://www.letv.com/tv/46177.html',
        'info_dict': {
            'id': '46177',
            'title': '美人天下',
            'description': 'md5:395666ff41b44080396e59570dbac01c'
        },
        'playlist_count': 35
    }]

    def _real_extract(self, url):
        """Collect all episode links from a series page into a playlist."""
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        # Deduplicate episode-page links and delegate each one to LetvIE.
        media_urls = list(set(re.findall(
            r'http://www\.letv\.com/ptv/vplay/\d+\.html', page)))
        entries = [self.url_result(media_url, ie='Letv')
                   for media_url in media_urls]

        # The first keyword usually holds the series title.  The meta tag
        # may be absent, in which case _html_search_meta(..., fatal=False)
        # returns None — guard before splitting (previously this raised
        # AttributeError).
        title = self._html_search_meta('keywords', page, fatal=False)
        if title:
            title = title.split(',')[0]
        description = self._html_search_meta('description', page, fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_title=title,
                                    playlist_description=description)
|
||||||
|
|
||||||
|
|
||||||
|
class LetvPlaylistIE(LetvTvIE):
    """Playlist extractor for LeTV topic pages (tv.letv.com/.../index[.s]html).

    Reuses LetvTvIE's _real_extract; only the URL pattern and tests differ.
    """
    # Dots are now escaped — the previous pattern ('tv.letv.com', 'index.s?html')
    # let them match any character.
    _VALID_URL = r'http://tv\.letv\.com/[a-z]+/(?P<id>[a-z]+)/index\.s?html'
    _TESTS = [{
        'url': 'http://tv.letv.com/izt/wuzetian/index.html',
        'info_dict': {
            'id': 'wuzetian',
            'title': '武媚娘传奇',
            'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
        },
        # This playlist contains some extra videos other than the drama itself
        'playlist_mincount': 96
    }, {
        'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
        'info_dict': {
            'id': 'lswjzzjc',
            # The title should be "劲舞青春", but I can't find a simple way to
            # determine the playlist title
            'title': '乐视午间自制剧场',
            'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
        },
        'playlist_mincount': 7
    }]
|
59
youtube_dl/extractor/libsyn.py
Normal file
59
youtube_dl/extractor/libsyn.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
|
class LibsynIE(InfoExtractor):
    """Extractor for podcast episodes embedded via html5-player.libsyn.com."""
    _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
        'md5': '443360ee1b58007bc3dcf09b41d093bb',
        'info_dict': {
            'id': '3377616',
            'ext': 'mp3',
            'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
            'description': 'md5:601cb790edd05908957dae8aaa866465',
            'upload_date': '20150220',
        },
    }

    def _real_extract(self, url):
        """Extract audio formats and metadata from an embedded player page."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The player page assigns the media URL to one or two JS variables
        # (mediaURL / mediaURLLibsyn); deduplicate them.  The pattern is now
        # a raw string — '\s' in a plain string relies on Python not
        # interpreting the escape and is deprecated.
        formats = [{
            'url': media_url,
        } for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]

        podcast_title = self._search_regex(
            r'<h2>([^<]+)</h2>', webpage, 'title')
        episode_title = self._search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title', default=None)

        # Combine podcast and episode titles; fall back gracefully when the
        # episode <h3> is missing (the old expression produced the literal
        # string '<podcast> - None' in that case).
        if podcast_title and episode_title:
            title = '%s - %s' % (podcast_title, episode_title)
        else:
            title = episode_title or podcast_title

        description = self._html_search_regex(
            r'<div id="info_text_body">(.+?)</div>', webpage,
            'description', fatal=False)

        thumbnail = self._search_regex(
            r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        release_date = unified_strdate(self._search_regex(
            r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': release_date,
            'formats': formats,
        }
|
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@@ -40,6 +41,13 @@ class LivestreamIE(InfoExtractor):
|
|||||||
'id': '2245590',
|
'id': '2245590',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
|
}, {
|
||||||
|
'url': 'http://new.livestream.com/chess24/tatasteelchess',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Tata Steel Chess',
|
||||||
|
'id': '3705884',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 60,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
|
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@@ -117,6 +125,30 @@ class LivestreamIE(InfoExtractor):
|
|||||||
'view_count': video_data.get('views'),
|
'view_count': video_data.get('views'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _extract_event(self, info):
|
||||||
|
event_id = compat_str(info['id'])
|
||||||
|
account = compat_str(info['owner_account_id'])
|
||||||
|
root_url = (
|
||||||
|
'https://new.livestream.com/api/accounts/{account}/events/{event}/'
|
||||||
|
'feed.json'.format(account=account, event=event_id))
|
||||||
|
|
||||||
|
def _extract_videos():
|
||||||
|
last_video = None
|
||||||
|
for i in itertools.count(1):
|
||||||
|
if last_video is None:
|
||||||
|
info_url = root_url
|
||||||
|
else:
|
||||||
|
info_url = '{root}?&id={id}&newer=-1&type=video'.format(
|
||||||
|
root=root_url, id=last_video)
|
||||||
|
videos_info = self._download_json(info_url, event_id, 'Downloading page {0}'.format(i))['data']
|
||||||
|
videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
|
||||||
|
if not videos_info:
|
||||||
|
break
|
||||||
|
for v in videos_info:
|
||||||
|
yield self._extract_video_info(v)
|
||||||
|
last_video = videos_info[-1]['id']
|
||||||
|
return self.playlist_result(_extract_videos(), event_id, info['full_name'])
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
@@ -144,14 +176,13 @@ class LivestreamIE(InfoExtractor):
|
|||||||
result = result and compat_str(vdata['data']['id']) == vid
|
result = result and compat_str(vdata['data']['id']) == vid
|
||||||
return result
|
return result
|
||||||
|
|
||||||
videos = [self._extract_video_info(video_data['data'])
|
|
||||||
for video_data in info['feed']['data']
|
|
||||||
if is_relevant(video_data, video_id)]
|
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
# This is an event page:
|
# This is an event page:
|
||||||
return self.playlist_result(
|
return self._extract_event(info)
|
||||||
videos, '%s' % info['id'], info['full_name'])
|
|
||||||
else:
|
else:
|
||||||
|
videos = [self._extract_video_info(video_data['data'])
|
||||||
|
for video_data in info['feed']['data']
|
||||||
|
if is_relevant(video_data, video_id)]
|
||||||
if not videos:
|
if not videos:
|
||||||
raise ExtractorError('Cannot find video %s' % video_id)
|
raise ExtractorError('Cannot find video %s' % video_id)
|
||||||
return videos[0]
|
return videos[0]
|
||||||
|
@@ -52,6 +52,7 @@ class LRTIE(InfoExtractor):
|
|||||||
'url': data['streamer'],
|
'url': data['streamer'],
|
||||||
'play_path': 'mp4:%s' % data['file'],
|
'play_path': 'mp4:%s' % data['file'],
|
||||||
'preference': -1,
|
'preference': -1,
|
||||||
|
'rtmp_real_time': True,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
formats.extend(
|
formats.extend(
|
||||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
@@ -16,19 +15,74 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LyndaIE(SubtitlesInfoExtractor):
|
class LyndaBaseIE(InfoExtractor):
|
||||||
IE_NAME = 'lynda'
|
|
||||||
IE_DESC = 'lynda.com videos'
|
|
||||||
_VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
|
|
||||||
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
||||||
|
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
|
||||||
|
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||||
|
_NETRC_MACHINE = 'lynda'
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
if username is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
login_form = {
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
'remember': 'false',
|
||||||
|
'stayPut': 'false'
|
||||||
|
}
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||||
|
login_page = self._download_webpage(
|
||||||
|
request, None, 'Logging in as %s' % username)
|
||||||
|
|
||||||
|
# Not (yet) logged in
|
||||||
|
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
|
||||||
|
if m is not None:
|
||||||
|
response = m.group('json')
|
||||||
|
response_json = json.loads(response)
|
||||||
|
state = response_json['state']
|
||||||
|
|
||||||
|
if state == 'notlogged':
|
||||||
|
raise ExtractorError(
|
||||||
|
'Unable to login, incorrect username and/or password',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
# This is when we get popup:
|
||||||
|
# > You're already logged in to lynda.com on two devices.
|
||||||
|
# > If you log in here, we'll log you out of another device.
|
||||||
|
# So, we need to confirm this.
|
||||||
|
if state == 'conflicted':
|
||||||
|
confirm_form = {
|
||||||
|
'username': '',
|
||||||
|
'password': '',
|
||||||
|
'resolve': 'true',
|
||||||
|
'remember': 'false',
|
||||||
|
'stayPut': 'false',
|
||||||
|
}
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
|
||||||
|
login_page = self._download_webpage(
|
||||||
|
request, None,
|
||||||
|
'Confirming log in and log out from another device')
|
||||||
|
|
||||||
|
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
||||||
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
|
||||||
|
class LyndaIE(LyndaBaseIE):
|
||||||
|
IE_NAME = 'lynda'
|
||||||
|
IE_DESC = 'lynda.com videos'
|
||||||
|
_VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
|
||||||
_NETRC_MACHINE = 'lynda'
|
_NETRC_MACHINE = 'lynda'
|
||||||
|
|
||||||
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
|
|
||||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||||
|
|
||||||
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
_TESTS = [{
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||||
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
|
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -37,25 +91,27 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
'title': 'Using the exercise files',
|
'title': 'Using the exercise files',
|
||||||
'duration': 68
|
'duration': 68
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
|
||||||
def _real_initialize(self):
|
'only_matching': True,
|
||||||
self._login()
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
|
page = self._download_webpage(
|
||||||
'Downloading video JSON')
|
'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
|
||||||
|
video_id, 'Downloading video JSON')
|
||||||
video_json = json.loads(page)
|
video_json = json.loads(page)
|
||||||
|
|
||||||
if 'Status' in video_json:
|
if 'Status' in video_json:
|
||||||
raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
|
raise ExtractorError(
|
||||||
|
'lynda returned error: %s' % video_json['Message'], expected=True)
|
||||||
|
|
||||||
if video_json['HasAccess'] is False:
|
if video_json['HasAccess'] is False:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
|
'Video %s is only available for members. '
|
||||||
|
% video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
|
||||||
|
|
||||||
video_id = compat_str(video_json['ID'])
|
video_id = compat_str(video_json['ID'])
|
||||||
duration = video_json['DurationInSeconds']
|
duration = video_json['DurationInSeconds']
|
||||||
@@ -88,11 +144,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
self._check_formats(formats, video_id)
|
self._check_formats(formats, video_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
subtitles = self.extract_subtitles(video_id, page)
|
||||||
self._list_available_subtitles(video_id, page)
|
|
||||||
return
|
|
||||||
|
|
||||||
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -102,83 +154,37 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
'formats': formats
|
'formats': formats
|
||||||
}
|
}
|
||||||
|
|
||||||
def _login(self):
|
def _fix_subtitles(self, subs):
|
||||||
(username, password) = self._get_login_info()
|
srt = ''
|
||||||
if username is None:
|
seq_counter = 0
|
||||||
return
|
for pos in range(0, len(subs) - 1):
|
||||||
|
seq_current = subs[pos]
|
||||||
login_form = {
|
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
|
||||||
'username': username,
|
if m_current is None:
|
||||||
'password': password,
|
|
||||||
'remember': 'false',
|
|
||||||
'stayPut': 'false'
|
|
||||||
}
|
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
|
||||||
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
|
|
||||||
|
|
||||||
# Not (yet) logged in
|
|
||||||
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
|
|
||||||
if m is not None:
|
|
||||||
response = m.group('json')
|
|
||||||
response_json = json.loads(response)
|
|
||||||
state = response_json['state']
|
|
||||||
|
|
||||||
if state == 'notlogged':
|
|
||||||
raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
|
|
||||||
|
|
||||||
# This is when we get popup:
|
|
||||||
# > You're already logged in to lynda.com on two devices.
|
|
||||||
# > If you log in here, we'll log you out of another device.
|
|
||||||
# So, we need to confirm this.
|
|
||||||
if state == 'conflicted':
|
|
||||||
confirm_form = {
|
|
||||||
'username': '',
|
|
||||||
'password': '',
|
|
||||||
'resolve': 'true',
|
|
||||||
'remember': 'false',
|
|
||||||
'stayPut': 'false',
|
|
||||||
}
|
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
|
|
||||||
login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
|
|
||||||
|
|
||||||
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
|
||||||
raise ExtractorError('Unable to log in')
|
|
||||||
|
|
||||||
def _fix_subtitles(self, subtitles):
|
|
||||||
if subtitles is None:
|
|
||||||
return subtitles # subtitles not requested
|
|
||||||
|
|
||||||
fixed_subtitles = {}
|
|
||||||
for k, v in subtitles.items():
|
|
||||||
subs = json.loads(v)
|
|
||||||
if len(subs) == 0:
|
|
||||||
continue
|
continue
|
||||||
srt = ''
|
seq_next = subs[pos + 1]
|
||||||
for pos in range(0, len(subs) - 1):
|
m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
|
||||||
seq_current = subs[pos]
|
if m_next is None:
|
||||||
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
|
continue
|
||||||
if m_current is None:
|
appear_time = m_current.group('timecode')
|
||||||
continue
|
disappear_time = m_next.group('timecode')
|
||||||
seq_next = subs[pos + 1]
|
text = seq_current['Caption'].strip()
|
||||||
m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
|
if text:
|
||||||
if m_next is None:
|
seq_counter += 1
|
||||||
continue
|
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
|
||||||
appear_time = m_current.group('timecode')
|
if srt:
|
||||||
disappear_time = m_next.group('timecode')
|
return srt
|
||||||
text = seq_current['Caption']
|
|
||||||
srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
|
|
||||||
if srt:
|
|
||||||
fixed_subtitles[k] = srt
|
|
||||||
return fixed_subtitles
|
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||||
sub = self._download_webpage(url, None, False)
|
subs = self._download_json(url, None, False)
|
||||||
sub_json = json.loads(sub)
|
if subs:
|
||||||
return {'en': url} if len(sub_json) > 0 else {}
|
return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
|
||||||
|
else:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
class LyndaCourseIE(InfoExtractor):
|
class LyndaCourseIE(LyndaBaseIE):
|
||||||
IE_NAME = 'lynda:course'
|
IE_NAME = 'lynda:course'
|
||||||
IE_DESC = 'lynda.com online courses'
|
IE_DESC = 'lynda.com online courses'
|
||||||
|
|
||||||
@@ -191,35 +197,37 @@ class LyndaCourseIE(InfoExtractor):
|
|||||||
course_path = mobj.group('coursepath')
|
course_path = mobj.group('coursepath')
|
||||||
course_id = mobj.group('courseid')
|
course_id = mobj.group('courseid')
|
||||||
|
|
||||||
page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
page = self._download_webpage(
|
||||||
course_id, 'Downloading course JSON')
|
'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
|
||||||
|
course_id, 'Downloading course JSON')
|
||||||
course_json = json.loads(page)
|
course_json = json.loads(page)
|
||||||
|
|
||||||
if 'Status' in course_json and course_json['Status'] == 'NotFound':
|
if 'Status' in course_json and course_json['Status'] == 'NotFound':
|
||||||
raise ExtractorError('Course %s does not exist' % course_id, expected=True)
|
raise ExtractorError(
|
||||||
|
'Course %s does not exist' % course_id, expected=True)
|
||||||
|
|
||||||
unaccessible_videos = 0
|
unaccessible_videos = 0
|
||||||
videos = []
|
videos = []
|
||||||
(username, _) = self._get_login_info()
|
|
||||||
|
|
||||||
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
|
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
|
||||||
# by single video API anymore
|
# by single video API anymore
|
||||||
|
|
||||||
for chapter in course_json['Chapters']:
|
for chapter in course_json['Chapters']:
|
||||||
for video in chapter['Videos']:
|
for video in chapter['Videos']:
|
||||||
if username is None and video['HasAccess'] is False:
|
if video['HasAccess'] is False:
|
||||||
unaccessible_videos += 1
|
unaccessible_videos += 1
|
||||||
continue
|
continue
|
||||||
videos.append(video['ID'])
|
videos.append(video['ID'])
|
||||||
|
|
||||||
if unaccessible_videos > 0:
|
if unaccessible_videos > 0:
|
||||||
self._downloader.report_warning('%s videos are only available for members and will not be downloaded. '
|
self._downloader.report_warning(
|
||||||
% unaccessible_videos + LyndaIE.ACCOUNT_CREDENTIALS_HINT)
|
'%s videos are only available for members (or paid members) and will not be downloaded. '
|
||||||
|
% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
|
||||||
|
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result('http://www.lynda.com/%s/%s-4.html' %
|
self.url_result(
|
||||||
(course_path, video_id),
|
'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
|
||||||
'Lynda')
|
'Lynda')
|
||||||
for video_id in videos]
|
for video_id in videos]
|
||||||
|
|
||||||
course_title = course_json['Title']
|
course_title = course_json['Title']
|
||||||
|
93
youtube_dl/extractor/miomio.py
Normal file
93
youtube_dl/extractor/miomio.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
xpath_text,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MioMioIE(InfoExtractor):
|
||||||
|
IE_NAME = 'miomio.tv'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.miomio.tv/watch/cc179734/',
|
||||||
|
'md5': '48de02137d0739c15b440a224ad364b9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '179734',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '手绘动漫鬼泣但丁全程画法',
|
||||||
|
'duration': 354,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.miomio.tv/watch/cc184024/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '43729',
|
||||||
|
'title': '《动漫同人插画绘制》',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 86,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
'description', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
|
mioplayer_path = self._search_regex(
|
||||||
|
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
|
||||||
|
|
||||||
|
xml_config = self._search_regex(
|
||||||
|
r'flashvars="type=sina&(.+?)&',
|
||||||
|
webpage, 'xml config')
|
||||||
|
|
||||||
|
# skipping the following page causes lags and eventually connection drop-outs
|
||||||
|
self._request_webpage(
|
||||||
|
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
# the following xml contains the actual configuration information on the video file(s)
|
||||||
|
vid_config = self._download_xml(
|
||||||
|
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
http_headers = {
|
||||||
|
'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
|
||||||
|
}
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for f in vid_config.findall('./durl'):
|
||||||
|
segment_url = xpath_text(f, 'url', 'video url')
|
||||||
|
if not segment_url:
|
||||||
|
continue
|
||||||
|
order = xpath_text(f, 'order', 'order')
|
||||||
|
segment_id = video_id
|
||||||
|
segment_title = title
|
||||||
|
if order:
|
||||||
|
segment_id += '-%s' % order
|
||||||
|
segment_title += ' part %s' % order
|
||||||
|
entries.append({
|
||||||
|
'id': segment_id,
|
||||||
|
'url': segment_url,
|
||||||
|
'title': segment_title,
|
||||||
|
'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
|
||||||
|
'http_headers': http_headers,
|
||||||
|
})
|
||||||
|
|
||||||
|
if len(entries) == 1:
|
||||||
|
segment = entries[0]
|
||||||
|
segment['id'] = video_id
|
||||||
|
segment['title'] = title
|
||||||
|
return segment
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'id': video_id,
|
||||||
|
'entries': entries,
|
||||||
|
'title': title,
|
||||||
|
'http_headers': http_headers,
|
||||||
|
}
|
@@ -5,9 +5,6 @@ import json
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..compat import (
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@@ -108,7 +105,6 @@ class OCWMITIE(InfoExtractor):
|
|||||||
'upload_date': '20121109',
|
'upload_date': '20121109',
|
||||||
'uploader_id': 'MIT',
|
'uploader_id': 'MIT',
|
||||||
'uploader': 'MIT OpenCourseWare',
|
'uploader': 'MIT OpenCourseWare',
|
||||||
# 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -121,7 +117,6 @@ class OCWMITIE(InfoExtractor):
|
|||||||
'uploader_id': 'MIT',
|
'uploader_id': 'MIT',
|
||||||
'uploader': 'MIT OpenCourseWare',
|
'uploader': 'MIT OpenCourseWare',
|
||||||
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
|
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
|
||||||
# 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -140,7 +135,6 @@ class OCWMITIE(InfoExtractor):
|
|||||||
metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
|
metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
|
||||||
metadata = re.split(r', ?', metadata)
|
metadata = re.split(r', ?', metadata)
|
||||||
yt = metadata[1]
|
yt = metadata[1]
|
||||||
subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
|
|
||||||
else:
|
else:
|
||||||
# search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
|
# search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
|
||||||
embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
|
embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
|
||||||
@@ -148,7 +142,6 @@ class OCWMITIE(InfoExtractor):
|
|||||||
metadata = re.sub(r'[\'"]', '', embed_media.group(1))
|
metadata = re.sub(r'[\'"]', '', embed_media.group(1))
|
||||||
metadata = re.split(r', ?', metadata)
|
metadata = re.split(r', ?', metadata)
|
||||||
yt = metadata[1]
|
yt = metadata[1]
|
||||||
subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
|
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unable to find embedded YouTube video.')
|
raise ExtractorError('Unable to find embedded YouTube video.')
|
||||||
video_id = YoutubeIE.extract_id(yt)
|
video_id = YoutubeIE.extract_id(yt)
|
||||||
@@ -159,7 +152,5 @@ class OCWMITIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'url': yt,
|
'url': yt,
|
||||||
'url_transparent'
|
|
||||||
'subtitles': subs,
|
|
||||||
'ie_key': 'Youtube',
|
'ie_key': 'Youtube',
|
||||||
}
|
}
|
||||||
|
@@ -18,7 +18,7 @@ class MiTeleIE(InfoExtractor):
|
|||||||
IE_NAME = 'mitele.es'
|
IE_NAME = 'mitele.es'
|
||||||
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||||
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
|
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -29,7 +29,7 @@ class MiTeleIE(InfoExtractor):
|
|||||||
'display_id': 'programa-144',
|
'display_id': 'programa-144',
|
||||||
'duration': 2913,
|
'duration': 2913,
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
episode = self._match_id(url)
|
episode = self._match_id(url)
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@@ -10,7 +11,6 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
parse_iso8601,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -27,8 +27,6 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||||
'uploader': 'Daniel Holbach',
|
'uploader': 'Daniel Holbach',
|
||||||
'uploader_id': 'dholbach',
|
'uploader_id': 'dholbach',
|
||||||
'upload_date': '20111115',
|
|
||||||
'timestamp': 1321359578,
|
|
||||||
'thumbnail': 're:https?://.*\.jpg',
|
'thumbnail': 're:https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
@@ -37,31 +35,30 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
||||||
'ext': 'm4a',
|
'ext': 'mp3',
|
||||||
'title': 'Electric Relaxation vol. 3',
|
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
||||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
||||||
'uploader': 'Daniel Drumz',
|
'uploader': 'Gilles Peterson Worldwide',
|
||||||
'uploader_id': 'gillespeterson',
|
'uploader_id': 'gillespeterson',
|
||||||
'thumbnail': 're:https?://.*\.jpg',
|
'thumbnail': 're:https?://.*/images/',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_url(self, track_id, template_url):
|
def _get_url(self, track_id, template_url, server_number):
|
||||||
server_count = 30
|
boundaries = (1, 30)
|
||||||
for i in range(server_count):
|
for nr in server_numbers(server_number, boundaries):
|
||||||
url = template_url % i
|
url = template_url % nr
|
||||||
try:
|
try:
|
||||||
# We only want to know if the request succeed
|
# We only want to know if the request succeed
|
||||||
# don't download the whole file
|
# don't download the whole file
|
||||||
self._request_webpage(
|
self._request_webpage(
|
||||||
HEADRequest(url), track_id,
|
HEADRequest(url), track_id,
|
||||||
'Checking URL %d/%d ...' % (i + 1, server_count + 1))
|
'Checking URL %d/%d ...' % (nr, boundaries[-1]))
|
||||||
return url
|
return url
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -75,17 +72,18 @@ class MixcloudIE(InfoExtractor):
|
|||||||
preview_url = self._search_regex(
|
preview_url = self._search_regex(
|
||||||
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
||||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||||
|
server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
|
||||||
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
||||||
final_song_url = self._get_url(track_id, template_url)
|
final_song_url = self._get_url(track_id, template_url, server_number)
|
||||||
if final_song_url is None:
|
if final_song_url is None:
|
||||||
self.to_screen('Trying with m4a extension')
|
self.to_screen('Trying with m4a extension')
|
||||||
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||||
final_song_url = self._get_url(track_id, template_url)
|
final_song_url = self._get_url(track_id, template_url, server_number)
|
||||||
if final_song_url is None:
|
if final_song_url is None:
|
||||||
raise ExtractorError('Unable to extract track url')
|
raise ExtractorError('Unable to extract track url')
|
||||||
|
|
||||||
PREFIX = (
|
PREFIX = (
|
||||||
r'<span class="play-button[^"]*?"'
|
r'm-play-on-spacebar[^>]+'
|
||||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
||||||
@@ -99,16 +97,12 @@ class MixcloudIE(InfoExtractor):
|
|||||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
like_count = str_to_int(self._search_regex(
|
like_count = str_to_int(self._search_regex(
|
||||||
[r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
|
||||||
r'/favorites/?">([0-9]+)<'],
|
|
||||||
webpage, 'like count', fatal=False))
|
webpage, 'like count', fatal=False))
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||||
r'/listeners/?">([0-9,.]+)</a>'],
|
r'/listeners/?">([0-9,.]+)</a>'],
|
||||||
webpage, 'play count', fatal=False))
|
webpage, 'play count', fatal=False))
|
||||||
timestamp = parse_iso8601(self._search_regex(
|
|
||||||
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
|
||||||
webpage, 'upload date', default=None))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
@@ -118,7 +112,38 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'timestamp': timestamp,
|
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def server_numbers(first, boundaries):
|
||||||
|
""" Server numbers to try in descending order of probable availability.
|
||||||
|
Starting from first (i.e. the number of the server hosting the preview file)
|
||||||
|
and going further and further up to the higher boundary and down to the
|
||||||
|
lower one in an alternating fashion. Namely:
|
||||||
|
|
||||||
|
server_numbers(2, (1, 5))
|
||||||
|
|
||||||
|
# Where the preview server is 2, min number is 1 and max is 5.
|
||||||
|
# Yields: 2, 3, 1, 4, 5
|
||||||
|
|
||||||
|
Why not random numbers or increasing sequences? Since from what I've seen,
|
||||||
|
full length files seem to be hosted on servers whose number is closer to
|
||||||
|
that of the preview; to be confirmed.
|
||||||
|
"""
|
||||||
|
zip_longest = getattr(itertools, 'zip_longest', None)
|
||||||
|
if zip_longest is None:
|
||||||
|
# python 2.x
|
||||||
|
zip_longest = itertools.izip_longest
|
||||||
|
|
||||||
|
if len(boundaries) != 2:
|
||||||
|
raise ValueError("boundaries should be a two-element tuple")
|
||||||
|
min, max = boundaries
|
||||||
|
highs = range(first + 1, max + 1)
|
||||||
|
lows = range(first - 1, min - 1, -1)
|
||||||
|
rest = filter(
|
||||||
|
None, itertools.chain.from_iterable(zip_longest(highs, lows)))
|
||||||
|
yield first
|
||||||
|
for n in rest:
|
||||||
|
yield n
|
||||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MLBIE(InfoExtractor):
|
class MLBIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
_VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
||||||
@@ -80,6 +80,10 @@ class MLBIE(InfoExtractor):
|
|||||||
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
|
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -5,7 +5,7 @@ from ..utils import int_or_none
|
|||||||
|
|
||||||
|
|
||||||
class MporaIE(InfoExtractor):
|
class MporaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
|
_VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
|
||||||
IE_NAME = 'MPORA'
|
IE_NAME = 'MPORA'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@@ -25,7 +25,9 @@ class MporaIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
data_json = self._search_regex(
|
data_json = self._search_regex(
|
||||||
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
|
[r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
|
||||||
|
r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
|
||||||
|
webpage, 'json')
|
||||||
data = self._parse_json(data_json, video_id)
|
data = self._parse_json(data_json, video_id)
|
||||||
|
|
||||||
uploader = data['info_overlay'].get('username')
|
uploader = data['info_overlay'].get('username')
|
||||||
|
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
@@ -23,7 +23,7 @@ def _media_xml_tag(tag):
|
|||||||
return '{http://search.yahoo.com/mrss/}%s' % tag
|
return '{http://search.yahoo.com/mrss/}%s' % tag
|
||||||
|
|
||||||
|
|
||||||
class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
|
class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
_MOBILE_TEMPLATE = None
|
_MOBILE_TEMPLATE = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -95,25 +95,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
def _extract_subtitles(self, mdoc, mtvn_id):
|
def _extract_subtitles(self, mdoc, mtvn_id):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
FORMATS = {
|
|
||||||
'scc': 'cea-608',
|
|
||||||
'eia-608': 'cea-608',
|
|
||||||
'xml': 'ttml',
|
|
||||||
}
|
|
||||||
subtitles_format = FORMATS.get(
|
|
||||||
self._downloader.params.get('subtitlesformat'), 'ttml')
|
|
||||||
for transcript in mdoc.findall('.//transcript'):
|
for transcript in mdoc.findall('.//transcript'):
|
||||||
if transcript.get('kind') != 'captions':
|
if transcript.get('kind') != 'captions':
|
||||||
continue
|
continue
|
||||||
lang = transcript.get('srclang')
|
lang = transcript.get('srclang')
|
||||||
for typographic in transcript.findall('./typographic'):
|
subtitles[lang] = [{
|
||||||
captions_format = typographic.get('format')
|
'url': compat_str(typographic.get('src')),
|
||||||
if captions_format == subtitles_format:
|
'ext': typographic.get('format')
|
||||||
subtitles[lang] = compat_str(typographic.get('src'))
|
} for typographic in transcript.findall('./typographic')]
|
||||||
break
|
return subtitles
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(mtvn_id, subtitles)
|
|
||||||
return self.extract_subtitles(mtvn_id, subtitles)
|
|
||||||
|
|
||||||
def _get_video_info(self, itemdoc):
|
def _get_video_info(self, itemdoc):
|
||||||
uri = itemdoc.find('guid').text
|
uri = itemdoc.find('guid').text
|
||||||
@@ -196,8 +186,6 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
|
|||||||
webpage, 'mgid')
|
webpage, 'mgid')
|
||||||
|
|
||||||
videos_info = self._get_videos_info(mgid)
|
videos_info = self._get_videos_info(mgid)
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
return
|
|
||||||
return videos_info
|
return videos_info
|
||||||
|
|
||||||
|
|
||||||
|
@@ -3,17 +3,13 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
parse_duration,
|
|
||||||
unified_strdate,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MusicVaultIE(InfoExtractor):
|
class MusicVaultIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
|
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
|
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
|
||||||
'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
|
'md5': '3adcbdb3dcc02d647539e53f284ba171',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1010863',
|
'id': '1010863',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -22,9 +18,10 @@ class MusicVaultIE(InfoExtractor):
|
|||||||
'duration': 244,
|
'duration': 244,
|
||||||
'uploader': 'The Allman Brothers Band',
|
'uploader': 'The Allman Brothers Band',
|
||||||
'thumbnail': 're:^https?://.*/thumbnail/.*',
|
'thumbnail': 're:^https?://.*/thumbnail/.*',
|
||||||
'upload_date': '19811216',
|
'upload_date': '20131219',
|
||||||
'location': 'Capitol Theatre (Passaic, NJ)',
|
'location': 'Capitol Theatre (Passaic, NJ)',
|
||||||
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
|
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
|
||||||
|
'timestamp': int,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -43,34 +40,24 @@ class MusicVaultIE(InfoExtractor):
|
|||||||
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
|
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h2.*?>(.*?)</h2>', data_div, 'title')
|
r'<h2.*?>(.*?)</h2>', data_div, 'title')
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
|
||||||
r'<h3.*?>(.*?)</h3>', data_div, 'uploader', fatal=False))
|
|
||||||
location = self._html_search_regex(
|
location = self._html_search_regex(
|
||||||
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
|
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
|
||||||
|
|
||||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
|
||||||
|
|
||||||
VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
|
|
||||||
kaltura_id = self._search_regex(
|
kaltura_id = self._search_regex(
|
||||||
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
|
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
|
||||||
webpage, 'kaltura ID')
|
webpage, 'kaltura ID')
|
||||||
video_url = VIDEO_URL_TEMPLATE % {
|
wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
|
||||||
'entry_id': kaltura_id,
|
|
||||||
'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
|
|
||||||
'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': mobj.group('id'),
|
'id': mobj.group('id'),
|
||||||
'url': video_url,
|
'_type': 'url_transparent',
|
||||||
'ext': 'mp4',
|
'url': 'kaltura:%s:%s' % (wid, kaltura_id),
|
||||||
|
'ie_key': 'Kaltura',
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'uploader_id': mobj.group('uploader_id'),
|
'uploader_id': mobj.group('uploader_id'),
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'description': self._html_search_meta('description', webpage),
|
'description': self._html_search_meta('description', webpage),
|
||||||
'upload_date': upload_date,
|
|
||||||
'location': location,
|
'location': location,
|
||||||
'title': title,
|
'title': title,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'duration': duration,
|
|
||||||
}
|
}
|
||||||
|
38
youtube_dl/extractor/nationalgeographic.py
Normal file
38
youtube_dl/extractor/nationalgeographic.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
smuggle_url,
|
||||||
|
url_basename,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NationalGeographicIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4DmDACA6Qtk_',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Mating Crabs Busted by Sharks',
|
||||||
|
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||||
|
},
|
||||||
|
'add_ie': ['ThePlatform'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
name = url_basename(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, name)
|
||||||
|
feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||||
|
guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
|
||||||
|
|
||||||
|
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
|
||||||
|
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
|
||||||
|
theplatform_id = url_basename(content.attrib.get('url'))
|
||||||
|
|
||||||
|
return self.url_result(smuggle_url(
|
||||||
|
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||||
|
# For some reason, the normal links don't work and we must force the use of f4m
|
||||||
|
{'force_smil_url': True}))
|
@@ -14,17 +14,17 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NBCIE(InfoExtractor):
|
class NBCIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
_VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
|
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||||
# md5 checksum is not stable
|
# md5 checksum is not stable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bTmnLCvIbaaH',
|
'id': 'c9xnCo0YPOPH',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'I Am a Firefighter',
|
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||||
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
|
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -50,6 +50,57 @@ class NBCIE(InfoExtractor):
|
|||||||
return self.url_result(theplatform_url)
|
return self.url_result(theplatform_url)
|
||||||
|
|
||||||
|
|
||||||
|
class NBCSportsVPlayerIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9CsDKds0kvHI',
|
||||||
|
'ext': 'flv',
|
||||||
|
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
||||||
|
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
iframe_m = re.search(
|
||||||
|
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
||||||
|
if iframe_m:
|
||||||
|
return iframe_m.group('url')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
theplatform_url = self._og_search_video_url(webpage)
|
||||||
|
return self.url_result(theplatform_url, 'ThePlatform')
|
||||||
|
|
||||||
|
|
||||||
|
class NBCSportsIE(InfoExtractor):
|
||||||
|
# Does not include https becuase its certificate is invalid
|
||||||
|
_VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'PHJSaFWbrTY9',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||||
|
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
return self.url_result(
|
||||||
|
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
||||||
|
|
||||||
|
|
||||||
class NBCNewsIE(InfoExtractor):
|
class NBCNewsIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||||
(?:video/.+?/(?P<id>\d+)|
|
(?:video/.+?/(?P<id>\d+)|
|
||||||
|
@@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
|
|||||||
'timestamp': 1344858571,
|
'timestamp': 1344858571,
|
||||||
'age_limit': 12,
|
'age_limit': 12,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'Download only works from Germany',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -22,7 +22,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
IE_NAME = 'niconico'
|
IE_NAME = 'niconico'
|
||||||
IE_DESC = 'ニコニコ動画'
|
IE_DESC = 'ニコニコ動画'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -39,9 +39,26 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'username': 'ydl.niconico@gmail.com',
|
'username': 'ydl.niconico@gmail.com',
|
||||||
'password': 'youtube-dl',
|
'password': 'youtube-dl',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.nicovideo.jp/watch/nm14296458',
|
||||||
|
'md5': '8db08e0158457cf852a31519fceea5bc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nm14296458',
|
||||||
|
'ext': 'swf',
|
||||||
|
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
|
||||||
|
'description': 'md5:',
|
||||||
|
'uploader': 'りょうた',
|
||||||
|
'uploader_id': '18822557',
|
||||||
|
'upload_date': '20110429',
|
||||||
|
'duration': 209,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'username': 'ydl.niconico@gmail.com',
|
||||||
|
'password': 'youtube-dl',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
# Determine whether the downloader used authentication to download video
|
# Determine whether the downloader used authentication to download video
|
||||||
_AUTHENTICATED = False
|
_AUTHENTICATED = False
|
||||||
@@ -76,8 +93,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
# Get video webpage. We are not actually interested in it, but need
|
# Get video webpage. We are not actually interested in it, but need
|
||||||
# the cookies in order to be able to download the info webpage
|
# the cookies in order to be able to download the info webpage
|
||||||
@@ -90,7 +106,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
if self._AUTHENTICATED:
|
if self._AUTHENTICATED:
|
||||||
# Get flv info
|
# Get flv info
|
||||||
flv_info_webpage = self._download_webpage(
|
flv_info_webpage = self._download_webpage(
|
||||||
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||||
video_id, 'Downloading flv info')
|
video_id, 'Downloading flv info')
|
||||||
else:
|
else:
|
||||||
# Get external player info
|
# Get external player info
|
||||||
|
@@ -1,6 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
@@ -12,7 +11,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NPOBaseIE(SubtitlesInfoExtractor):
|
class NPOBaseIE(InfoExtractor):
|
||||||
def _get_token(self, video_id):
|
def _get_token(self, video_id):
|
||||||
token_page = self._download_webpage(
|
token_page = self._download_webpage(
|
||||||
'http://ida.omroep.nl/npoplayer/i.js',
|
'http://ida.omroep.nl/npoplayer/i.js',
|
||||||
@@ -164,13 +163,10 @@ class NPOIE(NPOBaseIE):
|
|||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
if metadata.get('tt888') == 'ja':
|
if metadata.get('tt888') == 'ja':
|
||||||
subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id
|
subtitles['nl'] = [{
|
||||||
|
'ext': 'vtt',
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
'url': 'http://e.omroep.nl/tt888/%s' % video_id,
|
||||||
self._list_available_subtitles(video_id, subtitles)
|
}]
|
||||||
return
|
|
||||||
|
|
||||||
subtitles = self.extract_subtitles(video_id, subtitles)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@@ -223,7 +219,8 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
if streams:
|
if streams:
|
||||||
for stream in streams:
|
for stream in streams:
|
||||||
stream_type = stream.get('type').lower()
|
stream_type = stream.get('type').lower()
|
||||||
if stream_type == 'ss':
|
# smooth streaming is not supported
|
||||||
|
if stream_type in ['ss', 'ms']:
|
||||||
continue
|
continue
|
||||||
stream_info = self._download_json(
|
stream_info = self._download_json(
|
||||||
'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
|
'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
|
||||||
@@ -234,7 +231,10 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
stream_url = self._download_json(
|
stream_url = self._download_json(
|
||||||
stream_info['stream'], display_id,
|
stream_info['stream'], display_id,
|
||||||
'Downloading %s URL' % stream_type,
|
'Downloading %s URL' % stream_type,
|
||||||
transform_source=strip_jsonp)
|
'Unable to download %s URL' % stream_type,
|
||||||
|
transform_source=strip_jsonp, fatal=False)
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
if stream_type == 'hds':
|
if stream_type == 'hds':
|
||||||
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
|
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
|
||||||
# f4m downloader downloads only piece of live stream
|
# f4m downloader downloads only piece of live stream
|
||||||
@@ -246,6 +246,7 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': stream_url,
|
'url': stream_url,
|
||||||
|
'preference': -10,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@@ -4,56 +4,58 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class NRKIE(InfoExtractor):
|
class NRKIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
|
_VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
|
'url': 'http://www.nrk.no/video/PS*150533',
|
||||||
'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
|
'md5': 'bccd850baebefe23b56d708a113229c2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '150533',
|
'id': '150533',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Dompap og andre fugler i Piip-Show',
|
'title': 'Dompap og andre fugler i Piip-Show',
|
||||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
|
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||||
|
'duration': 263,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
|
'url': 'http://www.nrk.no/video/PS*154915',
|
||||||
'md5': '3471f2a51718195164e88f46bf427668',
|
'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '154915',
|
'id': '154915',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Slik høres internett ut når du er blind',
|
'title': 'Slik høres internett ut når du er blind',
|
||||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||||
|
'duration': 20,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
|
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
|
'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
|
||||||
|
video_id, 'Downloading media JSON')
|
||||||
|
|
||||||
if data['usageRights']['isGeoBlocked']:
|
if data['usageRights']['isGeoBlocked']:
|
||||||
raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
|
raise ExtractorError(
|
||||||
|
'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
|
video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
|
||||||
|
|
||||||
|
duration = parse_duration(data.get('duration'))
|
||||||
|
|
||||||
images = data.get('images')
|
images = data.get('images')
|
||||||
if images:
|
if images:
|
||||||
@@ -69,11 +71,52 @@ class NRKIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': data['title'],
|
'title': data['title'],
|
||||||
'description': data['description'],
|
'description': data['description'],
|
||||||
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class NRKTVIE(SubtitlesInfoExtractor):
|
class NRKPlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
||||||
|
'title': 'Gjenopplev den historiske solformørkelsen',
|
||||||
|
'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'rivertonprisen-til-karin-fossum-1.12266449',
|
||||||
|
'title': 'Rivertonprisen til Karin Fossum',
|
||||||
|
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
|
||||||
|
},
|
||||||
|
'playlist_count': 5,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result('nrk:%s' % video_id, 'NRK')
|
||||||
|
for video_id in re.findall(
|
||||||
|
r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
|
||||||
|
webpage)
|
||||||
|
]
|
||||||
|
|
||||||
|
playlist_title = self._og_search_title(webpage)
|
||||||
|
playlist_description = self._og_search_description(webpage)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
|
||||||
|
class NRKTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@@ -149,28 +192,29 @@ class NRKTVIE(SubtitlesInfoExtractor):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _seconds2str(self, s):
|
|
||||||
return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
|
|
||||||
|
|
||||||
def _debug_print(self, txt):
|
def _debug_print(self, txt):
|
||||||
if self._downloader.params.get('verbose', False):
|
if self._downloader.params.get('verbose', False):
|
||||||
self.to_screen('[debug] %s' % txt)
|
self.to_screen('[debug] %s' % txt)
|
||||||
|
|
||||||
def _extract_captions(self, subtitlesurl, video_id, baseurl):
|
def _get_subtitles(self, subtitlesurl, video_id, baseurl):
|
||||||
url = "%s%s" % (baseurl, subtitlesurl)
|
url = "%s%s" % (baseurl, subtitlesurl)
|
||||||
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
|
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
|
||||||
captions = self._download_xml(url, video_id, 'Downloading subtitles')
|
captions = self._download_xml(
|
||||||
|
url, video_id, 'Downloading subtitles',
|
||||||
|
transform_source=lambda s: s.replace(r'<br />', '\r\n'))
|
||||||
lang = captions.get('lang', 'no')
|
lang = captions.get('lang', 'no')
|
||||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
|
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
|
||||||
srt = ''
|
srt = ''
|
||||||
for pos, p in enumerate(ps):
|
for pos, p in enumerate(ps):
|
||||||
begin = parse_duration(p.get('begin'))
|
begin = parse_duration(p.get('begin'))
|
||||||
duration = parse_duration(p.get('dur'))
|
duration = parse_duration(p.get('dur'))
|
||||||
starttime = self._seconds2str(begin)
|
starttime = self._subtitles_timecode(begin)
|
||||||
endtime = self._seconds2str(begin + duration)
|
endtime = self._subtitles_timecode(begin + duration)
|
||||||
text = '\n'.join(p.itertext())
|
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
|
||||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
|
return {lang: [
|
||||||
return {lang: srt}
|
{'ext': 'ttml', 'url': url},
|
||||||
|
{'ext': 'srt', 'data': srt},
|
||||||
|
]}
|
||||||
|
|
||||||
def _extract_f4m(self, manifest_url, video_id):
|
def _extract_f4m(self, manifest_url, video_id):
|
||||||
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
|
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
|
||||||
@@ -243,10 +287,7 @@ class NRKTVIE(SubtitlesInfoExtractor):
|
|||||||
webpage, 'subtitle URL', default=None)
|
webpage, 'subtitle URL', default=None)
|
||||||
subtitles = None
|
subtitles = None
|
||||||
if subtitles_url:
|
if subtitles_url:
|
||||||
subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
|
subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(video_id, subtitles)
|
|
||||||
return
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user