Compare commits: 2015.03.03...2015.04.17
425 commits

@@ -2,6 +2,7 @@ language: python
 python:
 - "2.6"
 - "2.7"
+- "3.2"
 - "3.3"
 - "3.4"
 before_install:

AUTHORS (10 changed lines)
@@ -113,3 +113,13 @@ Robin de Rooij
 Ryan Schmidt
 Leslie P. Polzer
 Duncan Keall
+Alexander Mamay
+Devin J. Pohly
+Eduardo Ferro Aldama
+Jeff Buchbinder
+Amish Bhadeshia
+Joram Schrijver
+Will W.
+Mohammad Teimori Pabandi
+Roman Le Négrate
+Matthias Küch

@@ -18,7 +18,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
 
 For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
 
-Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
+If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
 
 ### Are you using the latest version?
 

Makefile (2 changed lines)
@@ -2,7 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
 
 clean:
     rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
-    find -name "*.pyc" -delete
+    find . -name "*.pyc" -delete
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin

README.md (213 changed lines)
@@ -45,21 +45,21 @@ which means you can modify it, redistribute it or use it however you like.
     youtube-dl [OPTIONS] URL [URL...]
 
 # OPTIONS
-    -h, --help print this help text and exit
+    -h, --help Print this help text and exit
-    --version print program version and exit
+    --version Print program version and exit
-    -U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
+    -U, --update Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
-    -i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist
+    -i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
     --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
-    --dump-user-agent display the current browser identification
+    --dump-user-agent Display the current browser identification
     --list-extractors List all supported extractors and the URLs they would handle
     --extractor-descriptions Output descriptions of all supported extractors
     --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
     Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
     default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
     --ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
     in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
     --flat-playlist Do not extract the videos of a playlist, only list them.
-    --no-color Do not emit color codes in output.
+    --no-color Do not emit color codes in output
 
 ## Network Options:
     --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
@@ -71,70 +71,70 @@ which means you can modify it, redistribute it or use it however you like.
     not present) is used for the actual downloading. (experimental)
 
 ## Video Selection:
-    --playlist-start NUMBER playlist video to start at (default is 1)
+    --playlist-start NUMBER Playlist video to start at (default is 1)
-    --playlist-end NUMBER playlist video to end at (default is last)
+    --playlist-end NUMBER Playlist video to end at (default is last)
-    --playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
+    --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
     if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
     download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
-    --match-title REGEX download only matching titles (regex or caseless sub-string)
+    --match-title REGEX Download only matching titles (regex or caseless sub-string)
-    --reject-title REGEX skip download for matching titles (regex or caseless sub-string)
+    --reject-title REGEX Skip download for matching titles (regex or caseless sub-string)
     --max-downloads NUMBER Abort after downloading NUMBER files
     --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
     --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
-    --date DATE download only videos uploaded in this date
+    --date DATE Download only videos uploaded in this date
-    --datebefore DATE download only videos uploaded on or before this date (i.e. inclusive)
+    --datebefore DATE Download only videos uploaded on or before this date (i.e. inclusive)
-    --dateafter DATE download only videos uploaded on or after this date (i.e. inclusive)
+    --dateafter DATE Download only videos uploaded on or after this date (i.e. inclusive)
     --min-views COUNT Do not download any videos with less than COUNT views
     --max-views COUNT Do not download any videos with more than COUNT views
-    --match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present,
+    --match-filter FILTER Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present,
     !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
     a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
     operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
     functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 &
     dislike_count <? 50 & description" .
-    --no-playlist If the URL refers to a video and a playlist, download only the video.
+    --no-playlist Download only the video, if the URL refers to a video and a playlist.
-    --yes-playlist If the URL refers to a video and a playlist, download the playlist.
+    --yes-playlist Download the playlist, if the URL refers to a video and a playlist.
-    --age-limit YEARS download only videos suitable for the given age
+    --age-limit YEARS Download only videos suitable for the given age
     --download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
     --include-ads Download advertisements as well (experimental)
 
 ## Download Options:
-    -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M)
+    -r, --rate-limit LIMIT Maximum download rate in bytes per second (e.g. 50K or 4.2M)
-    -R, --retries RETRIES number of retries (default is 10), or "infinite".
+    -R, --retries RETRIES Number of retries (default is 10), or "infinite".
-    --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024)
+    --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024)
-    --no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
+    --no-resize-buffer Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
     --playlist-reverse Download playlist videos in reverse order
-    --xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize
+    --xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
-    --hls-prefer-native (experimental) Use the native HLS downloader instead of ffmpeg.
+    --hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
     --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
-    --external-downloader-args ARGS Give these arguments to the external downloader.
+    --external-downloader-args ARGS Give these arguments to the external downloader
 
 ## Filesystem Options:
-    -a, --batch-file FILE file containing URLs to download ('-' for stdin)
+    -a, --batch-file FILE File containing URLs to download ('-' for stdin)
-    --id use only video ID in file name
+    --id Use only video ID in file name
-    -o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
+    -o, --output TEMPLATE Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
     nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
-    the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"),
+    the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"),
     %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
     %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
     %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
     %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
     Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
-    --autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
+    --autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
     --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
-    -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000
+    -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000
-    -t, --title [deprecated] use title in file name (default)
+    -t, --title [deprecated] Use title in file name (default)
-    -l, --literal [deprecated] alias of --title
+    -l, --literal [deprecated] Alias of --title
-    -w, --no-overwrites do not overwrite files
+    -w, --no-overwrites Do not overwrite files
-    -c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
+    -c, --continue Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
-    --no-continue do not resume partially downloaded files (restart from beginning)
+    --no-continue Do not resume partially downloaded files (restart from beginning)
-    --no-part do not use .part files - write directly into output file
+    --no-part Do not use .part files - write directly into output file
-    --no-mtime do not use the Last-modified header to set the file modification time
+    --no-mtime Do not use the Last-modified header to set the file modification time
-    --write-description write video description to a .description file
+    --write-description Write video description to a .description file
-    --write-info-json write video metadata to a .info.json file
+    --write-info-json Write video metadata to a .info.json file
-    --write-annotations write video annotations to a .annotation file
+    --write-annotations Write video annotations to a .annotation file
-    --load-info FILE json file containing the video information (created with the "--write-json" option)
+    --load-info FILE JSON file containing the video information (created with the "--write-info-json" option)
-    --cookies FILE file to read cookies from and dump cookie jar in
+    --cookies FILE File to read cookies from and dump cookie jar in
     --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
     or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
     change.
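
The -o template documented in the hunk above has an embedding-API counterpart, the `outtmpl` parameter of `YoutubeDL`. A minimal sketch (the target directory and URL are placeholders, not part of the diff):

```python
from __future__ import unicode_literals

import youtube_dl

# 'outtmpl' is the embedding-API counterpart of the -o option shown above.
ydl_opts = {'outtmpl': '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s'}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```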
@@ -142,49 +142,49 @@ which means you can modify it, redistribute it or use it however you like.
     --rm-cache-dir Delete all filesystem cache files
 
 ## Thumbnail images:
-    --write-thumbnail write thumbnail image to disk
+    --write-thumbnail Write thumbnail image to disk
-    --write-all-thumbnails write all thumbnail image formats to disk
+    --write-all-thumbnails Write all thumbnail image formats to disk
     --list-thumbnails Simulate and list all available thumbnail formats
 
 ## Verbosity / Simulation Options:
-    -q, --quiet activates quiet mode
+    -q, --quiet Activate quiet mode
     --no-warnings Ignore warnings
-    -s, --simulate do not download the video and do not write anything to disk
+    -s, --simulate Do not download the video and do not write anything to disk
-    --skip-download do not download the video
+    --skip-download Do not download the video
-    -g, --get-url simulate, quiet but print URL
+    -g, --get-url Simulate, quiet but print URL
-    -e, --get-title simulate, quiet but print title
+    -e, --get-title Simulate, quiet but print title
-    --get-id simulate, quiet but print id
+    --get-id Simulate, quiet but print id
-    --get-thumbnail simulate, quiet but print thumbnail URL
+    --get-thumbnail Simulate, quiet but print thumbnail URL
-    --get-description simulate, quiet but print video description
+    --get-description Simulate, quiet but print video description
-    --get-duration simulate, quiet but print video length
+    --get-duration Simulate, quiet but print video length
-    --get-filename simulate, quiet but print output filename
+    --get-filename Simulate, quiet but print output filename
-    --get-format simulate, quiet but print output format
+    --get-format Simulate, quiet but print output format
-    -j, --dump-json simulate, quiet but print JSON information. See --output for a description of available keys.
+    -j, --dump-json Simulate, quiet but print JSON information. See --output for a description of available keys.
-    -J, --dump-single-json simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
+    -J, --dump-single-json Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
     information in a single line.
     --print-json Be quiet and print the video information as JSON (video is still being downloaded).
-    --newline output progress bar as new lines
+    --newline Output progress bar as new lines
-    --no-progress do not print progress bar
+    --no-progress Do not print progress bar
-    --console-title display progress in console titlebar
+    --console-title Display progress in console titlebar
-    -v, --verbose print various debugging information
+    -v, --verbose Print various debugging information
-    --dump-intermediate-pages print downloaded pages to debug problems (very verbose)
+    --dump-pages Print downloaded pages to debug problems (very verbose)
     --write-pages Write downloaded intermediary pages to files in the current directory to debug problems
     --print-traffic Display sent and read HTTP traffic
-    -C, --call-home Contact the youtube-dl server for debugging.
+    -C, --call-home Contact the youtube-dl server for debugging
-    --no-call-home Do NOT contact the youtube-dl server for debugging.
+    --no-call-home Do NOT contact the youtube-dl server for debugging
 
 ## Workarounds:
     --encoding ENCODING Force the specified encoding (experimental)
-    --no-check-certificate Suppress HTTPS certificate validation.
+    --no-check-certificate Suppress HTTPS certificate validation
     --prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
-    --user-agent UA specify a custom user agent
+    --user-agent UA Specify a custom user agent
-    --referer URL specify a custom referer, use if the video access is restricted to one domain
+    --referer URL Specify a custom referer, use if the video access is restricted to one domain
-    --add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
+    --add-header FIELD:VALUE Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
     --bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
     --sleep-interval SECONDS Number of seconds to sleep before each download.
 
 ## Video Format Options:
-    -f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by
+    -f, --format FORMAT Video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by
     extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio",
     "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
     This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec,
|
|||||||
Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio
|
Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio
|
||||||
of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f
|
of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f
|
||||||
bestvideo+bestaudio.
|
bestvideo+bestaudio.
|
||||||
--all-formats download all available video formats
|
--all-formats Download all available video formats
|
||||||
--prefer-free-formats prefer free video formats unless a specific one is requested
|
--prefer-free-formats Prefer free video formats unless a specific one is requested
|
||||||
--max-quality FORMAT highest quality format to download
|
--max-quality FORMAT Highest quality format to download
|
||||||
-F, --list-formats list all available formats
|
-F, --list-formats List all available formats
|
||||||
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
|
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
|
||||||
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
|
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
|
||||||
merge is required
|
merge is required
|
||||||
|
|
||||||
## Subtitle Options:
|
## Subtitle Options:
|
||||||
--write-sub write subtitle file
|
--write-sub Write subtitle file
|
||||||
--write-auto-sub write automatic subtitle file (youtube only)
|
--write-auto-sub Write automatic subtitle file (YouTube only)
|
||||||
--all-subs downloads all the available subtitles of the video
|
--all-subs Download all the available subtitles of the video
|
||||||
--list-subs lists all available subtitles for the video
|
--list-subs List all available subtitles for the video
|
||||||
--sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best"
|
--sub-format FORMAT Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"
|
||||||
--sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
|
--sub-lang LANGS Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
|
||||||
|
|
||||||
## Authentication Options:
|
## Authentication Options:
|
||||||
-u, --username USERNAME login with this account ID
|
-u, --username USERNAME Login with this account ID
|
||||||
-p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively.
|
-p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively.
|
||||||
-2, --twofactor TWOFACTOR two-factor auth code
|
-2, --twofactor TWOFACTOR Two-factor auth code
|
||||||
-n, --netrc use .netrc authentication data
|
-n, --netrc Use .netrc authentication data
|
||||||
--video-password PASSWORD video password (vimeo, smotri)
|
--video-password PASSWORD Video password (vimeo, smotri)
|
||||||
|
|
||||||
## Post-processing Options:
|
## Post-processing Options:
|
||||||
-x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
|
-x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
|
||||||
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
--audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
||||||
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K
|
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
|
||||||
(default 5)
|
5)
|
||||||
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
|
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
|
||||||
-k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default
|
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
|
||||||
--no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default
|
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
|
||||||
--embed-subs embed subtitles in the video (only for mp4 videos)
|
--embed-subs Embed subtitles in the video (only for mp4 videos)
|
||||||
--embed-thumbnail embed thumbnail in the audio as cover art
|
--embed-thumbnail Embed thumbnail in the audio as cover art
|
||||||
--add-metadata write metadata to the video file
|
--add-metadata Write metadata to the video file
|
||||||
--xattrs write metadata to the video file's xattrs (using dublin core and xdg standards)
|
--metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
|
||||||
|
parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
|
||||||
|
%(title)s" matches a title like "Coldplay - Paradise"
|
||||||
|
--xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
|
||||||
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
|
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
|
||||||
fix file if we can, warn otherwise)
|
fix file if we can, warn otherwise)
|
||||||
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
|
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
|
||||||
@@ -404,6 +407,18 @@ A note on the service that they don't host the infringing content, but just link
 
 Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
 
+### How can I speed up work on my issue?
+
+(Also known as: Help, my important issue not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
+
+First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
+
+Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
+
+If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
+
+Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
+
 ### How can I detect whether a given URL is supported by youtube-dl?
 
 For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
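
Not part of the diff, but the check described in the FAQ entry above can also be done programmatically against the extractor list; a sketch, assuming one wants to exclude the catch-all generic extractor:

```python
from __future__ import unicode_literals

from youtube_dl.extractor import gen_extractors


def url_is_supported(url):
    # Every extractor exposes suitable(), which matches the URL against its
    # _VALID_URL pattern; the generic extractor matches almost anything, so
    # it is excluded here.
    return any(ie.suitable(url) and ie.IE_NAME != 'generic'
               for ie in gen_extractors())


print(url_is_supported('http://www.youtube.com/watch?v=BaW_jenozKc'))
```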
@@ -503,6 +518,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
 From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
 
 ```python
+from __future__ import unicode_literals
 import youtube_dl
 
 ydl_opts = {}
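
Only the top of that embedding example is visible in the hunk; in the shipped README it presumably continues roughly like this (a sketch, not the exact file contents):

```python
from __future__ import unicode_literals
import youtube_dl

ydl_opts = {}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```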
@@ -515,6 +531,7 @@ Most likely, you'll want to use various options. For a list of what can be done,
 Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
 
 ```python
+from __future__ import unicode_literals
 import youtube_dl
 
 
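
Again, only the first lines of the mp3 example appear in the hunk. A sketch of what such a program looks like with this version's embedding API; the option keys ('format', 'postprocessors', 'logger', 'progress_hooks') are real YoutubeDL parameters, but the exact shipped example may differ:

```python
from __future__ import unicode_literals
import youtube_dl


class MyLogger(object):
    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        print(msg)


def my_hook(d):
    # Progress hooks receive a status dict for every download step.
    if d['status'] == 'finished':
        print('Done downloading, now converting ...')


ydl_opts = {
    'format': 'bestaudio/best',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
    'logger': MyLogger(),
    'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```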
@@ -572,7 +589,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
 
 For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
 
-Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
+If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
 
 ### Are you using the latest version?
 

@@ -28,7 +28,7 @@ for test in get_testcases():
     if METHOD == 'EURISTIC':
         try:
             webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
-        except:
+        except Exception:
             print('\nFail: {0}'.format(test['name']))
             continue
 
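
Aside (illustration only, not from the repository): a bare `except:` also catches KeyboardInterrupt and SystemExit, so Ctrl-C could no longer abort the test loop; presumably that is the motivation for narrowing it to `except Exception:` above.

```python
try:
    raise KeyboardInterrupt
except Exception:
    print('not reached: KeyboardInterrupt is not an Exception subclass')
except BaseException:
    print('reached: only a bare except (or BaseException) would catch it')
```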

devscripts/generate_aes_testdata.py (new file, 42 lines)
@@ -0,0 +1,42 @@
+from __future__ import unicode_literals
+
+import codecs
+import subprocess
+
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import intlist_to_bytes
+from youtube_dl.aes import aes_encrypt, key_expansion
+
+secret_msg = b'Secret message goes here'
+
+
+def hex_str(int_list):
+    return codecs.encode(intlist_to_bytes(int_list), 'hex')
+
+
+def openssl_encode(algo, key, iv):
+    cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
+    prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+    out, _ = prog.communicate(secret_msg)
+    return out
+
+iv = key = [0x20, 0x15] + 14 * [0]
+
+r = openssl_encode('aes-128-cbc', key, iv)
+print('aes_cbc_decrypt')
+print(repr(r))
+
+password = key
+new_key = aes_encrypt(password, key_expansion(password))
+r = openssl_encode('aes-128-ctr', new_key, iv)
+print('aes_decrypt_text 16')
+print(repr(r))
+
+password = key + 16 * [0]
+new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
+r = openssl_encode('aes-256-ctr', new_key, iv)
+print('aes_decrypt_text 32')
+print(repr(r))
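
The labels printed by this script match the AES test names, so the openssl output is evidently meant to serve as expected values for youtube-dl's pure-Python AES routines. A rough verification sketch for the CBC vector (assuming openssl's default PKCS#7 padding, which pads the 24-byte message with 0x08 bytes):

```python
from __future__ import unicode_literals

import codecs
import subprocess

from youtube_dl.aes import aes_cbc_decrypt
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes

secret_msg = b'Secret message goes here'
iv = key = [0x20, 0x15] + 14 * [0]


def hex_str(int_list):
    return codecs.encode(intlist_to_bytes(int_list), 'hex')

# Re-create the aes-128-cbc vector exactly as the script above does ...
cmd = ['openssl', 'enc', '-e', '-aes-128-cbc', '-K', hex_str(key), '-iv', hex_str(iv)]
out, _ = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE).communicate(secret_msg)

# ... and check that youtube-dl's own AES implementation decrypts it back.
decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(out), key, iv))
assert decrypted.rstrip(b'\x08') == secret_msg
```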
@@ -2,6 +2,8 @@
 - **1tv**: Первый канал
 - **1up.com**
 - **220.ro**
+- **22tracks:genre**
+- **22tracks:track**
 - **24video**
 - **3sat**
 - **4tube**
@@ -47,6 +49,7 @@
 - **Bandcamp**
 - **Bandcamp:album**
 - **bbc.co.uk**: BBC iPlayer
+- **BeatportPro**
 - **Beeg**
 - **BehindKink**
 - **Bet**
@@ -95,6 +98,7 @@
 - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
 - **Cracked**
 - **Criterion**
+- **CrooksAndLiars**
 - **Crunchyroll**
 - **crunchyroll:playlist**
 - **CSpan**: C-SPAN
@@ -108,15 +112,19 @@
 - **DctpTv**
 - **DeezerPlaylist**
 - **defense.gouv.fr**
+- **DHM**: Filmarchiv - Deutsches Historisches Museum
 - **Discovery**
 - **divxstage**: DivxStage
 - **Dotsub**
+- **DouyuTV**
 - **DRBonanza**
 - **Dropbox**
 - **DrTuber**
 - **DRTV**
 - **Dump**
+- **Dumpert**
 - **dvtv**: http://video.aktualne.cz/
+- **EaglePlatform**
 - **EbaumsWorld**
 - **EchoMsk**
 - **eHow**
@@ -144,6 +152,7 @@
 - **Firstpost**
 - **Flickr**
 - **Folketinget**: Folketinget (ft.dk; Danish parliament)
+- **FootyRoom**
 - **Foxgay**
 - **FoxNews**
 - **france2.fr:generation-quoi**
@@ -158,9 +167,11 @@
 - **Gamekings**
 - **GameOne**
 - **gameone:playlist**
+- **Gamersyde**
 - **GameSpot**
 - **GameStar**
 - **Gametrailers**
+- **Gazeta**
 - **GDCVault**
 - **generic**: Generic downloader that works on some sites
 - **GiantBomb**
@@ -211,6 +222,7 @@
 - **jpopsuki.tv**
 - **Jukebox**
 - **Kaltura**
+- **KanalPlay**: Kanal 5/9/11 Play
 - **Kankan**
 - **Karaoketv**
 - **keek**
@@ -225,6 +237,7 @@
 - **Letv**
 - **LetvPlaylist**
 - **LetvTv**
+- **Libsyn**
 - **lifenews**: LIFE | NEWS
 - **LiveLeak**
 - **livestream**
@@ -244,6 +257,7 @@
 - **Mgoon**
 - **Minhateca**
 - **MinistryGrid**
+- **miomio.tv**
 - **mitele.es**
 - **mixcloud**
 - **MLB**
@@ -277,6 +291,8 @@
 - **NBA**
 - **NBC**
 - **NBCNews**
+- **NBCSports**
+- **NBCSportsVPlayer**
 - **ndr**: NDR.de - Mediathek
 - **NDTV**
 - **NerdCubedFeed**
@@ -304,6 +320,7 @@
 - **npo.nl:radio**
 - **npo.nl:radio:fragment**
 - **NRK**
+- **NRKPlaylist**
 - **NRKTV**
 - **ntv.ru**
 - **Nuvid**
@@ -315,6 +332,7 @@
 - **Ooyala**
 - **OpenFilm**
 - **orf:fm4**: radio FM4
+- **orf:iptv**: iptv.ORF.at
 - **orf:oe1**: Radio Österreich 1
 - **orf:tvthek**: ORF TVthek
 - **parliamentlive.tv**: UK parliament videos
@@ -322,10 +340,12 @@
 - **PBS**
 - **Phoenix**
 - **Photobucket**
+- **Pladform**
 - **PlanetaPlay**
 - **play.fm**
 - **played.to**
 - **Playvid**
+- **Playwire**
 - **plus.google**: Google Plus
 - **pluzz.francetv.fr**
 - **podomatic**
@@ -333,16 +353,22 @@
 - **PornHub**
 - **PornHubPlaylist**
 - **Pornotube**
+- **PornoVoisines**
 - **PornoXO**
+- **PrimeShareTV**
 - **PromptFile**
 - **prosiebensat1**: ProSiebenSat.1 Digital
 - **Puls4**
 - **Pyvideo**
+- **QQMusic**
+- **QQMusicAlbum**
+- **QQMusicSinger**
 - **QuickVid**
 - **R7**
 - **radio.de**
 - **radiobremen**
 - **radiofrance**
+- **RadioJavan**
 - **Rai**
 - **RBMARadio**
 - **RedTube**
@@ -359,6 +385,7 @@
 - **RTP**
 - **RTS**: RTS.ch
 - **rtve.es:alacarta**: RTVE a la carta
+- **rtve.es:infantil**: RTVE infantil
 - **rtve.es:live**: RTVE.es live streams
 - **RUHD**
 - **rutube**: Rutube videos
@@ -367,6 +394,8 @@
 - **rutube:movie**: Rutube movies
 - **rutube:person**: Rutube person videos
 - **RUTV**: RUTV.RU
+- **safari**: safaribooksonline.com online video
+- **safari:course**: safaribooksonline.com online courses
 - **Sandia**: Sandia National Laboratories
 - **Sapo**: SAPO Vídeos
 - **savefrom.net**
@@ -400,6 +429,7 @@
 - **southpark.cc.com**
 - **southpark.de**
 - **Space**
+- **SpankBang**
 - **Spankwire**
 - **Spiegel**
 - **Spiegel:Article**: Articles on spiegel.de
@@ -408,7 +438,9 @@
 - **Sport5**
 - **SportBox**
 - **SportDeutschland**
+- **Srf**
 - **SRMediathek**: Saarländischer Rundfunk
+- **SSA**
 - **stanfordoc**: Stanford Open ClassRoom
 - **Steam**
 - **streamcloud.eu**
@@ -478,13 +510,17 @@
 - **Ubu**
 - **udemy**
 - **udemy:course**
+- **UDNEmbed**
+- **Ultimedia**
 - **Unistra**
 - **Urort**: NRK P3 Urørt
 - **ustream**
 - **ustream:channel**
+- **Varzesh3**
 - **Vbox7**
 - **VeeHD**
 - **Veoh**
+- **Vessel**
 - **Vesti**: Вести.Ru
 - **Vevo**
 - **VGTV**
@@ -505,6 +541,7 @@
 - **Vidzi**
 - **vier**
 - **vier:videos**
+- **Viewster**
 - **viki**
 - **vimeo**
 - **vimeo:album**
@@ -551,6 +588,9 @@
 - **XXXYMovies**
 - **Yahoo**: Yahoo screen and movies
 - **Yam**
+- **yandexmusic:album**: Яндекс.Музыка - Альбом
+- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
+- **yandexmusic:track**: Яндекс.Музыка - Трек
 - **YesJapan**
 - **Ynet**
 - **YouJizz**
@@ -569,7 +609,7 @@
 - **youtube:show**: YouTube.com (multi-season) shows
 - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
 - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
-- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
 - **Zapiks**
 - **ZDF**
 - **ZDFChannel**
|
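The list above is the regenerated supported-sites overview for this release. As a rough illustration only (not part of this changeset), a similar listing can be produced from the package itself; this sketch assumes youtube_dl.extractor.gen_extractors() and the IE_NAME attribute behave as in this version:

    # Hypothetical helper: enumerate extractor names, assuming gen_extractors()
    # returns extractor instances exposing IE_NAME as in this release.
    from youtube_dl.extractor import gen_extractors

    for ie in sorted(gen_extractors(), key=lambda ie: ie.IE_NAME.lower()):
        print('- **%s**' % ie.IE_NAME)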
@@ -14,6 +14,9 @@ from test.helper import FakeYDL, assertRegexpMatches
 from youtube_dl import YoutubeDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.postprocessor.common import PostProcessor
+from youtube_dl.utils import match_filter_func
+
+TEST_URL = 'http://localhost/sample.mp4'
 
 
 class YDL(FakeYDL):
@@ -46,8 +49,8 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = True
         formats = [
-            {'ext': 'webm', 'height': 460, 'url': 'x'},
-            {'ext': 'mp4', 'height': 460, 'url': 'y'},
+            {'ext': 'webm', 'height': 460, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
         yie = YoutubeIE(ydl)
@@ -60,8 +63,8 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = True
         formats = [
-            {'ext': 'webm', 'height': 720, 'url': 'a'},
-            {'ext': 'mp4', 'height': 1080, 'url': 'b'},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
         ]
         info_dict['formats'] = formats
         yie = YoutubeIE(ydl)
@@ -74,9 +77,9 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = False
         formats = [
-            {'ext': 'webm', 'height': 720, 'url': '_'},
-            {'ext': 'mp4', 'height': 720, 'url': '_'},
-            {'ext': 'flv', 'height': 720, 'url': '_'},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
+            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
         ]
         info_dict['formats'] = formats
         yie = YoutubeIE(ydl)
@@ -88,8 +91,8 @@ class TestFormatSelection(unittest.TestCase):
         ydl = YDL()
         ydl.params['prefer_free_formats'] = False
         formats = [
-            {'ext': 'flv', 'height': 720, 'url': '_'},
-            {'ext': 'webm', 'height': 720, 'url': '_'},
+            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
        ]
         info_dict['formats'] = formats
         yie = YoutubeIE(ydl)
@@ -133,10 +136,10 @@ class TestFormatSelection(unittest.TestCase):
 
     def test_format_selection(self):
         formats = [
-            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
-            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
-            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
-            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
+            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
+            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
+            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -167,10 +170,10 @@ class TestFormatSelection(unittest.TestCase):
 
     def test_format_selection_audio(self):
         formats = [
-            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
+            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -185,8 +188,8 @@ class TestFormatSelection(unittest.TestCase):
         self.assertEqual(downloaded['format_id'], 'audio-low')
 
         formats = [
-            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
-            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
+            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -228,9 +231,9 @@ class TestFormatSelection(unittest.TestCase):
 
     def test_format_selection_video(self):
         formats = [
-            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
-            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
+            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
         ]
         info_dict = _make_result(formats)
 
@@ -337,6 +340,8 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'G')
 
+
+class TestYoutubeDL(unittest.TestCase):
     def test_subtitles(self):
         def s_formats(lang, autocaption=False):
             return [{
@@ -459,6 +464,73 @@ class TestFormatSelection(unittest.TestCase):
         self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
         os.unlink(audiofile)
 
+    def test_match_filter(self):
+        class FilterYDL(YDL):
+            def __init__(self, *args, **kwargs):
+                super(FilterYDL, self).__init__(*args, **kwargs)
+                self.params['simulate'] = True
+
+            def process_info(self, info_dict):
+                super(YDL, self).process_info(info_dict)
+
+            def _match_entry(self, info_dict, incomplete):
+                res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
+                if res is None:
+                    self.downloaded_info_dicts.append(info_dict)
+                return res
+
+        first = {
+            'id': '1',
+            'url': TEST_URL,
+            'title': 'one',
+            'extractor': 'TEST',
+            'duration': 30,
+            'filesize': 10 * 1024,
+        }
+        second = {
+            'id': '2',
+            'url': TEST_URL,
+            'title': 'two',
+            'extractor': 'TEST',
+            'duration': 10,
+            'description': 'foo',
+            'filesize': 5 * 1024,
+        }
+        videos = [first, second]
+
+        def get_videos(filter_=None):
+            ydl = FilterYDL({'match_filter': filter_})
+            for v in videos:
+                ydl.process_ie_result(v, download=True)
+            return [v['id'] for v in ydl.downloaded_info_dicts]
+
+        res = get_videos()
+        self.assertEqual(res, ['1', '2'])
+
+        def f(v):
+            if v['id'] == '1':
+                return None
+            else:
+                return 'Video id is not 1'
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func('duration < 30')
+        res = get_videos(f)
+        self.assertEqual(res, ['2'])
+
+        f = match_filter_func('description = foo')
+        res = get_videos(f)
+        self.assertEqual(res, ['2'])
+
+        f = match_filter_func('description =? foo')
+        res = get_videos(f)
+        self.assertEqual(res, ['1', '2'])
+
+        f = match_filter_func('filesize > 5KiB')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
 
 if __name__ == '__main__':
     unittest.main()
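The new test above exercises match_filter_func: the returned filter accepts an entry by returning None and rejects it by returning a message. A minimal usage sketch outside the test harness, reusing the exact filter syntax the test asserts:

    from youtube_dl import YoutubeDL
    from youtube_dl.utils import match_filter_func

    # Accept only entries larger than 5 KiB, as in the test above.
    ydl = YoutubeDL({
        'match_filter': match_filter_func('filesize > 5KiB'),
        'simulate': True,
    })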
test/test_aes.py (new file, 55 lines)
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
+from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
+import base64
+
+# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py'
+
+
+class TestAES(unittest.TestCase):
+    def setUp(self):
+        self.key = self.iv = [0x20, 0x15] + 14 * [0]
+        self.secret_msg = b'Secret message goes here'
+
+    def test_encrypt(self):
+        msg = b'message'
+        key = list(range(16))
+        encrypted = aes_encrypt(bytes_to_intlist(msg), key)
+        decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
+        self.assertEqual(decrypted, msg)
+
+    def test_cbc_decrypt(self):
+        data = bytes_to_intlist(
+            b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
+        )
+        decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
+        self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+    def test_decrypt_text(self):
+        password = intlist_to_bytes(self.key).decode('utf-8')
+        encrypted = base64.b64encode(
+            intlist_to_bytes(self.iv[:8]) +
+            b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+        )
+        decrypted = (aes_decrypt_text(encrypted, password, 16))
+        self.assertEqual(decrypted, self.secret_msg)
+
+        password = intlist_to_bytes(self.key).decode('utf-8')
+        encrypted = base64.b64encode(
+            intlist_to_bytes(self.iv[:8]) +
+            b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+        )
+        decrypted = (aes_decrypt_text(encrypted, password, 32))
+        self.assertEqual(decrypted, self.secret_msg)
+
+
+if __name__ == '__main__':
+    unittest.main()
@@ -59,7 +59,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
 
     def test_youtube_feeds(self):
-        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
         self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
         self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
         self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
@@ -104,11 +104,11 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch(':tds', ['ComedyCentralShows'])
 
     def test_vimeo_matching(self):
-        self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
-        self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
-        self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
-        self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
-        self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
+        self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
+        self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
+        self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
         self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
 
         # https://github.com/rg3/youtube-dl/issues/1930
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+# coding: utf-8
+
 from __future__ import unicode_literals
 
 import unittest
@@ -6,6 +8,9 @@ import unittest
 import sys
 import os
 import subprocess
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import encodeArgument
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
@@ -27,5 +32,12 @@ class TestExecution(unittest.TestCase):
     def test_main_exec(self):
         subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
 
+    def test_cmdline_umlauts(self):
+        p = subprocess.Popen(
+            [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+            cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
+        _, stderr = p.communicate()
+        self.assertFalse(stderr)
+
 if __name__ == '__main__':
     unittest.main()
@@ -8,7 +8,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server
+from youtube_dl.compat import compat_http_server, compat_urllib_request
 import ssl
 import threading
 
@@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase):
         r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
         self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
 
+
+def _build_proxy_handler(name):
+    class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+        proxy_name = name
+
+        def log_message(self, format, *args):
+            pass
+
+        def do_GET(self):
+            self.send_response(200)
+            self.send_header('Content-Type', 'text/plain; charset=utf-8')
+            self.end_headers()
+            self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+    return HTTPTestRequestHandler
+
+
+class TestProxy(unittest.TestCase):
+    def setUp(self):
+        self.proxy = compat_http_server.HTTPServer(
+            ('localhost', 0), _build_proxy_handler('normal'))
+        self.port = self.proxy.socket.getsockname()[1]
+        self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
+        self.proxy_thread.daemon = True
+        self.proxy_thread.start()
+
+        self.cn_proxy = compat_http_server.HTTPServer(
+            ('localhost', 0), _build_proxy_handler('cn'))
+        self.cn_port = self.cn_proxy.socket.getsockname()[1]
+        self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
+        self.cn_proxy_thread.daemon = True
+        self.cn_proxy_thread.start()
+
+    def test_proxy(self):
+        cn_proxy = 'localhost:{0}'.format(self.cn_port)
+        ydl = YoutubeDL({
+            'proxy': 'localhost:{0}'.format(self.port),
+            'cn_verification_proxy': cn_proxy,
+        })
+        url = 'http://foo.com/bar'
+        response = ydl.urlopen(url).read().decode('utf-8')
+        self.assertEqual(response, 'normal: {0}'.format(url))
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Ytdl-request-proxy', cn_proxy)
+        response = ydl.urlopen(req).read().decode('utf-8')
+        self.assertEqual(response, 'cn: {0}'.format(url))
+
 if __name__ == '__main__':
     unittest.main()
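The TestProxy case above also documents the per-request proxy override: a request carrying a Ytdl-request-proxy header bypasses the globally configured proxy for that request only. A minimal sketch of the same idea outside the test harness, with the header name and the cn_verification_proxy option taken directly from the test:

    from youtube_dl import YoutubeDL
    from youtube_dl.compat import compat_urllib_request

    ydl = YoutubeDL({'proxy': 'localhost:3128'})
    req = compat_urllib_request.Request('http://example.com/')
    # Route just this request through a different proxy.
    req.add_header('Ytdl-request-proxy', 'localhost:8118')
    data = ydl.urlopen(req).read()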
test/test_postprocessors.py (new file, 17 lines)
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.postprocessor import MetadataFromTitlePP
+
+
+class TestMetadataFromTitle(unittest.TestCase):
+    def test_format_to_regex(self):
+        pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
+        self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
@@ -26,6 +26,7 @@ from youtube_dl.extractor import (
     VikiIE,
     ThePlatformIE,
     RTVEALaCartaIE,
+    FunnyOrDieIE,
 )
 
 
@@ -320,5 +321,17 @@ class TestRtveSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
 
 
+class TestFunnyOrDieSubtitles(BaseTestSubtitles):
+    url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
+    IE = FunnyOrDieIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
+
+
 if __name__ == '__main__':
     unittest.main()
@@ -17,13 +17,22 @@ IGNORED_FILES = [
     'buildserver.py',
 ]
 
+IGNORED_DIRS = [
+    '.git',
+    '.tox',
+]
+
 from test.helper import assertRegexpMatches
 
 
 class TestUnicodeLiterals(unittest.TestCase):
     def test_all_files(self):
-        for dirpath, _, filenames in os.walk(rootDir):
+        for dirpath, dirnames, filenames in os.walk(rootDir):
+            for ignore_dir in IGNORED_DIRS:
+                if ignore_dir in dirnames:
+                    # If we remove the directory from dirnames os.walk won't
+                    # recurse into it
+                    dirnames.remove(ignore_dir)
             for basename in filenames:
                 if not basename.endswith('.py'):
                     continue
@@ -24,6 +24,7 @@ from youtube_dl.utils import (
     encodeFilename,
     escape_rfc3986,
     escape_url,
+    ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
     InAdvancePagedList,
@@ -38,6 +39,8 @@ from youtube_dl.utils import (
     parse_iso8601,
     read_batch_urls,
     sanitize_filename,
+    sanitize_path,
+    sanitize_url_path_consecutive_slashes,
     shell_quote,
     smuggle_url,
     str_to_int,
@@ -52,6 +55,7 @@ from youtube_dl.utils import (
     urlencode_postdata,
     version_tuple,
     xpath_with_ns,
+    xpath_text,
     render_table,
     match_str,
 )
@@ -131,6 +135,62 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
         self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
 
+    def test_sanitize_path(self):
+        if sys.platform != 'win32':
+            return
+
+        self.assertEqual(sanitize_path('abc'), 'abc')
+        self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+        self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+        self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+        self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+
+        self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+
+        self.assertEqual(
+            sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+            'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+
+        self.assertEqual(
+            sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+            'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+        self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+        self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+        self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+
+        self.assertEqual(sanitize_path('../abc'), '..\\abc')
+        self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
+        self.assertEqual(sanitize_path('./abc'), 'abc')
+        self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+
+    def test_sanitize_url_path_consecutive_slashes(self):
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'),
+            'http://hostname/foo/bar/filename.html')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'),
+            'http://hostname/foo/bar/filename.html')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname//'),
+            'http://hostname/')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'),
+            'http://hostname/foo/bar/filename.html')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/'),
+            'http://hostname/')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
+            'http://hostname/abc/')
+
     def test_ordered_set(self):
         self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
         self.assertEqual(orderedSet([]), [])
@@ -140,6 +200,8 @@ class TestUtil(unittest.TestCase):
 
     def test_unescape_html(self):
         self.assertEqual(unescapeHTML('%20;'), '%20;')
+        self.assertEqual(unescapeHTML('&#x2F;'), '/')
+        self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(
             unescapeHTML('&eacute;'), 'é')
 
@@ -165,6 +227,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
             '20150202')
+        self.assertEqual(unified_strdate('25-09-2014'), '20140925')
 
     def test_find_xpath_attr(self):
         testxml = '''<root>
@@ -192,6 +255,17 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(find('media:song/media:author').text, 'The Author')
         self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
 
+    def test_xpath_text(self):
+        testxml = '''<root>
+            <div>
+                <p>Foo</p>
+            </div>
+        </root>'''
+        doc = xml.etree.ElementTree.fromstring(testxml)
+        self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
+        self.assertTrue(xpath_text(doc, 'div/bar') is None)
+        self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
+
     def test_smuggle_url(self):
         data = {"ö": "ö", "abc": [3]}
         url = 'https://foo.bar/baz?x=y#a'
@@ -397,6 +471,12 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(d['x'], 1)
         self.assertEqual(d['y'], 'a')
 
+        on = js_to_json('["abc", "def",]')
+        self.assertEqual(json.loads(on), ['abc', 'def'])
+
+        on = js_to_json('{"abc": "def",}')
+        self.assertEqual(json.loads(on), {'abc': 'def'})
+
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
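Among the new utility tests above, the js_to_json cases cover trailing commas in JavaScript-style literals. A quick sketch of the behaviour those assertions pin down:

    import json
    from youtube_dl.utils import js_to_json

    # Trailing commas are tolerated and the output is valid JSON.
    assert json.loads(js_to_json('["abc", "def",]')) == ['abc', 'def']
    assert json.loads(js_to_json('{"abc": "def",}')) == {'abc': 'def'}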
tox.ini (7 lines changed)
@@ -1,8 +1,11 @@
 [tox]
-envlist = py26,py27,py33
+envlist = py26,py27,py33,py34
 [testenv]
 deps =
     nose
     coverage
-commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html
+defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
+    --exclude test_subtitles.py --exclude test_write_annotations.py
+    --exclude test_youtube_lists.py
+commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
 # test.test_download:TestDownload.test_NowVideo
@@ -61,6 +61,7 @@ from .utils import (
     render_table,
     SameFileError,
     sanitize_filename,
+    sanitize_path,
     std_headers,
     subtitles_filename,
     takewhile_inclusive,
@@ -322,8 +323,10 @@ class YoutubeDL(object):
                     'Set the LC_ALL environment variable to fix this.')
                 self.params['restrictfilenames'] = True
 
-        if '%(stitle)s' in self.params.get('outtmpl', ''):
-            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+        if isinstance(params.get('outtmpl'), bytes):
+            self.report_warning(
+                'Parameter outtmpl is bytes, but should be a unicode string. '
+                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
 
         self._setup_opener()
@@ -562,7 +565,7 @@ class YoutubeDL(object):
                 if v is not None)
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
-            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
             tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
             # Temporary fix for #4787
@@ -629,7 +632,7 @@ class YoutubeDL(object):
         Returns a list with a dictionary for each video we find.
         If 'download', also downloads the videos.
         extra_info is a dict containing the extra values to add to each result
         '''

         if ie_key:
             ies = [self.get_info_extractor(ie_key)]
@@ -916,6 +919,11 @@ class YoutubeDL(object):
         if format_spec == 'best' or format_spec is None:
             return available_formats[-1]
         elif format_spec == 'worst':
+            audiovideo_formats = [
+                f for f in available_formats
+                if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+            if audiovideo_formats:
+                return audiovideo_formats[0]
             return available_formats[0]
         elif format_spec == 'bestaudio':
             audio_formats = [
@@ -1085,8 +1093,7 @@ class YoutubeDL(object):
         if req_format is None:
             req_format = 'best'
         formats_to_download = []
-        # The -1 is for supporting YoutubeIE
-        if req_format in ('-1', 'all'):
+        if req_format == 'all':
             formats_to_download = formats
         else:
             for rfstr in req_format.split(','):
@@ -1213,9 +1220,6 @@ class YoutubeDL(object):
         if len(info_dict['title']) > 200:
             info_dict['title'] = info_dict['title'][:197] + '...'
 
-        # Keep for backwards compatibility
-        info_dict['stitle'] = info_dict['title']
-
         if 'format' not in info_dict:
             info_dict['format'] = info_dict['ext']
 
@@ -1261,7 +1265,7 @@ class YoutubeDL(object):
             return
 
         try:
-            dn = os.path.dirname(encodeFilename(filename))
+            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
             if dn and not os.path.exists(dn):
                 os.makedirs(dn)
         except (OSError, IOError) as err:
@@ -1702,10 +1706,10 @@ class YoutubeDL(object):
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
                 self._write_string('[debug] Git HEAD: ' + out + '\n')
-        except:
+        except Exception:
             try:
                 sys.exc_clear()
-            except:
+            except Exception:
                 pass
         self._write_string('[debug] Python version %s - %s\n' % (
             platform.python_version(), platform_name()))
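The format-selection hunk above changes what the 'worst' specifier means: a combined audio+video format is now preferred over an audio-only or video-only one whenever such a format exists, with the previous behaviour kept as a fallback. A minimal sketch of the corresponding option (the same selection is available on the command line as -f worst):

    from youtube_dl import YoutubeDL

    # Picks the worst format that still carries both an audio and a video
    # codec, falling back to the overall worst format otherwise.
    ydl = YoutubeDL({'format': 'worst'})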
@@ -189,10 +189,6 @@ def _real_main(argv=None):
     if opts.allsubtitles and not opts.writeautomaticsub:
         opts.writesubtitles = True
 
-    if sys.version_info < (3,):
-        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
-        if opts.outtmpl is not None:
-            opts.outtmpl = opts.outtmpl.decode(preferredencoding())
     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
                (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
                (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
@@ -213,6 +209,11 @@ def _real_main(argv=None):
     # PostProcessors
     postprocessors = []
     # Add the metadata pp first, the other pps will copy it
+    if opts.metafromtitle:
+        postprocessors.append({
+            'key': 'MetadataFromTitle',
+            'titleformat': opts.metafromtitle
+        })
     if opts.addmetadata:
         postprocessors.append({'key': 'FFmpegMetadata'})
     if opts.extractaudio:
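The new opts.metafromtitle branch above wires the MetadataFromTitle postprocessor into the chain ahead of the other metadata postprocessors. The equivalent embedded use, with the title format borrowed from the new test_postprocessors.py earlier in this changeset:

    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({
        'postprocessors': [{
            'key': 'MetadataFromTitle',
            # Parses fields such as %(artist)s and %(title)s out of the video title.
            'titleformat': '%(artist)s - %(title)s',
        }],
    })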
@@ -389,7 +389,7 @@ else:
                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
             out, err = sp.communicate()
             lines, columns = map(int, out.split())
-        except:
+        except Exception:
             pass
         return _terminal_size(columns, lines)
 
@@ -204,7 +204,7 @@ class FileDownloader(object):
             return
         try:
             os.utime(filename, (time.time(), filetime))
-        except:
+        except Exception:
             pass
         return filetime
 
@@ -318,7 +318,7 @@ class FileDownloader(object):
         )
 
         continuedl_and_exists = (
-            self.params.get('continuedl', False) and
+            self.params.get('continuedl', True) and
             os.path.isfile(encodeFilename(filename)) and
             not self.params.get('nopart', False)
         )
@@ -281,7 +281,7 @@ class F4mFD(FileDownloader):
             boot_info = self._get_bootstrap_from_url(bootstrap_url)
         else:
             bootstrap_url = None
-            bootstrap = base64.b64decode(node.text)
+            bootstrap = base64.b64decode(node.text.encode('ascii'))
             boot_info = read_bootstrap_info(bootstrap)
         return (boot_info, bootstrap_url)
 
@@ -308,7 +308,7 @@ class F4mFD(FileDownloader):
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
-            metadata = base64.b64decode(metadata_node.text)
+            metadata = base64.b64decode(metadata_node.text.encode('ascii'))
         else:
             metadata = None
 
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
 
         open_mode = 'wb'
         if resume_len != 0:
-            if self.params.get('continuedl', False):
+            if self.params.get('continuedl', True):
                 self.report_resuming_byte(resume_len)
                 request.add_header('Range', 'bytes=%d-' % resume_len)
                 open_mode = 'ab'
@@ -92,6 +92,8 @@ class HttpFD(FileDownloader):
                     self._hook_progress({
                         'filename': filename,
                         'status': 'finished',
+                        'downloaded_bytes': resume_len,
+                        'total_bytes': resume_len,
                     })
                     return True
                 else:
@@ -218,12 +220,6 @@ class HttpFD(FileDownloader):
         if tmpfilename != '-':
             stream.close()
 
-        self._hook_progress({
-            'downloaded_bytes': byte_counter,
-            'total_bytes': data_len,
-            'tmpfilename': tmpfilename,
-            'status': 'error',
-        })
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)
@@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):
         protocol = info_dict.get('rtmp_protocol', None)
         real_time = info_dict.get('rtmp_real_time', False)
         no_resume = info_dict.get('no_resume', False)
-        continue_dl = info_dict.get('continuedl', False)
+        continue_dl = info_dict.get('continuedl', True)
 
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
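Two of the HTTP downloader hunks above also change what progress hooks see: the 'finished' report emitted for a file that is already fully downloaded now carries downloaded_bytes and total_bytes, and the extra 'error' report on short downloads is gone. A small sketch of a hook that relies on this, assuming progress_hooks is configured as usual:

    from youtube_dl import YoutubeDL

    def report_progress(d):
        # After the change above, 'finished' also carries byte counts when the
        # file already existed and nothing had to be downloaded.
        if d['status'] == 'finished':
            print('%s: %s bytes' % (d['filename'], d.get('total_bytes')))

    ydl = YoutubeDL({'progress_hooks': [report_progress]})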
@@ -37,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
+from .beatportpro import BeatportProIE
 from .bet import BetIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
@@ -89,6 +90,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
+from .crooksandliars import CrooksAndLiarsIE
 from .crunchyroll import (
     CrunchyrollIE,
     CrunchyrollShowPlaylistIE
@@ -105,17 +107,21 @@ from .dbtv import DBTVIE
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .dfb import DFBIE
+from .dhm import DHMIE
 from .dotsub import DotsubIE
+from .douyutv import DouyuTVIE
 from .dreisat import DreiSatIE
 from .drbonanza import DRBonanzaIE
 from .drtuber import DrTuberIE
 from .drtv import DRTVIE
 from .dvtv import DVTVIE
 from .dump import DumpIE
+from .dumpert import DumpertIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
+from .eagleplatform import EaglePlatformIE
 from .ebaumsworld import EbaumsWorldIE
 from .echomsk import EchoMskIE
 from .ehow import EHowIE
@@ -150,6 +156,7 @@ from .fktv import (
 )
 from .flickr import FlickrIE
 from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
 from .fourtube import FourTubeIE
 from .foxgay import FoxgayIE
 from .foxnews import FoxNewsIE
@@ -171,9 +178,11 @@ from .gameone import (
     GameOneIE,
     GameOnePlaylistIE,
 )
+from .gamersyde import GamersydeIE
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
+from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
 from .giantbomb import GiantBombIE
@@ -228,6 +237,7 @@ from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .jpopsukitv import JpopsukiIE
 from .kaltura import KalturaIE
+from .kanalplay import KanalPlayIE
 from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
 from .keezmovies import KeezMoviesIE
@@ -244,6 +254,7 @@ from .letv import (
     LetvTvIE,
     LetvPlaylistIE
 )
+from .libsyn import LibsynIE
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
 from .livestream import (
@@ -267,6 +278,7 @@ from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
 from .minhateca import MinhatecaIE
 from .ministrygrid import MinistryGridIE
+from .miomio import MioMioIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
@@ -302,6 +314,8 @@ from .nba import NBAIE
 from .nbc import (
     NBCIE,
     NBCNewsIE,
+    NBCSportsIE,
+    NBCSportsVPlayerIE,
 )
 from .ndr import NDRIE
 from .ndtv import NDTVIE
@@ -340,6 +354,7 @@ from .npo import (
 )
 from .nrk import (
     NRKIE,
+    NRKPlaylistIE,
     NRKTVIE,
 )
 from .ntvde import NTVDeIE
@@ -354,6 +369,7 @@ from .orf import (
     ORFTVthekIE,
     ORFOE1IE,
     ORFFM4IE,
+    ORFIPTVIE,
 )
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
@@ -361,9 +377,11 @@ from .pbs import PBSIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .planetaplay import PlanetaPlayIE
+from .pladform import PladformIE
 from .played import PlayedIE
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
+from .playwire import PlaywireIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
 from .pornhub import (
@@ -371,14 +389,22 @@ from .pornhub import (
     PornHubPlaylistIE,
 )
 from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
+from .primesharetv import PrimeShareTVIE
 from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
+from .qqmusic import (
+    QQMusicIE,
+    QQMusicSingerIE,
+    QQMusicAlbumIE,
+)
 from .quickvid import QuickVidIE
 from .r7 import R7IE
 from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
 from .rai import RaiIE
@@ -397,7 +423,7 @@ from .rtlnow import RTLnowIE
 from .rtl2 import RTL2IE
 from .rtp import RTPIE
 from .rts import RTSIE
-from .rtve import RTVEALaCartaIE, RTVELiveIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
 from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
@@ -408,6 +434,10 @@ from .rutube import (
 )
 from .rutv import RUTVIE
 from .sandia import SandiaIE
+from .safari import (
+    SafariIE,
+    SafariCourseIE,
+)
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
@@ -447,6 +477,7 @@ from .southpark import (
     SouthparkDeIE,
 )
 from .space import SpaceIE
+from .spankbang import SpankBangIE
 from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE, SpiegelArticleIE
 from .spiegeltv import SpiegeltvIE
@@ -454,7 +485,9 @@ from .spike import SpikeIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
+from .srf import SrfIE
 from .srmediathek import SRMediathekIE
+from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
@@ -513,6 +546,10 @@ from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
+from .twentytwotracks import (
+    TwentyTwoTracksIE,
+    TwentyTwoTracksGenreIE
+)
 from .twitch import (
     TwitchVideoIE,
     TwitchChapterIE,
@@ -527,12 +564,16 @@ from .udemy import (
     UdemyIE,
     UdemyCourseIE
 )
+from .udn import UDNEmbedIE
+from .ultimedia import UltimediaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
 from .ustream import UstreamIE, UstreamChannelIE
+from .varzesh3 import Varzesh3IE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
+from .vessel import VesselIE
 from .vesti import VestiIE
 from .vevo import VevoIE
 from .vgtv import VGTVIE
@@ -550,6 +591,7 @@ from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
 from .vidzi import VidziIE
 from .vier import VierIE, VierVideosIE
+from .viewster import ViewsterIE
 from .vimeo import (
     VimeoIE,
     VimeoAlbumIE,
@@ -606,6 +648,11 @@ from .yahoo import (
     YahooSearchIE,
 )
 from .yam import YamIE
+from .yandexmusic import (
+    YandexMusicTrackIE,
+    YandexMusicAlbumIE,
+    YandexMusicPlaylistIE,
+)
 from .yesjapan import YesJapanIE
 from .ynet import YnetIE
 from .youjizz import YouJizzIE
@@ -11,12 +11,13 @@ from ..compat import (
 )
 from ..utils import (
     ExtractorError,
+    qualities,
 )
 
 
 class AddAnimeIE(InfoExtractor):
-    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
+    _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
         'md5': '72954ea10bc979ab5e2eb288b21425a0',
         'info_dict': {
@@ -25,7 +26,10 @@ class AddAnimeIE(InfoExtractor):
             'description': 'One Piece 606',
             'title': 'One Piece 606',
         }
-    }
+    }, {
+        'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -63,8 +67,10 @@ class AddAnimeIE(InfoExtractor):
                 note='Confirming after redirect')
             webpage = self._download_webpage(url, video_id)
 
+        FORMATS = ('normal', 'hq')
+        quality = qualities(FORMATS)
         formats = []
-        for format_id in ('normal', 'hq'):
+        for format_id in FORMATS:
             rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
             video_url = self._search_regex(rex, webpage, 'video file URLx',
                                            fatal=False)
@@ -73,6 +79,7 @@ class AddAnimeIE(InfoExtractor):
             formats.append({
                 'format_id': format_id,
                 'url': video_url,
+                'quality': quality(format_id),
             })
         self._sort_formats(formats)
         video_title = self._og_search_title(webpage)
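Note on the AddAnime hunk above: the new 'quality' field relies on the qualities helper imported from youtube_dl.utils, which maps a format id to its index in a preference-ordered sequence and unknown ids to -1. A minimal, self-contained sketch of that behaviour (illustration only, not part of this changeset):

from youtube_dl.utils import qualities

FORMATS = ('normal', 'hq')
quality = qualities(FORMATS)

print(quality('normal'))   # 0, lowest preference
print(quality('hq'))       # 1, preferred by _sort_formats()
print(quality('unknown'))  # -1, unknown ids sort last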
@@ -2,13 +2,12 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    xpath_text,
     float_or_none,
+    xpath_text,
 )
 
 
@@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
             'title': 'American Dad - Putting Francine Out of Business',
             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
         },
+    }, {
+        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+        'playlist': [
+            {
+                'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
+                'info_dict': {
+                    'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
+                    'ext': 'flv',
+                    'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+                    'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+                },
+            }
+        ],
+        'info_dict': {
+            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+            'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+        },
     }]
 
     @staticmethod
@@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
             for video in collection.get('videos'):
                 if video.get('slug') == slug:
                     return collection, video
+        return None, None
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
         webpage = self._download_webpage(url, episode_path)
 
         # Extract the value of `bootstrappedData` from the Javascript in the page.
-        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
+        bootstrapped_data = self._parse_json(self._search_regex(
+            r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
-        try:
-            bootstrappedData = json.loads(bootstrappedDataJS)
-        except ValueError as ve:
-            errmsg = '%s: Failed to parse JSON ' % episode_path
-            raise ExtractorError(errmsg, cause=ve)
 
         # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
         # NOTE: We are only downloading one video (the current one) not the playlist
         if is_playlist:
-            collections = bootstrappedData['playlists']['collections']
+            collections = bootstrapped_data['playlists']['collections']
             collection = self.find_collection_by_linkURL(collections, show_path)
             video_info = self.find_video_info(collection, episode_path)
 
             show_title = video_info['showTitle']
             segment_ids = [video_info['videoPlaybackID']]
         else:
-            collections = bootstrappedData['show']['collections']
+            collections = bootstrapped_data['show']['collections']
             collection, video_info = self.find_collection_containing_video(collections, episode_path)
 
-            show = bootstrappedData['show']
+            # Video wasn't found in the collections, let's try `slugged_video`.
+            if video_info is None:
+                if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
+                    video_info = bootstrapped_data['slugged_video']
+                else:
+                    raise ExtractorError('Unable to find video info')
+
+            show = bootstrapped_data['show']
             show_title = show['title']
             segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
 
@@ -14,10 +14,10 @@ from ..utils import (
 
 
 class AftenpostenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
+    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+        'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
         'md5': 'fd828cd29774a729bf4d4425fe192972',
         'info_dict': {
             'id': '21039',
@@ -30,12 +30,7 @@ class AftenpostenIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, display_id)
-
-        video_id = self._html_search_regex(
-            r'data-xs-id="(\d+)"', webpage, 'video id')
-
         data = self._download_xml(
             'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
@@ -2,10 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import int_or_none
 
 
 class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
+    _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
     _TEST = {
         'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
         'info_dict': {
@@ -43,9 +44,9 @@ class AftonbladetIE(InfoExtractor):
             formats.append({
                 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                 'ext': 'mp4',
-                'width': fmt['width'],
+                'width': int_or_none(fmt.get('width')),
-                'height': fmt['height'],
+                'height': int_or_none(fmt.get('height')),
-                'tbr': fmt['bitrate'],
+                'tbr': int_or_none(fmt.get('bitrate')),
                 'protocol': 'http',
             })
         self._sort_formats(formats)
@@ -54,9 +55,9 @@ class AftonbladetIE(InfoExtractor):
             'id': video_id,
             'title': internal_meta_json['title'],
             'formats': formats,
-            'thumbnail': internal_meta_json['imageUrl'],
+            'thumbnail': internal_meta_json.get('imageUrl'),
-            'description': internal_meta_json['shortPreamble'],
+            'description': internal_meta_json.get('shortPreamble'),
-            'timestamp': internal_meta_json['timePublished'],
+            'timestamp': int_or_none(internal_meta_json.get('timePublished')),
-            'duration': internal_meta_json['duration'],
+            'duration': int_or_none(internal_meta_json.get('duration')),
-            'view_count': internal_meta_json['views'],
+            'view_count': int_or_none(internal_meta_json.get('views')),
         }
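Note on the Aftonbladet hunk above: switching from internal_meta_json['...'] to .get() plus int_or_none() (from youtube_dl.utils) means a field the API omits yields None instead of raising. A small sketch of the helper's behaviour, assuming its documented pass-through for None:

from youtube_dl.utils import int_or_none

print(int_or_none('576'))  # 576
print(int_or_none(None))   # None, instead of raising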
@@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor):
         if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
             raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
 
+        if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
+            raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
+
         if re.search(r'[\?&]rss($|[=&])', url):
             doc = parse_xml(webpage)
             if doc.tag == 'rss':
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
 
             formats.append(format)
 
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         info_dict['formats'] = formats

youtube_dl/extractor/beatportpro.py (new file, 103 lines)
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class BeatportProIE(InfoExtractor):
+    _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
+        'md5': 'b3c34d8639a2f6a7f734382358478887',
+        'info_dict': {
+            'id': '5379371',
+            'display_id': 'synesthesia-original-mix',
+            'ext': 'mp4',
+            'title': 'Froxic - Synesthesia (Original Mix)',
+        },
+    }, {
+        'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
+        'md5': 'e44c3025dfa38c6577fbaeb43da43514',
+        'info_dict': {
+            'id': '3756896',
+            'display_id': 'love-and-war-original-mix',
+            'ext': 'mp3',
+            'title': 'Wolfgang Gartner - Love & War (Original Mix)',
+        },
+    }, {
+        'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
+        'md5': 'a1fd8e8046de3950fd039304c186c05f',
+        'info_dict': {
+            'id': '4991738',
+            'display_id': 'birds-original-mix',
+            'ext': 'mp4',
+            'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        track_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        playables = self._parse_json(
+            self._search_regex(
+                r'window\.Playables\s*=\s*({.+?});', webpage,
+                'playables info', flags=re.DOTALL),
+            track_id)
+
+        track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
+
+        title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
+        if track['mix']:
+            title += ' (' + track['mix'] + ')'
+
+        formats = []
+        for ext, info in track['preview'].items():
+            if not info['url']:
+                continue
+            fmt = {
+                'url': info['url'],
+                'ext': ext,
+                'format_id': ext,
+                'vcodec': 'none',
+            }
+            if ext == 'mp3':
+                fmt['preference'] = 0
+                fmt['acodec'] = 'mp3'
+                fmt['abr'] = 96
+                fmt['asr'] = 44100
+            elif ext == 'mp4':
+                fmt['preference'] = 1
+                fmt['acodec'] = 'aac'
+                fmt['abr'] = 96
+                fmt['asr'] = 44100
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        images = []
+        for name, info in track['images'].items():
+            image_url = info.get('url')
+            if name == 'dynamic' or not image_url:
+                continue
+            image = {
+                'id': name,
+                'url': image_url,
+                'height': int_or_none(info.get('height')),
+                'width': int_or_none(info.get('width')),
+            }
+            images.append(image)
+
+        return {
+            'id': compat_str(track.get('id')) or track_id,
+            'display_id': track.get('slug') or display_id,
+            'title': title,
+            'formats': formats,
+            'thumbnails': images,
+        }
@@ -172,6 +172,7 @@ class BlipTVIE(InfoExtractor):
                     'width': int_or_none(media_content.get('width')),
                     'height': int_or_none(media_content.get('height')),
                 })
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         subtitles = self.extract_subtitles(video_id, subtitles_urls)
@@ -6,32 +6,39 @@ from .common import InfoExtractor
 
 
 class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
+    _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
 
     _TEST = {
-        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
         # The md5 checksum changes
         'info_dict': {
             'id': 'qurhIVlJSB6hzkVi229d8g',
             'ext': 'flv',
             'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
-            'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
+            'description': 'md5:a8ba0302912d03d246979735c17d2761',
         },
     }
 
     def _real_extract(self, url):
         name = self._match_id(url)
         webpage = self._download_webpage(url, name)
+        video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
-        f4m_url = self._search_regex(
-            r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
-            'f4m url')
         title = re.sub(': Video$', '', self._og_search_title(webpage))
 
+        embed_info = self._download_json(
+            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+        formats = []
+        for stream in embed_info['streams']:
+            if stream["muxing_format"] == "TS":
+                formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
+            else:
+                formats.extend(self._extract_f4m_formats(stream['url'], video_id))
+        self._sort_formats(formats)
+
         return {
-            'id': name.split('-')[-1],
+            'id': video_id,
             'title': title,
-            'formats': self._extract_f4m_formats(f4m_url, name),
+            'formats': formats,
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
         }
@@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
            'tbr': media['bitRate'],
            'width': media['width'],
            'height': media['height'],
-        } for media in info['media']]
+        } for media in info['media'] if media.get('mediaPurpose') == 'play']
 
        if not formats:
            formats.append({
@@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor):
         object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
         object_str = fix_xml_ampersands(object_str)
 
-        object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        try:
+            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+        except xml.etree.ElementTree.ParseError:
+            return
 
         fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
         if fv_el is not None:
@@ -183,9 +186,9 @@ class BrightcoveIE(InfoExtractor):
            (?:
                [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
-            ).+?</object>''',
+            ).+?>\s*</object>''',
            webpage)
-        return [cls._build_brighcove_url(m) for m in matches]
+        return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
 
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
@@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor):
             webpage = self._download_webpage(url, video_id)
 
             file_key = self._search_regex(
-                r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
+                [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
+                webpage, 'file_key')
 
         return self._extract_video(video_host, video_id, file_key)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class CNNIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
-        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
 
     _TESTS = [{
         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
@@ -45,6 +45,12 @@ class CNNIE(InfoExtractor):
             'description': 'md5:e7223a503315c9f150acac52e76de086',
             'upload_date': '20141222',
         }
+    }, {
+        'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+        'only_matching': True,
+    }, {
+        'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -324,7 +324,7 @@ class InfoExtractor(object):
             self._downloader.report_warning(errmsg)
             return False
 
-    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
         """ Returns a tuple (page content as string, URL handle) """
         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, (compat_str, str)):
@@ -334,14 +334,11 @@ class InfoExtractor(object):
         if urlh is False:
             assert not fatal
             return False
-        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
         return (content, urlh)
 
-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
+    @staticmethod
-        content_type = urlh.headers.get('Content-Type', '')
+    def _guess_encoding_from_content(content_type, webpage_bytes):
-        webpage_bytes = urlh.read()
-        if prefix is not None:
-            webpage_bytes = prefix + webpage_bytes
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
@@ -354,6 +351,16 @@ class InfoExtractor(object):
                 encoding = 'utf-16'
             else:
                 encoding = 'utf-8'
+
+        return encoding
+
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+        content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
+        if prefix is not None:
+            webpage_bytes = prefix + webpage_bytes
+        if not encoding:
+            encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
                 url = url_or_request.get_full_url()
@@ -410,13 +417,13 @@ class InfoExtractor(object):
 
         return content
 
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
         """ Returns the data of the page as a string """
         success = False
         try_count = 0
         while success is False:
             try:
-                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
+                res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
                 success = True
             except compat_http_client.IncompleteRead as e:
                 try_count += 1
@@ -431,10 +438,10 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True):
+                      transform_source=None, fatal=True, encoding=None):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
         if xml_string is False:
             return xml_string
         if transform_source:
@@ -445,9 +452,10 @@ class InfoExtractor(object):
                        note='Downloading JSON metadata',
                        errnote='Unable to download JSON metadata',
                        transform_source=None,
-                       fatal=True):
+                       fatal=True, encoding=None):
         json_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal)
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding)
         if (not fatal) and json_string is False:
             return None
         return self._parse_json(
@@ -492,7 +500,7 @@ class InfoExtractor(object):
 
     # Methods for following #608
     @staticmethod
-    def url_result(url, ie=None, video_id=None):
+    def url_result(url, ie=None, video_id=None, video_title=None):
        """Returns a url that points to a page that should be processed"""
        # TODO: ie should be the class used for getting the info
        video_info = {'_type': 'url',
@@ -500,6 +508,8 @@ class InfoExtractor(object):
                      'ie_key': ie}
        if video_id is not None:
            video_info['id'] = video_id
+        if video_title is not None:
+            video_info['title'] = video_title
        return video_info
 
     @staticmethod
@@ -822,7 +832,7 @@ class InfoExtractor(object):
                 (media_el.attrib.get('href') or media_el.attrib.get('url')))
             tbr = int_or_none(media_el.attrib.get('bitrate'))
             formats.append({
-                'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
+                'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
                 'url': manifest_url,
                 'ext': 'flv',
                 'tbr': tbr,
@@ -839,7 +849,7 @@ class InfoExtractor(object):
                               m3u8_id=None):
 
         formats = [{
-            'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
+            'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
             'url': m3u8_url,
             'ext': ext,
             'protocol': 'm3u8',
@@ -883,8 +893,13 @@ class InfoExtractor(object):
                     formats.append({'url': format_url(line)})
                     continue
                 tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
+                format_id = []
+                if m3u8_id:
+                    format_id.append(m3u8_id)
+                last_media_name = last_media.get('NAME') if last_media else None
+                format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
                 f = {
-                    'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
+                    'format_id': '-'.join(format_id),
                     'url': format_url(line.strip()),
                     'tbr': tbr,
                     'ext': ext,
@@ -1057,6 +1072,9 @@ class InfoExtractor(object):
     def _get_automatic_captions(self, *args, **kwargs):
         raise NotImplementedError("This method must be implemented by subclasses")
 
+    def _subtitles_timecode(self, seconds):
+        return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
+
 
 class SearchInfoExtractor(InfoExtractor):
     """

youtube_dl/extractor/crooksandliars.py (new file, 60 lines)
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    qualities,
+)
+
+
+class CrooksAndLiarsIE(InfoExtractor):
+    _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
+    _TESTS = [{
+        'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
+        'info_dict': {
+            'id': '8RUoRhRi',
+            'ext': 'mp4',
+            'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
+            'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'timestamp': 1428207000,
+            'upload_date': '20150405',
+            'uploader': 'Heather',
+            'duration': 236,
+        }
+    }, {
+        'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
+
+        manifest = self._parse_json(
+            self._search_regex(
+                r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
+            video_id)
+
+        quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
+
+        formats = [{
+            'url': item['url'],
+            'format_id': item['type'],
+            'quality': quality(item['type']),
+        } for item in manifest['flavors'] if item['mime'].startswith('video/')]
+        self._sort_formats(formats)
+
+        return {
+            'url': url,
+            'id': video_id,
+            'title': manifest['title'],
+            'description': manifest.get('description'),
+            'thumbnail': self._proto_relative_url(manifest.get('poster')),
+            'timestamp': int_or_none(manifest.get('created')),
+            'uploader': manifest.get('author'),
+            'duration': int_or_none(manifest.get('duration')),
+            'formats': formats,
+        }
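Illustration of the two InfoExtractor additions in the common.py hunks above: the encoding keyword now threaded through _download_webpage()/_download_xml()/_download_json(), and the new video_title argument of url_result(). The following extractor is hypothetical and written only for illustration; ExampleIE, its URL pattern, the data-embed regex and the forced charset are all made up:

from youtube_dl.extractor.common import InfoExtractor


class ExampleIE(InfoExtractor):
    _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'  # made-up pattern

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Force a charset instead of letting _guess_encoding_from_content() decide.
        webpage = self._download_webpage(url, video_id, encoding='windows-1251')
        title = self._og_search_title(webpage)
        embed_url = self._search_regex(
            r'data-embed="([^"]+)"', webpage, 'embed URL')
        # Hand off to whichever extractor matches embed_url, keeping the page title.
        return self.url_result(embed_url, video_id=video_id, video_title=title)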
@@ -23,7 +23,6 @@ from ..utils import (
 )
 from ..aes import (
     aes_cbc_decrypt,
-    inc,
 )
 
 
@@ -102,13 +101,6 @@ class CrunchyrollIE(InfoExtractor):
 
         key = obfuscate_key(id)
 
-        class Counter:
-            __value = iv
-
-            def next_value(self):
-                temp = self.__value
-                self.__value = inc(self.__value)
-                return temp
         decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
         return zlib.decompress(decrypted_data)
 
@@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
     def _build_request(url):
         """Build a request with the family filter disabled"""
         request = compat_urllib_request.Request(url)
-        request.add_header('Cookie', 'family_filter=off')
+        request.add_header('Cookie', 'family_filter=off; ff=off')
-        request.add_header('Cookie', 'ff=off')
         return request
 
 
@@ -46,13 +45,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
     _TESTS = [
         {
-            'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
+            'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
-            'md5': '392c4b85a60a90dc4792da41ce3144eb',
+            'md5': '2137c41a8e78554bb09225b8eb322406',
             'info_dict': {
-                'id': 'x33vw9',
+                'id': 'x2iuewm',
                 'ext': 'mp4',
-                'uploader': 'Amphora Alex and Van .',
+                'uploader': 'IGN',
-                'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
+                'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
             }
         },
         # Vevo video
@@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
             video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
 
         embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
+        embed_request = self._build_request(embed_url)
-                                            'Downloading embed page')
+        embed_page = self._download_webpage(
+            embed_request, video_id, 'Downloading embed page')
         info = self._search_regex(r'var info = ({.*?}),$', embed_page,
                                   'video info', flags=re.MULTILINE)
         info = json.loads(info)
@@ -224,7 +224,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
 
 class DailymotionUserIE(DailymotionPlaylistIE):
     IE_NAME = 'dailymotion:user'
-    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
     _TESTS = [{
         'url': 'https://www.dailymotion.com/user/nqtv',

youtube_dl/extractor/dhm.py (new file, 73 lines)
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    parse_duration,
+)
+
+
+class DHMIE(InfoExtractor):
+    IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+    _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+        'md5': '11c475f670209bf6acca0b2b7ef51827',
+        'info_dict': {
+            'id': 'the-marshallplan-at-work-in-west-germany',
+            'ext': 'flv',
+            'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+            'description': 'md5:1fabd480c153f97b07add61c44407c82',
+            'duration': 660,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+        'md5': '09890226332476a3e3f6f2cb74734aa5',
+        'info_dict': {
+            'id': 'rolle-1',
+            'ext': 'flv',
+            'title': 'ROLLE 1',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        playlist_url = self._search_regex(
+            r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
+        playlist = self._download_xml(playlist_url, video_id)
+
+        track = playlist.find(
+            './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
+
+        video_url = xpath_text(
+            track, './{http://xspf.org/ns/0/}location',
+            'video url', fatal=True)
+        thumbnail = xpath_text(
+            track, './{http://xspf.org/ns/0/}image',
+            'thumbnail')
+
+        title = self._search_regex(
+            [r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
+            webpage, 'title').strip()
+        description = self._html_search_regex(
+            r'<p><strong>Description:</strong>(.+?)</p>',
+            webpage, 'description', default=None)
+        duration = parse_duration(self._search_regex(
+            r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+            webpage, 'duration', default=None))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }

youtube_dl/extractor/douyutv.py (new file, 112 lines)
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import time
+from .common import InfoExtractor
+from ..utils import (ExtractorError, unescapeHTML)
+from ..compat import (compat_str, compat_basestring)
+
+
+class DouyuTVIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.douyutv.com/iseven',
+        'info_dict': {
+            'id': '17732',
+            'display_id': 'iseven',
+            'ext': 'flv',
+            'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': '7师傅',
+            'uploader_id': '431925',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }, {
+        'url': 'http://www.douyutv.com/85982',
+        'info_dict': {
+            'id': '85982',
+            'display_id': '85982',
+            'ext': 'flv',
+            'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'douyu小漠',
+            'uploader_id': '3769985',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        if video_id.isdigit():
+            room_id = video_id
+        else:
+            page = self._download_webpage(url, video_id)
+            room_id = self._html_search_regex(
+                r'"room_id"\s*:\s*(\d+),', page, 'room id')
+
+        prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+            room_id, int(time.time()))
+
+        auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
+        config = self._download_json(
+            'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+            video_id)
+
+        data = config['data']
+
+        error_code = config.get('error', 0)
+        if error_code is not 0:
+            error_desc = 'Server reported error %i' % error_code
+            if isinstance(data, (compat_str, compat_basestring)):
+                error_desc += ': ' + data
+            raise ExtractorError(error_desc, expected=True)
+
+        show_status = data.get('show_status')
+        # 1 = live, 2 = offline
+        if show_status == '2':
+            raise ExtractorError(
+                'Live stream is offline', expected=True)
+
+        base_url = data['rtmp_url']
+        live_path = data['rtmp_live']
+
+        title = self._live_title(unescapeHTML(data['room_name']))
+        description = data.get('show_details')
+        thumbnail = data.get('room_src')
+
+        uploader = data.get('nickname')
+        uploader_id = data.get('owner_uid')
+
+        multi_formats = data.get('rtmp_multi_bitrate')
+        if not isinstance(multi_formats, dict):
+            multi_formats = {}
+        multi_formats['live'] = live_path
+
+        formats = [{
+            'url': '%s/%s' % (base_url, format_path),
+            'format_id': format_id,
+            'preference': 1 if format_id == 'live' else 0,
+        } for format_id, format_path in multi_formats.items()]
+        self._sort_formats(formats)
+
+        return {
+            'id': room_id,
+            'display_id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'formats': formats,
+            'is_live': True,
+        }
@@ -3,22 +3,25 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+)
 
 
 class DreiSatIE(InfoExtractor):
     IE_NAME = '3sat'
     _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
     _TEST = {
-        'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
+        'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
-        'md5': '9dcfe344732808dbfcc901537973c922',
+        'md5': 'be37228896d30a88f315b638900a026e',
         'info_dict': {
-            'id': '36983',
+            'id': '45918',
             'ext': 'mp4',
-            'title': 'Kaffeeland Schweiz',
+            'title': 'Waidmannsheil',
-            'description': 'md5:cc4424b18b75ae9948b13929a0814033',
+            'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
             'uploader': '3sat',
-            'upload_date': '20130622'
+            'upload_date': '20140913'
         }
     }
 
@@ -28,6 +31,15 @@ class DreiSatIE(InfoExtractor):
         details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
 
+        status_code = details_doc.find('./status/statuscode')
+        if status_code is not None and status_code.text != 'ok':
+            code = status_code.text
+            if code == 'notVisibleAnymore':
+                message = 'Video %s is not available' % video_id
+            else:
+                message = '%s returned error: %s' % (self.IE_NAME, code)
+            raise ExtractorError(message, expected=True)
+
         thumbnail_els = details_doc.findall('.//teaserimage')
         thumbnails = [{
             'width': int(te.attrib['key'].partition('x')[0]),
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor, ExtractorError
@@ -8,16 +9,16 @@ class DRTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
     _TEST = {
-        'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
+        'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
-        'md5': '4a7e1dd65cdb2643500a3f753c942f25',
+        'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
         'info_dict': {
-            'id': 'partiets-mand-7-8',
+            'id': 'panisk-paske-5',
             'ext': 'mp4',
-            'title': 'Partiets mand (7:8)',
+            'title': 'Panisk Påske (5)',
-            'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
+            'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
-            'timestamp': 1403047940,
+            'timestamp': 1426984612,
-            'upload_date': '20140617',
+            'upload_date': '20150322',
-            'duration': 1299.040,
+            'duration': 1455,
         },
     }
 
@@ -26,6 +27,10 @@ class DRTVIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
+        if '>Programmet er ikke længere tilgængeligt' in webpage:
+            raise ExtractorError(
+                'Video %s is not available' % video_id, expected=True)
+
         video_id = self._search_regex(
             r'data-(?:material-identifier|episode-slug)="([^"]+)"',
             webpage, 'video id')
@@ -28,12 +28,12 @@ class DumpIE(InfoExtractor):
|
|||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
|
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
|
||||||
|
|
||||||
thumb = self._og_search_thumbnail(webpage)
|
title = self._og_search_title(webpage)
|
||||||
title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'thumbnail': thumb,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
60
youtube_dl/extractor/dumpert.py
Normal file
60
youtube_dl/extractor/dumpert.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_request
|
||||||
|
from ..utils import qualities
|
||||||
|
|
||||||
|
|
||||||
|
class DumpertIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
||||||
|
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6646981/951bc60f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ik heb nieuws voor je',
|
||||||
|
'description': 'Niet schrikken hoor',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Cookie', 'nsfw=1')
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
files_base64 = self._search_regex(
|
||||||
|
r'data-files="([^"]+)"', webpage, 'data files')
|
||||||
|
|
||||||
|
files = self._parse_json(
|
||||||
|
base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'quality': quality(format_id),
|
||||||
|
} for format_id, video_url in files.items() if format_id != 'still']
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
'title', webpage) or self._og_search_title(webpage)
|
||||||
|
description = self._html_search_meta(
|
||||||
|
'description', webpage) or self._og_search_description(webpage)
|
||||||
|
thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'formats': formats
|
||||||
|
}
|
99
youtube_dl/extractor/eagleplatform.py
Normal file
99
youtube_dl/extractor/eagleplatform.py
Normal file
@@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
)


class EaglePlatformIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '6c2ebeab03b739597ce8d86339d5a905',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
        'skip': 'Georestricted',
    }]

    def _handle_error(self, response):
        status = int_or_none(response.get('status', 200))
        if status != 200:
            raise ExtractorError(' '.join(response['errors']), expected=True)

    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
        response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
        self._handle_error(response)
        return response

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')

        player_data = self._download_json(
            'http://%s/api/player_data?id=%s' % (host, video_id), video_id)

        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        title = media['title']
        description = media.get('description')
        thumbnail = media.get('snapshot')
        duration = int_or_none(media.get('duration'))
        view_count = int_or_none(media.get('views'))

        age_restriction = media.get('age_restriction')
        age_limit = None
        if age_restriction:
            age_limit = 0 if age_restriction == 'allow_all' else 18

        m3u8_data = self._download_json(
            media['sources']['secure_m3u8']['auto'],
            video_id, 'Downloading m3u8 JSON')

        formats = self._extract_m3u8_formats(
            m3u8_data['data'][0], video_id,
            'mp4', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        }
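As a usage sketch (not part of the change itself), the new extractor can be driven either with the raw player URL or with the eagleplatform:<host>:<record_id> shorthand; the record id below is taken from the ClipYou test case above:

import youtube_dl

# Metadata-only run; skip_download avoids fetching the media itself.
ydl = youtube_dl.YoutubeDL({'skip_download': True, 'quiet': True})
info = ydl.extract_info('eagleplatform:media.clipyou.ru:12820', download=False)
print(info.get('title'), info.get('duration'))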
youtube_dl/extractor/eighttracks.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 import json
 import random
-import re
 
 from .common import InfoExtractor
 from ..compat import (
@@ -103,20 +102,23 @@ class EightTracksIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
 
-        json_like = self._search_regex(
-            r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
-        data = json.loads(json_like)
+        data = self._parse_json(
+            self._search_regex(
+                r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
+            playlist_id)
 
         session = str(random.randint(0, 1000000000))
         mix_id = data['id']
         track_count = data['tracks_count']
         duration = data['duration']
         avg_song_duration = float(duration) / track_count
+        # duration is sometimes negative, use predefined avg duration
+        if avg_song_duration <= 0:
+            avg_song_duration = 300
         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
         next_url = first_url
         entries = []
youtube_dl/extractor/ellentv.py
@@ -13,15 +13,15 @@ from ..utils import (
 class EllenTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
     _TESTS = [{
-        'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
-        'md5': 'e4af06f3bf0d5f471921a18db5764642',
+        'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
+        'md5': '8e3c576bf2e9bfff4d76565f56f94c9c',
         'info_dict': {
-            'id': '0-7jqrsr18',
+            'id': '0-ipq1gsai',
             'ext': 'mp4',
-            'title': 'What\'s Wrong with These Photos? A Whole Lot',
-            'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
-            'timestamp': 1406876400,
-            'upload_date': '20140801',
+            'title': 'Fast Fingers of Fate',
+            'description': 'md5:686114ced0a032926935e9015ee794ac',
+            'timestamp': 1428033600,
+            'upload_date': '20150403',
         }
     }, {
         'url': 'http://ellentube.com/videos/0-dvzmabd5/',
@@ -40,14 +40,15 @@ class EllenTVIE(InfoExtractor):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        video_url = self._html_search_meta('VideoURL', webpage, 'url')
+
+        video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True)
         title = self._og_search_title(webpage, default=None) or self._search_regex(
             r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
         description = self._html_search_meta(
             'description', webpage, 'description') or self._og_search_description(webpage)
         timestamp = parse_iso8601(self._search_regex(
             r'<span class="publish-date"><time datetime="([^"]+)">',
-            webpage, 'timestamp'))
+            webpage, 'timestamp', fatal=False))
 
         return {
             'id': video_id,
youtube_dl/extractor/eroprofile.py
@@ -1,11 +1,17 @@
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import ExtractorError
 
 
 class EroProfileIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
-    _TEST = {
+    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+    _NETRC_MACHINE = 'eroprofile'
+    _TESTS = [{
         'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
         'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
         'info_dict': {
@@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor):
             'thumbnail': 're:https?://.*\.jpg',
             'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+        'md5': '1baa9602ede46ce904c431f5418d8916',
+        'info_dict': {
+            'id': '1133519',
+            'ext': 'm4v',
+            'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+            'thumbnail': 're:https?://.*\.jpg',
+            'age_limit': 18,
+        },
+        'skip': 'Requires login',
+    }]
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        query = compat_urllib_parse.urlencode({
+            'username': username,
+            'password': password,
+            'url': 'http://www.eroprofile.com/',
+        })
+        login_url = self._LOGIN_URL + query
+        login_page = self._download_webpage(login_url, None, False)
+
+        m = re.search(r'Your username or password was incorrect\.', login_page)
+        if m:
+            raise ExtractorError(
+                'Wrong username and/or password.', expected=True)
+
+        self.report_login()
+        redirect_url = self._search_regex(
+            r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+        self._download_webpage(redirect_url, None, False)
+
+    def _real_initialize(self):
+        self._login()
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
 
+        m = re.search(r'You must be logged in to view this video\.', webpage)
+        if m:
+            raise ExtractorError(
+                'This video requires login. Please specify a username and password and try again.', expected=True)
+
         video_id = self._search_regex(
             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
             webpage, 'video id', default=None)
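The login flow added above simply appends URL-encoded credentials to _LOGIN_URL and fetches the resulting page. A standalone sketch of that query construction with the standard library (the credentials are placeholders):

try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode  # Python 2

LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'

# Placeholder credentials purely for illustration.
query = urlencode({
    'username': 'user@example.com',
    'password': 'correct horse battery staple',
    'url': 'http://www.eroprofile.com/',
})
print(LOGIN_URL + query)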
youtube_dl/extractor/extremetube.py
@@ -4,11 +4,11 @@ import re
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urllib_parse_urlparse,
+    compat_parse_qs,
     compat_urllib_request,
-    compat_urllib_parse,
 )
 from ..utils import (
+    qualities,
     str_to_int,
 )
 
@@ -17,7 +17,7 @@ class ExtremeTubeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
-        'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
+        'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
         'info_dict': {
             'id': '652431',
             'ext': 'mp4',
@@ -49,19 +49,27 @@ class ExtremeTubeIE(InfoExtractor):
             r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
             webpage, 'view count', fatal=False))
 
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(
-            r'video_url=(.+?)&', webpage, 'video_url'))
-        path = compat_urllib_parse_urlparse(video_url).path
-        format = path.split('/')[5].split('_')[:2]
-        format = "-".join(format)
+        flash_vars = compat_parse_qs(self._search_regex(
+            r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
+
+        formats = []
+        quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
+        for k, vals in flash_vars.items():
+            m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
+            if m is not None:
+                formats.append({
+                    'format_id': m.group('quality'),
+                    'quality': quality(m.group('quality')),
+                    'url': vals[0],
+                })
+
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': video_title,
+            'formats': formats,
             'uploader': uploader,
             'view_count': view_count,
-            'url': video_url,
-            'format': format,
-            'format_id': format,
             'age_limit': 18,
         }
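The rewritten ExtremeTube extractor no longer derives a single format from the video URL path; it parses the player's flashvars (a URL-encoded query string) and keeps every quality_<height>p entry. A self-contained sketch of that step with a made-up flashvars value:

import re
try:
    from urllib.parse import parse_qs  # Python 3
except ImportError:
    from urlparse import parse_qs  # Python 2

# Hypothetical flashvars string with two quality_* entries and an unrelated key.
flashvars = ('quality_480p=http%3A%2F%2Fcdn.example.com%2F480.mp4'
             '&quality_720p=http%3A%2F%2Fcdn.example.com%2F720.mp4&autoplay=0')

formats = []
for key, vals in parse_qs(flashvars).items():
    m = re.match(r'quality_(?P<quality>[0-9]+p)$', key)
    if m:
        formats.append({'format_id': m.group('quality'), 'url': vals[0]})

print(sorted(f['format_id'] for f in formats))  # ['480p', '720p']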
youtube_dl/extractor/footyroom.py (new file, 49 lines)
@@ -0,0 +1,49 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


class FootyRoomIE(InfoExtractor):
    _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
        'info_dict': {
            'id': 'schalke-04-0-2-real-madrid-2015-02',
            'title': 'Schalke 04 0 – 2 Real Madrid',
        },
        'playlist_count': 3,
    }, {
        'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
        'info_dict': {
            'id': 'georgia-0-2-germany-2015-03',
            'title': 'Georgia 0 – 2 Germany',
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        playlist = self._parse_json(
            self._search_regex(
                r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
            playlist_id)

        playlist_title = self._og_search_title(webpage)

        entries = []
        for video in playlist:
            payload = video.get('payload')
            if not payload:
                continue
            playwire_url = self._search_regex(
                r'data-config="([^"]+)"', payload,
                'playwire url', default=None)
            if playwire_url:
                entries.append(self.url_result(self._proto_relative_url(
                    playwire_url, 'http:'), 'Playwire'))

        return self.playlist_result(entries, playlist_id, playlist_title)
youtube_dl/extractor/francetv.py
@@ -14,7 +14,9 @@ from ..utils import (
     clean_html,
     ExtractorError,
     int_or_none,
+    float_or_none,
     parse_duration,
+    determine_ext,
 )
 
 
@@ -50,7 +52,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             if not video_url:
                 continue
             format_id = video['format']
-            if video_url.endswith('.f4m'):
+            ext = determine_ext(video_url)
+            if ext == 'f4m':
                 if georestricted:
                     # See https://github.com/rg3/youtube-dl/issues/3963
                     # m3u8 urls work fine
@@ -60,12 +63,9 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
                     'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
                     video_id, 'Downloading f4m manifest token', fatal=False)
                 if f4m_url:
-                    f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
-                    for f4m_format in f4m_formats:
-                        f4m_format['preference'] = 1
-                    formats.extend(f4m_formats)
-            elif video_url.endswith('.m3u8'):
-                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
+                    formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
             elif video_url.startswith('rtmp'):
                 formats.append({
                     'url': video_url,
@@ -86,7 +86,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             'title': info['titre'],
             'description': clean_html(info['synopsis']),
             'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
-            'duration': parse_duration(info['duree']),
+            'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
             'timestamp': int_or_none(info['diffusion']['timestamp']),
             'formats': formats,
         }
@@ -260,22 +260,28 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
     _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
 
     _TEST = {
-        'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
-        'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
+        'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
+        'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
         'info_dict': {
-            'id': 'EV_22853',
+            'id': 'EV_50111',
             'ext': 'flv',
-            'title': 'Dans les jardins de William Christie - Le Camus',
-            'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
-            'upload_date': '20140829',
-            'timestamp': 1409317200,
+            'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
+            'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
+            'upload_date': '20150320',
+            'timestamp': 1426892400,
+            'duration': 2760.9,
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         name = mobj.group('name')
 
         webpage = self._download_webpage(url, name)
+
+        if ">Ce live n'est plus disponible en replay<" in webpage:
+            raise ExtractorError('Video %s is not available' % name, expected=True)
+
         video_id, catalogue = self._search_regex(
             r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
 
youtube_dl/extractor/funnyordie.py
@@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor):
         bitrates.sort()
 
         formats = []
-
         for bitrate in bitrates:
             for link in links:
                 formats.append({
@@ -59,6 +58,13 @@ class FunnyOrDieIE(InfoExtractor):
                     'vbr': bitrate,
                 })
 
+        subtitles = {}
+        for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
+            subtitles[src_lang] = [{
+                'ext': src.split('/')[-1],
+                'url': 'http://www.funnyordie.com%s' % src,
+            }]
+
         post_json = self._search_regex(
             r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
         post = json.loads(post_json)
@@ -69,4 +75,5 @@ class FunnyOrDieIE(InfoExtractor):
             'description': post.get('description'),
             'thumbnail': post.get('picture'),
             'formats': formats,
+            'subtitles': subtitles,
         }
youtube_dl/extractor/gamersyde.py (new file, 70 lines)
@@ -0,0 +1,70 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    js_to_json,
    parse_duration,
    remove_start,
)


class GamersydeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
    _TEST = {
        'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
        'md5': 'f38d400d32f19724570040d5ce3a505f',
        'info_dict': {
            'id': '34371',
            'ext': 'mp4',
            'duration': 372,
            'title': 'Bloodborne - Birth of a hero',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        playlist = self._parse_json(
            self._search_regex(
                r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
            display_id, transform_source=js_to_json)

        formats = []
        for source in playlist['sources']:
            video_url = source.get('file')
            if not video_url:
                continue
            format_id = source.get('label')
            f = {
                'url': video_url,
                'format_id': format_id,
            }
            m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
            if m:
                f.update({
                    'height': int(m.group('height')),
                    'fps': int(m.group('fps')),
                })
            formats.append(f)
        self._sort_formats(formats)

        title = remove_start(playlist['title'], '%s - ' % video_id)
        thumbnail = playlist.get('image')
        duration = parse_duration(self._search_regex(
            r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
youtube_dl/extractor/gazeta.py (new file, 38 lines)
@@ -0,0 +1,38 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class GazetaIE(InfoExtractor):
    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
    _TESTS = [{
        'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
        'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
        'info_dict': {
            'id': '205566',
            'ext': 'mp4',
            'title': '«70–80 процентов гражданских в Донецке на грани голода»',
            'description': 'md5:38617526050bd17b234728e7f9620a71',
            'thumbnail': 're:^https?://.*\.jpg',
        },
    }, {
        'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

        display_id = mobj.group('id')
        embed_url = '%s?p=embed' % mobj.group('url')
        embed_page = self._download_webpage(
            embed_url, display_id, 'Downloading embed page')

        video_id = self._search_regex(
            r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')

        return self.url_result(
            'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
youtube_dl/extractor/generic.py
@@ -29,10 +29,12 @@ from ..utils import (
     xpath_text,
 )
 from .brightcove import BrightcoveIE
+from .nbc import NBCSportsVPlayerIE
 from .ooyala import OoyalaIE
 from .rutv import RUTVIE
 from .smotri import SmotriIE
 from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
 
 
 class GenericIE(InfoExtractor):
@@ -527,6 +529,17 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Viddler'],
         },
+        # Libsyn embed
+        {
+            'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+            'info_dict': {
+                'id': '3377616',
+                'ext': 'mp3',
+                'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+                'description': 'md5:601cb790edd05908957dae8aaa866465',
+                'upload_date': '20150220',
+            },
+        },
        # jwplayer YouTube
        {
            'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
@@ -570,6 +583,66 @@ class GenericIE(InfoExtractor):
                 'title': 'John Carlson Postgame 2/25/15',
             },
         },
+        # Eagle.Platform embed (generic URL)
+        {
+            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+            'info_dict': {
+                'id': '227304',
+                'ext': 'mp4',
+                'title': 'Навальный вышел на свободу',
+                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 87,
+                'view_count': int,
+                'age_limit': 0,
+            },
+        },
+        # ClipYou (Eagle.Platform) embed (custom URL)
+        {
+            'url': 'http://muz-tv.ru/play/7129/',
+            'info_dict': {
+                'id': '12820',
+                'ext': 'mp4',
+                'title': "'O Sole Mio",
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 216,
+                'view_count': int,
+            },
+        },
+        # Pladform embed
+        {
+            'url': 'http://muz-tv.ru/kinozal/view/7400/',
+            'info_dict': {
+                'id': '100183293',
+                'ext': 'mp4',
+                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
+                'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 694,
+                'age_limit': 0,
+            },
+        },
+        # Playwire embed
+        {
+            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+            'info_dict': {
+                'id': '3519514',
+                'ext': 'mp4',
+                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+                'thumbnail': 're:^https?://.*\.png$',
+                'duration': 45.115,
+            },
+        },
+        # 5min embed
+        {
+            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+            'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+            'info_dict': {
+                'id': '518726732',
+                'ext': 'mp4',
+                'title': 'Facebook Creates "On This Day" | Crunch Report',
+            },
+        },
         # RSS feed with enclosure
         {
             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -579,6 +652,81 @@ class GenericIE(InfoExtractor):
             'upload_date': '20150228',
             'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
             }
+        },
+        # Crooks and Liars embed
+        {
+            'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
+            'info_dict': {
+                'id': '8RUoRhRi',
+                'ext': 'mp4',
+                'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+                'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+                'timestamp': 1428207000,
+                'upload_date': '20150405',
+                'uploader': 'Heather',
+            },
+        },
+        # Crooks and Liars external embed
+        {
+            'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
+            'info_dict': {
+                'id': 'MTE3MjUtMzQ2MzA',
+                'ext': 'mp4',
+                'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
+                'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
+                'timestamp': 1265032391,
+                'upload_date': '20100201',
+                'uploader': 'Heather',
+            },
+        },
+        # NBC Sports vplayer embed
+        {
+            'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+            'info_dict': {
+                'id': 'ln7x1qSThw4k',
+                'ext': 'flv',
+                'title': "PFT Live: New leader in the 'new-look' defense",
+                'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+            },
+        },
+        # UDN embed
+        {
+            'url': 'http://www.udn.com/news/story/7314/822787',
+            'md5': 'fd2060e988c326991037b9aff9df21a6',
+            'info_dict': {
+                'id': '300346',
+                'ext': 'mp4',
+                'title': '中一中男師變性 全校師生力挺',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            }
+        },
+        # Ooyala embed
+        {
+            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+            'info_dict': {
+                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
+                'ext': 'mp4',
+                'description': 'VIDEO: Index/Match versus VLOOKUP.',
+                'title': 'This is what separates the Excel masters from the wannabes',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
+        # Contains a SMIL manifest
+        {
+            'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
+            'info_dict': {
+                'id': 'file',
+                'ext': 'flv',
+                'title': '+ Football: Lottery Champions League Europe',
+                'uploader': 'www.telewebion.com',
+            },
+            'params': {
+                # rtmpe downloads
+                'skip_download': True,
+            }
         }
     ]
 
@@ -967,10 +1115,24 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'))
 
+        # Look for NYTimes player
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
+        # Look for Libsyn player
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                 re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
+                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
+                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
         if mobj is not None:
             return OoyalaIE._build_url_result(mobj.group('ec'))
 
@@ -1155,6 +1317,54 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
 
+        # Look for Eagle.Platform embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'EaglePlatform')
+
+        # Look for ClipYou (uses Eagle.Platform) embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+        if mobj is not None:
+            return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
+
+        # Look for Pladform embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Pladform')
+
+        # Look for Playwire embeds
+        mobj = re.search(
+            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
+        # Look for 5min embeds
+        mobj = re.search(
+            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+        if mobj is not None:
+            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+        # Look for Crooks and Liars embeds
+        mobj = re.search(
+            r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
+        # Look for NBC Sports VPlayer embeds
+        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+        if nbc_sports_url:
+            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+        # Look for UDN embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+        if mobj is not None:
+            return self.url_result(
+                compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
@@ -1211,10 +1421,16 @@ class GenericIE(InfoExtractor):
         # HTML5 video
         found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
         if not found:
+            REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
             found = re.search(
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
-                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
+                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
                 webpage)
+        if not found:
+            # Look also in Refresh HTTP header
+            refresh_header = head_response.headers.get('Refresh')
+            if refresh_header:
+                found = re.search(REDIRECT_REGEX, refresh_header)
         if found:
             new_url = found.group(1)
             self.report_following_redirect(new_url)
@@ -1238,13 +1454,22 @@ class GenericIE(InfoExtractor):
             # here's a fun little line of code for you:
             video_id = os.path.splitext(video_id)[0]
 
-            entries.append({
-                'id': video_id,
-                'url': video_url,
-                'uploader': video_uploader,
-                'title': video_title,
-                'age_limit': age_limit,
-            })
+            if determine_ext(video_url) == 'smil':
+                entries.append({
+                    'id': video_id,
+                    'formats': self._extract_smil_formats(video_url, video_id),
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
+            else:
+                entries.append({
+                    'id': video_id,
+                    'url': video_url,
+                    'uploader': video_uploader,
+                    'title': video_title,
+                    'age_limit': age_limit,
+                })
 
         if len(entries) == 1:
             return entries[0]
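The refactored redirect handling in generic.py reuses one pattern (REDIRECT_REGEX) for both the meta http-equiv="refresh" content attribute and the Refresh HTTP header. A quick standalone check of that pattern against both shapes (the URLs are illustrative):

import re

REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'

refresh_header = '0; url=http://example.com/video.html'
meta_content = "5;URL='http://example.com/other.html'"

print(re.search(REDIRECT_REGEX, refresh_header).group(1))  # http://example.com/video.html
print(re.search(REDIRECT_REGEX, meta_content).group(1))    # http://example.com/other.html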
youtube_dl/extractor/globo.py
@@ -20,7 +20,7 @@ class GloboIE(InfoExtractor):
     _VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
 
     _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
-    _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
+    _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
 
     _VIDEOID_REGEXES = [
         r'\bdata-video-id="(\d+)"',
youtube_dl/extractor/grooveshark.py
@@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor):
 
         if webpage is not None:
             o = GroovesharkHtmlParser.extract_object_tags(webpage)
-            return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
+            return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
 
-        return (webpage, None)
+        return webpage, None
 
     def _real_initialize(self):
         self.ts = int(time.time() * 1000)  # timestamp in millis
@@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor):
         swf_referer = None
         if self.do_playerpage_request:
             (_, player_objs) = self._get_playerpage(url)
-            if player_objs is not None:
+            if player_objs:
                 swf_referer = self._build_swf_referer(url, player_objs[0])
                 self.to_screen('SWF Referer: %s' % swf_referer)
 
youtube_dl/extractor/hitbox.py
@@ -10,6 +10,7 @@ from ..utils import (
     float_or_none,
     int_or_none,
     compat_str,
+    determine_ext,
 )
 
 
@@ -147,12 +148,27 @@ class HitboxLiveIE(HitboxIE):
             servers.append(base_url)
             for stream in cdn.get('bitrates'):
                 label = stream.get('label')
-                if label != 'Auto':
+                if label == 'Auto':
+                    continue
+                stream_url = stream.get('url')
+                if not stream_url:
+                    continue
+                bitrate = int_or_none(stream.get('bitrate'))
+                if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
+                    if not stream_url.startswith('http'):
+                        continue
                     formats.append({
-                        'url': '%s/%s' % (base_url, stream.get('url')),
+                        'url': stream_url,
                         'ext': 'mp4',
-                        'vbr': stream.get('bitrate'),
-                        'resolution': label,
+                        'tbr': bitrate,
+                        'format_note': label,
+                        'rtmp_live': True,
+                    })
+                else:
+                    formats.append({
+                        'url': '%s/%s' % (base_url, stream_url),
+                        'ext': 'mp4',
+                        'tbr': bitrate,
                         'rtmp_live': True,
                         'format_note': host,
                         'page_url': url,
youtube_dl/extractor/jeuxvideo.py
@@ -2,7 +2,6 @@
 
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
@@ -15,10 +14,10 @@ class JeuxVideoIE(InfoExtractor):
         'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
         'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
         'info_dict': {
-            'id': '5182',
+            'id': '114765',
             'ext': 'mp4',
-            'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
-            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
+            'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
+            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
         },
     }
 
@@ -26,26 +25,29 @@ class JeuxVideoIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         title = mobj.group(1)
         webpage = self._download_webpage(url, title)
-        xml_link = self._html_search_regex(
-            r'<param name="flashvars" value="config=(.*?)" />',
+        title = self._html_search_meta('name', webpage)
+        config_url = self._html_search_regex(
+            r'data-src="(/contenu/medias/video.php.*?)"',
             webpage, 'config URL')
+        config_url = 'http://www.jeuxvideo.com' + config_url
 
         video_id = self._search_regex(
-            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
-            xml_link, 'video ID')
+            r'id=(\d+)',
+            config_url, 'video ID')
 
-        config = self._download_xml(
-            xml_link, title, 'Downloading XML config')
-        info_json = config.find('format.json').text
-        info = json.loads(info_json)['versions'][0]
+        config = self._download_json(
+            config_url, title, 'Downloading JSON config')
 
-        video_url = 'http://video720.jeuxvideo.com/' + info['file']
+        formats = [{
+            'url': source['file'],
+            'format_id': source['label'],
+            'resolution': source['label'],
+        } for source in reversed(config['sources'])]
 
         return {
             'id': video_id,
-            'title': config.find('titre_video').text,
-            'ext': 'mp4',
-            'url': video_url,
+            'title': title,
+            'formats': formats,
             'description': self._og_search_description(webpage),
-            'thumbnail': config.find('image').text,
+            'thumbnail': config.get('image'),
         }
youtube_dl/extractor/kanalplay.py (new file, 96 lines)
@@ -0,0 +1,96 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
)


class KanalPlayIE(InfoExtractor):
    IE_DESC = 'Kanal 5/9/11 Play'
    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
        'info_dict': {
            'id': '3270012277',
            'ext': 'flv',
            'title': 'Saknar både dusch och avlopp',
            'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
            'duration': 2636.36,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
        'only_matching': True,
    }, {
        'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
        'only_matching': True,
    }]

    def _fix_subtitles(self, subs):
        return '\r\n\r\n'.join(
            '%s\r\n%s --> %s\r\n%s'
            % (
                num,
                self._subtitles_timecode(item['startMillis'] / 1000.0),
                self._subtitles_timecode(item['endMillis'] / 1000.0),
                item['text'],
            ) for num, item in enumerate(subs, 1))

    def _get_subtitles(self, channel_id, video_id):
        subs = self._download_json(
            'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
            video_id, 'Downloading subtitles JSON', fatal=False)
        return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        channel_id = mobj.group('channel_id')

        video = self._download_json(
            'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
            video_id)

        reasons_for_no_streams = video.get('reasonsForNoStreams')
        if reasons_for_no_streams:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
                expected=True)

        title = video['title']
        description = video.get('description')
        duration = float_or_none(video.get('length'), 1000)
        thumbnail = video.get('posterUrl')

        stream_base_url = video['streamBaseUrl']

        formats = [{
            'url': stream_base_url,
            'play_path': stream['source'],
            'ext': 'flv',
            'tbr': float_or_none(stream.get('bitrate'), 1000),
            'rtmp_real_time': True,
        } for stream in video['streams']]
        self._sort_formats(formats)

        subtitles = {}
        if video.get('hasSubtitle'):
            subtitles = self.extract_subtitles(channel_id, video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
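The _fix_subtitles helper above turns the Kanal Play subtitle JSON into SRT text by numbering each cue and converting its millisecond offsets into timecodes. A rough standalone sketch of the same assembly, with made-up cues and a simple HH:MM:SS,mmm formatter standing in for _subtitles_timecode:

def timecode(seconds):
    # Simple HH:MM:SS,mmm formatter used only for this sketch.
    return '%02d:%02d:%02d,%03d' % (
        seconds // 3600, (seconds % 3600) // 60, seconds % 60,
        round((seconds % 1) * 1000))

# Hypothetical cues in the shape the subtitle API response is read above.
subs = [
    {'startMillis': 0, 'endMillis': 1500, 'text': 'Hej!'},
    {'startMillis': 2000, 'endMillis': 4000, 'text': 'Hur mar du?'},
]

srt = '\r\n\r\n'.join(
    '%s\r\n%s --> %s\r\n%s' % (
        num,
        timecode(item['startMillis'] / 1000.0),
        timecode(item['endMillis'] / 1000.0),
        item['text'])
    for num, item in enumerate(subs, 1))
print(srt)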
youtube_dl/extractor/krasview.py
@@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor):
         description = self._og_search_description(webpage, default=None)
         thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
         duration = int_or_none(flashvars.get('duration'))
-        width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
-        height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
+        width = int_or_none(self._og_search_property(
+            'video:width', webpage, 'video width', default=None))
+        height = int_or_none(self._og_search_property(
+            'video:height', webpage, 'video height', default=None))
 
         return {
             'id': video_id,
youtube_dl/extractor/letv.py
@@ -88,12 +88,13 @@ class LetvIE(InfoExtractor):
         play_json_req = compat_urllib_request.Request(
             'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
         )
-        play_json_req.add_header(
-            'Ytdl-request-proxy',
-            self._downloader.params.get('cn_verification_proxy'))
+        cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+        if cn_verification_proxy:
+            play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
         play_json = self._download_json(
             play_json_req,
-            media_id, 'playJson data')
+            media_id, 'Downloading playJson data')
 
         # Check for errors
         playstatus = play_json['playstatus']
youtube_dl/extractor/libsyn.py (new file, 59 lines)
@@ -0,0 +1,59 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import unified_strdate


class LibsynIE(InfoExtractor):
    _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
        'md5': '443360ee1b58007bc3dcf09b41d093bb',
        'info_dict': {
            'id': '3377616',
            'ext': 'mp3',
            'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
            'description': 'md5:601cb790edd05908957dae8aaa866465',
            'upload_date': '20150220',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        formats = [{
            'url': media_url,
        } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]

        podcast_title = self._search_regex(
            r'<h2>([^<]+)</h2>', webpage, 'title')
        episode_title = self._search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title', default=None)

        title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title

        description = self._html_search_regex(
            r'<div id="info_text_body">(.+?)</div>', webpage,
            'description', fatal=False)

        thumbnail = self._search_regex(
            r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        release_date = unified_strdate(self._search_regex(
            r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': release_date,
            'formats': formats,
        }
youtube_dl/extractor/livestream.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 import re
 import json
+import itertools
 
 from .common import InfoExtractor
 from ..compat import (
@@ -20,7 +21,7 @@ from ..utils import (
 
 class LivestreamIE(InfoExtractor):
     IE_NAME = 'livestream'
-    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
     _TESTS = [{
         'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
         'md5': '53274c76ba7754fb0e8d072716f2292b',
@@ -40,9 +41,19 @@ class LivestreamIE(InfoExtractor):
             'id': '2245590',
         },
         'playlist_mincount': 4,
+    }, {
+        'url': 'http://new.livestream.com/chess24/tatasteelchess',
+        'info_dict': {
+            'title': 'Tata Steel Chess',
+            'id': '3705884',
+        },
+        'playlist_mincount': 60,
     }, {
         'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
         'only_matching': True,
+    }, {
+        'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
+        'only_matching': True,
     }]
 
     def _parse_smil(self, video_id, smil_url):
@@ -117,6 +128,30 @@ class LivestreamIE(InfoExtractor):
             'view_count': video_data.get('views'),
         }
 
+    def _extract_event(self, info):
+        event_id = compat_str(info['id'])
+        account = compat_str(info['owner_account_id'])
+        root_url = (
+            'https://new.livestream.com/api/accounts/{account}/events/{event}/'
+            'feed.json'.format(account=account, event=event_id))
+
+        def _extract_videos():
+            last_video = None
+            for i in itertools.count(1):
+                if last_video is None:
+                    info_url = root_url
+                else:
+                    info_url = '{root}?&id={id}&newer=-1&type=video'.format(
+                        root=root_url, id=last_video)
+                videos_info = self._download_json(info_url, event_id, 'Downloading page {0}'.format(i))['data']
+                videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
+                if not videos_info:
+                    break
+                for v in videos_info:
+                    yield self._extract_video_info(v)
+                last_video = videos_info[-1]['id']
+        return self.playlist_result(_extract_videos(), event_id, info['full_name'])
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
@@ -144,14 +179,13 @@ class LivestreamIE(InfoExtractor):
                 result = result and compat_str(vdata['data']['id']) == vid
             return result
 
-        videos = [self._extract_video_info(video_data['data'])
-                  for video_data in info['feed']['data']
-                  if is_relevant(video_data, video_id)]
         if video_id is None:
             # This is an event page:
-            return self.playlist_result(
-                videos, '%s' % info['id'], info['full_name'])
+            return self._extract_event(info)
         else:
+            videos = [self._extract_video_info(video_data['data'])
+                      for video_data in info['feed']['data']
+                      if is_relevant(video_data, video_id)]
             if not videos:
                 raise ExtractorError('Cannot find video %s' % video_id)
             return videos[0]
@@ -52,6 +52,7 @@ class LRTIE(InfoExtractor):
|
|||||||
'url': data['streamer'],
|
'url': data['streamer'],
|
||||||
'play_path': 'mp4:%s' % data['file'],
|
'play_path': 'mp4:%s' % data['file'],
|
||||||
'preference': -1,
|
'preference': -1,
|
||||||
|
'rtmp_real_time': True,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
formats.extend(
|
formats.extend(
|
||||||
|
98
youtube_dl/extractor/miomio.py
Normal file
98
youtube_dl/extractor/miomio.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
xpath_text,
|
||||||
|
int_or_none,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MioMioIE(InfoExtractor):
|
||||||
|
IE_NAME = 'miomio.tv'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# "type=video" in flashvars
|
||||||
|
'url': 'http://www.miomio.tv/watch/cc88912/',
|
||||||
|
'md5': '317a5f7f6b544ce8419b784ca8edae65',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '88912',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
|
||||||
|
'duration': 5923,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.miomio.tv/watch/cc184024/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '43729',
|
||||||
|
'title': '《动漫同人插画绘制》',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 86,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
'description', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
|
mioplayer_path = self._search_regex(
|
||||||
|
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
|
||||||
|
|
||||||
|
xml_config = self._search_regex(
|
||||||
|
r'flashvars="type=(?:sina|video)&(.+?)&',
|
||||||
|
webpage, 'xml config')
|
||||||
|
|
||||||
|
# skipping the following page causes lags and eventually connection drop-outs
|
||||||
|
self._request_webpage(
|
||||||
|
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
# the following xml contains the actual configuration information on the video file(s)
|
||||||
|
vid_config = self._download_xml(
|
||||||
|
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
http_headers = {
|
||||||
|
'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
|
||||||
|
}
|
||||||
|
|
||||||
|
if not int_or_none(xpath_text(vid_config, 'timelength')):
|
||||||
|
raise ExtractorError('Unable to load videos!', expected=True)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for f in vid_config.findall('./durl'):
|
||||||
|
segment_url = xpath_text(f, 'url', 'video url')
|
||||||
|
if not segment_url:
|
||||||
|
continue
|
||||||
|
order = xpath_text(f, 'order', 'order')
|
||||||
|
segment_id = video_id
|
||||||
|
segment_title = title
|
||||||
|
if order:
|
||||||
|
segment_id += '-%s' % order
|
||||||
|
segment_title += ' part %s' % order
|
||||||
|
entries.append({
|
||||||
|
'id': segment_id,
|
||||||
|
'url': segment_url,
|
||||||
|
'title': segment_title,
|
||||||
|
'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
|
||||||
|
'http_headers': http_headers,
|
||||||
|
})
|
||||||
|
|
||||||
|
if len(entries) == 1:
|
||||||
|
segment = entries[0]
|
||||||
|
segment['id'] = video_id
|
||||||
|
segment['title'] = title
|
||||||
|
return segment
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'id': video_id,
|
||||||
|
'entries': entries,
|
||||||
|
'title': title,
|
||||||
|
'http_headers': http_headers,
|
||||||
|
}
|
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@@ -10,7 +11,6 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
parse_iso8601,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -27,8 +27,6 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||||
'uploader': 'Daniel Holbach',
|
'uploader': 'Daniel Holbach',
|
||||||
'uploader_id': 'dholbach',
|
'uploader_id': 'dholbach',
|
||||||
'upload_date': '20111115',
|
|
||||||
'timestamp': 1321359578,
|
|
||||||
'thumbnail': 're:https?://.*\.jpg',
|
'thumbnail': 're:https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
@@ -37,31 +35,30 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
||||||
'ext': 'm4a',
|
'ext': 'mp3',
|
||||||
'title': 'Electric Relaxation vol. 3',
|
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
||||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
||||||
'uploader': 'Daniel Drumz',
|
'uploader': 'Gilles Peterson Worldwide',
|
||||||
'uploader_id': 'gillespeterson',
|
'uploader_id': 'gillespeterson',
|
||||||
'thumbnail': 're:https?://.*\.jpg',
|
'thumbnail': 're:https?://.*/images/',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_url(self, track_id, template_url):
|
def _get_url(self, track_id, template_url, server_number):
|
||||||
server_count = 30
|
boundaries = (1, 30)
|
||||||
for i in range(server_count):
|
for nr in server_numbers(server_number, boundaries):
|
||||||
url = template_url % i
|
url = template_url % nr
|
||||||
try:
|
try:
|
||||||
# We only want to know if the request succeed
|
# We only want to know if the request succeed
|
||||||
# don't download the whole file
|
# don't download the whole file
|
||||||
self._request_webpage(
|
self._request_webpage(
|
||||||
HEADRequest(url), track_id,
|
HEADRequest(url), track_id,
|
||||||
'Checking URL %d/%d ...' % (i + 1, server_count + 1))
|
'Checking URL %d/%d ...' % (nr, boundaries[-1]))
|
||||||
return url
|
return url
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -75,17 +72,18 @@ class MixcloudIE(InfoExtractor):
|
|||||||
preview_url = self._search_regex(
|
preview_url = self._search_regex(
|
||||||
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
||||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||||
|
server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
|
||||||
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
||||||
final_song_url = self._get_url(track_id, template_url)
|
final_song_url = self._get_url(track_id, template_url, server_number)
|
||||||
if final_song_url is None:
|
if final_song_url is None:
|
||||||
self.to_screen('Trying with m4a extension')
|
self.to_screen('Trying with m4a extension')
|
||||||
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||||
final_song_url = self._get_url(track_id, template_url)
|
final_song_url = self._get_url(track_id, template_url, server_number)
|
||||||
if final_song_url is None:
|
if final_song_url is None:
|
||||||
raise ExtractorError('Unable to extract track url')
|
raise ExtractorError('Unable to extract track url')
|
||||||
|
|
||||||
PREFIX = (
|
PREFIX = (
|
||||||
r'<span class="play-button[^"]*?"'
|
r'm-play-on-spacebar[^>]+'
|
||||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
||||||
@@ -99,16 +97,12 @@ class MixcloudIE(InfoExtractor):
|
|||||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
like_count = str_to_int(self._search_regex(
|
like_count = str_to_int(self._search_regex(
|
||||||
[r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
|
||||||
r'/favorites/?">([0-9]+)<'],
|
|
||||||
webpage, 'like count', fatal=False))
|
webpage, 'like count', fatal=False))
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||||
r'/listeners/?">([0-9,.]+)</a>'],
|
r'/listeners/?">([0-9,.]+)</a>'],
|
||||||
webpage, 'play count', fatal=False))
|
webpage, 'play count', fatal=False))
|
||||||
timestamp = parse_iso8601(self._search_regex(
|
|
||||||
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
|
||||||
webpage, 'upload date', default=None))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
@@ -118,7 +112,38 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'timestamp': timestamp,
|
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def server_numbers(first, boundaries):
|
||||||
|
""" Server numbers to try in descending order of probable availability.
|
||||||
|
Starting from first (i.e. the number of the server hosting the preview file)
|
||||||
|
and going further and further up to the higher boundary and down to the
|
||||||
|
lower one in an alternating fashion. Namely:
|
||||||
|
|
||||||
|
server_numbers(2, (1, 5))
|
||||||
|
|
||||||
|
# Where the preview server is 2, min number is 1 and max is 5.
|
||||||
|
# Yields: 2, 3, 1, 4, 5
|
||||||
|
|
||||||
|
Why not random numbers or increasing sequences? Since from what I've seen,
|
||||||
|
full length files seem to be hosted on servers whose number is closer to
|
||||||
|
that of the preview; to be confirmed.
|
||||||
|
"""
|
||||||
|
zip_longest = getattr(itertools, 'zip_longest', None)
|
||||||
|
if zip_longest is None:
|
||||||
|
# python 2.x
|
||||||
|
zip_longest = itertools.izip_longest
|
||||||
|
|
||||||
|
if len(boundaries) != 2:
|
||||||
|
raise ValueError("boundaries should be a two-element tuple")
|
||||||
|
min, max = boundaries
|
||||||
|
highs = range(first + 1, max + 1)
|
||||||
|
lows = range(first - 1, min - 1, -1)
|
||||||
|
rest = filter(
|
||||||
|
None, itertools.chain.from_iterable(zip_longest(highs, lows)))
|
||||||
|
yield first
|
||||||
|
for n in rest:
|
||||||
|
yield n
|
||||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MLBIE(InfoExtractor):
|
class MLBIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
_VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
||||||
@@ -80,6 +80,10 @@ class MLBIE(InfoExtractor):
|
|||||||
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
|
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -118,6 +118,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
||||||
'Downloading video urls')
|
'Downloading video urls')
|
||||||
|
|
||||||
|
item = mediagen_doc.find('./video/item')
|
||||||
|
if item is not None and item.get('type') == 'text':
|
||||||
|
message = '%s returned error: ' % self.IE_NAME
|
||||||
|
if item.get('code') is not None:
|
||||||
|
message += '%s - ' % item.get('code')
|
||||||
|
message += item.text
|
||||||
|
raise ExtractorError(message, expected=True)
|
||||||
|
|
||||||
description_node = itemdoc.find('description')
|
description_node = itemdoc.find('description')
|
||||||
if description_node is not None:
|
if description_node is not None:
|
||||||
description = description_node.text.strip()
|
description = description_node.text.strip()
|
||||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NBCIE(InfoExtractor):
|
class NBCIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
_VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@@ -50,6 +50,57 @@ class NBCIE(InfoExtractor):
|
|||||||
return self.url_result(theplatform_url)
|
return self.url_result(theplatform_url)
|
||||||
|
|
||||||
|
|
||||||
|
class NBCSportsVPlayerIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9CsDKds0kvHI',
|
||||||
|
'ext': 'flv',
|
||||||
|
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
||||||
|
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
iframe_m = re.search(
|
||||||
|
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
||||||
|
if iframe_m:
|
||||||
|
return iframe_m.group('url')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
theplatform_url = self._og_search_video_url(webpage)
|
||||||
|
return self.url_result(theplatform_url, 'ThePlatform')
|
||||||
|
|
||||||
|
|
||||||
|
class NBCSportsIE(InfoExtractor):
|
||||||
|
# Does not include https becuase its certificate is invalid
|
||||||
|
_VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'PHJSaFWbrTY9',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||||
|
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
return self.url_result(
|
||||||
|
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
||||||
|
|
||||||
|
|
||||||
class NBCNewsIE(InfoExtractor):
|
class NBCNewsIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||||
(?:video/.+?/(?P<id>\d+)|
|
(?:video/.+?/(?P<id>\d+)|
|
||||||
|
@@ -22,7 +22,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
IE_NAME = 'niconico'
|
IE_NAME = 'niconico'
|
||||||
IE_DESC = 'ニコニコ動画'
|
IE_DESC = 'ニコニコ動画'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -39,9 +39,26 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'username': 'ydl.niconico@gmail.com',
|
'username': 'ydl.niconico@gmail.com',
|
||||||
'password': 'youtube-dl',
|
'password': 'youtube-dl',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.nicovideo.jp/watch/nm14296458',
|
||||||
|
'md5': '8db08e0158457cf852a31519fceea5bc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nm14296458',
|
||||||
|
'ext': 'swf',
|
||||||
|
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
|
||||||
|
'description': 'md5:',
|
||||||
|
'uploader': 'りょうた',
|
||||||
|
'uploader_id': '18822557',
|
||||||
|
'upload_date': '20110429',
|
||||||
|
'duration': 209,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'username': 'ydl.niconico@gmail.com',
|
||||||
|
'password': 'youtube-dl',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
# Determine whether the downloader used authentication to download video
|
# Determine whether the downloader used authentication to download video
|
||||||
_AUTHENTICATED = False
|
_AUTHENTICATED = False
|
||||||
@@ -76,8 +93,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
# Get video webpage. We are not actually interested in it, but need
|
# Get video webpage. We are not actually interested in it, but need
|
||||||
# the cookies in order to be able to download the info webpage
|
# the cookies in order to be able to download the info webpage
|
||||||
@@ -90,7 +106,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
if self._AUTHENTICATED:
|
if self._AUTHENTICATED:
|
||||||
# Get flv info
|
# Get flv info
|
||||||
flv_info_webpage = self._download_webpage(
|
flv_info_webpage = self._download_webpage(
|
||||||
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||||
video_id, 'Downloading flv info')
|
video_id, 'Downloading flv info')
|
||||||
else:
|
else:
|
||||||
# Get external player info
|
# Get external player info
|
||||||
|
@@ -219,7 +219,8 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
if streams:
|
if streams:
|
||||||
for stream in streams:
|
for stream in streams:
|
||||||
stream_type = stream.get('type').lower()
|
stream_type = stream.get('type').lower()
|
||||||
if stream_type == 'ss':
|
# smooth streaming is not supported
|
||||||
|
if stream_type in ['ss', 'ms']:
|
||||||
continue
|
continue
|
||||||
stream_info = self._download_json(
|
stream_info = self._download_json(
|
||||||
'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
|
'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
|
||||||
@@ -230,7 +231,10 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
stream_url = self._download_json(
|
stream_url = self._download_json(
|
||||||
stream_info['stream'], display_id,
|
stream_info['stream'], display_id,
|
||||||
'Downloading %s URL' % stream_type,
|
'Downloading %s URL' % stream_type,
|
||||||
transform_source=strip_jsonp)
|
'Unable to download %s URL' % stream_type,
|
||||||
|
transform_source=strip_jsonp, fatal=False)
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
if stream_type == 'hds':
|
if stream_type == 'hds':
|
||||||
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
|
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
|
||||||
# f4m downloader downloads only piece of live stream
|
# f4m downloader downloads only piece of live stream
|
||||||
@@ -242,6 +246,7 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': stream_url,
|
'url': stream_url,
|
||||||
|
'preference': -10,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@@ -14,46 +14,48 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NRKIE(InfoExtractor):
|
class NRKIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
|
_VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
|
'url': 'http://www.nrk.no/video/PS*150533',
|
||||||
'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
|
'md5': 'bccd850baebefe23b56d708a113229c2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '150533',
|
'id': '150533',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Dompap og andre fugler i Piip-Show',
|
'title': 'Dompap og andre fugler i Piip-Show',
|
||||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
|
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||||
|
'duration': 263,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
|
'url': 'http://www.nrk.no/video/PS*154915',
|
||||||
'md5': '3471f2a51718195164e88f46bf427668',
|
'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '154915',
|
'id': '154915',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Slik høres internett ut når du er blind',
|
'title': 'Slik høres internett ut når du er blind',
|
||||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||||
|
'duration': 20,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
|
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
|
'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
|
||||||
|
video_id, 'Downloading media JSON')
|
||||||
|
|
||||||
if data['usageRights']['isGeoBlocked']:
|
if data['usageRights']['isGeoBlocked']:
|
||||||
raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
|
raise ExtractorError(
|
||||||
|
'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
|
video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
|
||||||
|
|
||||||
|
duration = parse_duration(data.get('duration'))
|
||||||
|
|
||||||
images = data.get('images')
|
images = data.get('images')
|
||||||
if images:
|
if images:
|
||||||
@@ -69,10 +71,51 @@ class NRKIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': data['title'],
|
'title': data['title'],
|
||||||
'description': data['description'],
|
'description': data['description'],
|
||||||
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NRKPlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
||||||
|
'title': 'Gjenopplev den historiske solformørkelsen',
|
||||||
|
'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'rivertonprisen-til-karin-fossum-1.12266449',
|
||||||
|
'title': 'Rivertonprisen til Karin Fossum',
|
||||||
|
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
|
||||||
|
},
|
||||||
|
'playlist_count': 5,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result('nrk:%s' % video_id, 'NRK')
|
||||||
|
for video_id in re.findall(
|
||||||
|
r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
|
||||||
|
webpage)
|
||||||
|
]
|
||||||
|
|
||||||
|
playlist_title = self._og_search_title(webpage)
|
||||||
|
playlist_description = self._og_search_description(webpage)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
|
||||||
class NRKTVIE(InfoExtractor):
|
class NRKTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||||
|
|
||||||
@@ -149,9 +192,6 @@ class NRKTVIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _seconds2str(self, s):
|
|
||||||
return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
|
|
||||||
|
|
||||||
def _debug_print(self, txt):
|
def _debug_print(self, txt):
|
||||||
if self._downloader.params.get('verbose', False):
|
if self._downloader.params.get('verbose', False):
|
||||||
self.to_screen('[debug] %s' % txt)
|
self.to_screen('[debug] %s' % txt)
|
||||||
@@ -168,8 +208,8 @@ class NRKTVIE(InfoExtractor):
|
|||||||
for pos, p in enumerate(ps):
|
for pos, p in enumerate(ps):
|
||||||
begin = parse_duration(p.get('begin'))
|
begin = parse_duration(p.get('begin'))
|
||||||
duration = parse_duration(p.get('dur'))
|
duration = parse_duration(p.get('dur'))
|
||||||
starttime = self._seconds2str(begin)
|
starttime = self._subtitles_timecode(begin)
|
||||||
endtime = self._seconds2str(begin + duration)
|
endtime = self._subtitles_timecode(begin + duration)
|
||||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
|
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
|
||||||
return {lang: [
|
return {lang: [
|
||||||
{'ext': 'ttml', 'url': url},
|
{'ext': 'ttml', 'url': url},
|
||||||
|
@@ -1,15 +1,17 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_iso8601
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NYTimesIE(InfoExtractor):
|
class NYTimesIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -22,18 +24,21 @@ class NYTimesIE(InfoExtractor):
|
|||||||
'uploader': 'Brett Weiner',
|
'uploader': 'Brett Weiner',
|
||||||
'duration': 419,
|
'duration': 419,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
|
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
|
||||||
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
title = video_data['headline']
|
title = video_data['headline']
|
||||||
description = video_data['summary']
|
description = video_data.get('summary')
|
||||||
duration = video_data['duration'] / 1000.0
|
duration = float_or_none(video_data.get('duration'), 1000)
|
||||||
|
|
||||||
uploader = video_data['byline']
|
uploader = video_data['byline']
|
||||||
timestamp = parse_iso8601(video_data['publication_date'][:-8])
|
timestamp = parse_iso8601(video_data['publication_date'][:-8])
|
||||||
@@ -49,11 +54,11 @@ class NYTimesIE(InfoExtractor):
|
|||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'url': video['url'],
|
'url': video['url'],
|
||||||
'format_id': video['type'],
|
'format_id': video.get('type'),
|
||||||
'vcodec': video['video_codec'],
|
'vcodec': video.get('video_codec'),
|
||||||
'width': video['width'],
|
'width': int_or_none(video.get('width')),
|
||||||
'height': video['height'],
|
'height': int_or_none(video.get('height')),
|
||||||
'filesize': get_file_size(video['fileSize']),
|
'filesize': get_file_size(video.get('fileSize')),
|
||||||
} for video in video_data['renditions']
|
} for video in video_data['renditions']
|
||||||
]
|
]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@@ -61,7 +66,8 @@ class NYTimesIE(InfoExtractor):
|
|||||||
thumbnails = [
|
thumbnails = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nytimes.com/%s' % image['url'],
|
'url': 'http://www.nytimes.com/%s' % image['url'],
|
||||||
'resolution': '%dx%d' % (image['width'], image['height']),
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
} for image in video_data['images']
|
} for image in video_data['images']
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -11,6 +11,11 @@ from ..utils import (
|
|||||||
HEADRequest,
|
HEADRequest,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
strip_jsonp,
|
||||||
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
determine_ext,
|
||||||
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -197,3 +202,92 @@ class ORFFM4IE(InfoExtractor):
|
|||||||
'description': data['subtitle'],
|
'description': data['subtitle'],
|
||||||
'entries': entries
|
'entries': entries
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ORFIPTVIE(InfoExtractor):
|
||||||
|
IE_NAME = 'orf:iptv'
|
||||||
|
IE_DESC = 'iptv.ORF.at'
|
||||||
|
_VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://iptv.orf.at/stories/2267952',
|
||||||
|
'md5': '26ffa4bab6dbce1eee78bbc7021016cd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '339775',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Kreml-Kritiker Nawalny wieder frei',
|
||||||
|
'description': 'md5:6f24e7f546d364dacd0e616a9e409236',
|
||||||
|
'duration': 84.729,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'upload_date': '20150306',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
story_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://iptv.orf.at/stories/%s' % story_id, story_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-video(?:id)?="(\d+)"', webpage, 'video id')
|
||||||
|
|
||||||
|
data = self._download_json(
|
||||||
|
'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
|
||||||
|
video_id)[0]
|
||||||
|
|
||||||
|
duration = float_or_none(data['duration'], 1000)
|
||||||
|
|
||||||
|
video = data['sources']['default']
|
||||||
|
load_balancer_url = video['loadBalancerUrl']
|
||||||
|
abr = int_or_none(video.get('audioBitrate'))
|
||||||
|
vbr = int_or_none(video.get('bitrate'))
|
||||||
|
fps = int_or_none(video.get('videoFps'))
|
||||||
|
width = int_or_none(video.get('videoWidth'))
|
||||||
|
height = int_or_none(video.get('videoHeight'))
|
||||||
|
thumbnail = video.get('preview')
|
||||||
|
|
||||||
|
rendition = self._download_json(
|
||||||
|
load_balancer_url, video_id, transform_source=strip_jsonp)
|
||||||
|
|
||||||
|
f = {
|
||||||
|
'abr': abr,
|
||||||
|
'vbr': vbr,
|
||||||
|
'fps': fps,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id, format_url in rendition['redirect'].items():
|
||||||
|
if format_id == 'rtmp':
|
||||||
|
ff = f.copy()
|
||||||
|
ff.update({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
})
|
||||||
|
formats.append(ff)
|
||||||
|
elif determine_ext(format_url) == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
format_url, video_id, f4m_id=format_id))
|
||||||
|
elif determine_ext(format_url) == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', m3u8_id=format_id))
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
upload_date = unified_strdate(self._html_search_meta(
|
||||||
|
'dc.date', webpage, 'upload date'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
@@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url
|
|||||||
|
|
||||||
|
|
||||||
class PhoenixIE(InfoExtractor):
|
class PhoenixIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
|
_VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
|
||||||
_TEST = {
|
(?:
|
||||||
'url': 'http://www.phoenix.de/content/884301',
|
phoenix/die_sendungen/(?:[^/]+/)?
|
||||||
'md5': 'ed249f045256150c92e72dbb70eadec6',
|
)?
|
||||||
'info_dict': {
|
(?P<id>[0-9]+)'''
|
||||||
'id': '884301',
|
_TESTS = [
|
||||||
'ext': 'mp4',
|
{
|
||||||
'title': 'Michael Krons mit Hans-Werner Sinn',
|
'url': 'http://www.phoenix.de/content/884301',
|
||||||
'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
|
'md5': 'ed249f045256150c92e72dbb70eadec6',
|
||||||
'upload_date': '20141025',
|
'info_dict': {
|
||||||
'uploader': 'Im Dialog',
|
'id': '884301',
|
||||||
}
|
'ext': 'mp4',
|
||||||
}
|
'title': 'Michael Krons mit Hans-Werner Sinn',
|
||||||
|
'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
|
||||||
|
'upload_date': '20141025',
|
||||||
|
'uploader': 'Im Dialog',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
90
youtube_dl/extractor/pladform.py
Normal file
90
youtube_dl/extractor/pladform.py
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
xpath_text,
|
||||||
|
qualities,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PladformIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:
|
||||||
|
out\.pladform\.ru/player|
|
||||||
|
static\.pladform\.ru/player\.swf
|
||||||
|
)
|
||||||
|
\?.*\bvideoid=|
|
||||||
|
video\.pladform\.ru/catalog/video/videoid/
|
||||||
|
)
|
||||||
|
(?P<id>\d+)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
# http://muz-tv.ru/kinozal/view/7400/
|
||||||
|
'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
|
||||||
|
'md5': '61f37b575dd27f1bb2e1854777fe31f4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100183293',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
|
||||||
|
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 694,
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
video = self._download_xml(
|
||||||
|
'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
if video.tag == 'error':
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s' % (self.IE_NAME, video.text),
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
quality = qualities(('ld', 'sd', 'hd'))
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': src.text,
|
||||||
|
'format_id': src.get('quality'),
|
||||||
|
'quality': quality(src.get('quality')),
|
||||||
|
} for src in video.findall('./src')]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage, fatal=False) or xpath_text(
|
||||||
|
video, './/title', 'title', fatal=True)
|
||||||
|
description = self._search_regex(
|
||||||
|
r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
|
||||||
|
video, './/cover', 'cover')
|
||||||
|
|
||||||
|
duration = int_or_none(xpath_text(video, './/time', 'duration'))
|
||||||
|
age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -4,85 +4,72 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_urllib_parse,
|
|
||||||
compat_urllib_request,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_to_int,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class PlayFMIE(InfoExtractor):
|
class PlayFMIE(InfoExtractor):
|
||||||
IE_NAME = 'play.fm'
|
IE_NAME = 'play.fm'
|
||||||
_VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
|
_VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
|
'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
|
||||||
'md5': 'c505f8307825a245d0c7ad1850001f22',
|
'md5': 'c505f8307825a245d0c7ad1850001f22',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '137220',
|
'id': '71276',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
|
'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
|
||||||
'uploader': 'Sven Tasnadi',
|
'description': '',
|
||||||
'uploader_id': 'sventasnadi',
|
'duration': 5627,
|
||||||
'duration': 5627.428,
|
'timestamp': 1406033781,
|
||||||
'upload_date': '20140712',
|
'upload_date': '20140722',
|
||||||
|
'uploader': 'Dan Drastic',
|
||||||
|
'uploader_id': '71170',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
upload_date = mobj.group('upload_date')
|
slug = mobj.group('slug')
|
||||||
|
|
||||||
rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
|
recordings = self._download_json(
|
||||||
req = compat_urllib_request.Request(
|
'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
|
||||||
'http://www.play.fm/flexRead/recording', data=rec_data)
|
|
||||||
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
|
||||||
rec_doc = self._download_xml(req, video_id)
|
|
||||||
|
|
||||||
error_node = rec_doc.find('./error')
|
error = recordings.get('error')
|
||||||
if error_node is not None:
|
if isinstance(error, dict):
|
||||||
raise ExtractorError('An error occured: %s (code %s)' % (
|
raise ExtractorError(
|
||||||
error_node.text, rec_doc.find('./status').text))
|
'%s returned error: %s' % (self.IE_NAME, error.get('message')),
|
||||||
|
expected=True)
|
||||||
|
|
||||||
recording = rec_doc.find('./recording')
|
audio_url = recordings['audio']
|
||||||
title = recording.find('./title').text
|
video_id = compat_str(recordings.get('id') or video_id)
|
||||||
view_count = str_to_int(recording.find('./stats/playcount').text)
|
title = recordings['title']
|
||||||
comment_count = str_to_int(recording.find('./stats/comments').text)
|
description = recordings.get('description')
|
||||||
duration = float_or_none(recording.find('./duration').text, scale=1000)
|
duration = int_or_none(recordings.get('recordingDuration'))
|
||||||
thumbnail = recording.find('./image').text
|
timestamp = parse_iso8601(recordings.get('created_at'))
|
||||||
|
uploader = recordings.get('page', {}).get('title')
|
||||||
artist = recording.find('./artists/artist')
|
uploader_id = compat_str(recordings.get('page', {}).get('id'))
|
||||||
uploader = artist.find('./name').text
|
view_count = int_or_none(recordings.get('playCount'))
|
||||||
uploader_id = artist.find('./slug').text
|
comment_count = int_or_none(recordings.get('commentCount'))
|
||||||
|
categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
|
||||||
video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
|
|
||||||
'http:', recording.find('./url').text,
|
|
||||||
recording.find('./_class').text, recording.find('./file_id').text,
|
|
||||||
rec_doc.find('./uuid').text, video_id,
|
|
||||||
rec_doc.find('./jingle/file_id').text,
|
|
||||||
'http%3A%2F%2Fwww.play.fm%2Fplayer',
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': audio_url,
|
||||||
'ext': 'mp3',
|
|
||||||
'filesize': int_or_none(recording.find('./size').text),
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'upload_date': upload_date,
|
'description': description,
|
||||||
'view_count': view_count,
|
|
||||||
'comment_count': comment_count,
|
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'timestamp': timestamp,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'categories': categories,
|
||||||
}
|
}
|
||||||
|
78
youtube_dl/extractor/playwire.py
Normal file
78
youtube_dl/extractor/playwire.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
xpath_text,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PlaywireIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
|
||||||
|
'md5': 'e6398701e3595888125729eaa2329ed9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3353705',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'S04_RM_UCL_Rus',
|
||||||
|
'thumbnail': 're:^http://.*\.png$',
|
||||||
|
'duration': 145.94,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://cdn.playwire.com/11625/embed/85228.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id')
|
||||||
|
|
||||||
|
player = self._download_json(
|
||||||
|
'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
title = player['settings']['title']
|
||||||
|
duration = float_or_none(player.get('duration'), 1000)
|
||||||
|
|
||||||
|
content = player['content']
|
||||||
|
thumbnail = content.get('poster')
|
||||||
|
src = content['media']['f4m']
|
||||||
|
|
||||||
|
f4m = self._download_xml(src, video_id)
|
||||||
|
base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
|
||||||
|
formats = []
|
||||||
|
for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
|
||||||
|
media_url = media.get('url')
|
||||||
|
if not media_url:
|
||||||
|
continue
|
||||||
|
tbr = int_or_none(media.get('bitrate'))
|
||||||
|
width = int_or_none(media.get('width'))
|
||||||
|
height = int_or_none(media.get('height'))
|
||||||
|
f = {
|
||||||
|
'url': '%s/%s' % (base_url, media.attrib['url']),
|
||||||
|
'tbr': tbr,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
}
|
||||||
|
if not (tbr or width or height):
|
||||||
|
f['quality'] = 1 if '-hd.' in media_url else 0
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _extract_count(self, pattern, webpage, name):
|
def _extract_count(self, pattern, webpage, name):
|
||||||
count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
|
return str_to_int(self._search_regex(
|
||||||
if count:
|
pattern, webpage, '%s count' % name, fatal=False))
|
||||||
count = str_to_int(count)
|
|
||||||
return count
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
|
|||||||
if thumbnail:
|
if thumbnail:
|
||||||
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
||||||
|
|
||||||
view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
view_count = self._extract_count(
|
||||||
like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
||||||
dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
like_count = self._extract_count(
|
||||||
|
r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
||||||
|
dislike_count = self._extract_count(
|
||||||
|
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||||
if webpage.find('"encrypted":true') != -1:
|
if webpage.find('"encrypted":true') != -1:
|
||||||
|
youtube_dl/extractor/pornovoisines.py (96 lines, Normal file)
@@ -0,0 +1,96 @@
# coding: utf-8
from __future__ import unicode_literals

import re
import random

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    float_or_none,
    unified_strdate,
)


class PornoVoisinesIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'

    _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
        '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'

    _SERVER_NUMBERS = (1, 2)

    _TEST = {
        'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
        'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
        'info_dict': {
            'id': '1285',
            'display_id': 'recherche-appartement',
            'ext': 'mp4',
            'title': 'Recherche appartement',
            'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
            'thumbnail': 're:^https?://.*\.jpg$',
            'upload_date': '20140925',
            'duration': 120,
            'view_count': int,
            'average_rating': float,
            'categories': ['Débutante', 'Scénario', 'Sodomie'],
            'age_limit': 18,
        }
    }

    @classmethod
    def build_video_url(cls, num):
        return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self.build_video_url(video_id)

        title = self._html_search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
        description = self._html_search_regex(
            r'<article id="descriptif">(.+?)</article>',
            webpage, "description", fatal=False, flags=re.DOTALL)

        thumbnail = self._search_regex(
            r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
            webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail

        upload_date = unified_strdate(self._search_regex(
            r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
        duration = int_or_none(self._search_regex(
            'Durée (\d+)', webpage, 'duration', fatal=False))
        view_count = int_or_none(self._search_regex(
            r'(\d+) vues', webpage, 'view count', fatal=False))
        average_rating = self._search_regex(
            r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
        if average_rating:
            average_rating = float_or_none(average_rating.replace(',', '.'))

        categories = self._html_search_meta(
            'keywords', webpage, 'categories', fatal=False)
        if categories:
            categories = [category.strip() for category in categories.split(',')]

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'average_rating': average_rating,
            'categories': categories,
            'age_limit': 18,
        }
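Note: the random server selection above can be tried in isolation; this is a minimal standalone sketch of the same logic (the module-level names are illustrative, not part of the extractor):

import random

_VIDEO_URL_TEMPLATE = ('http://stream%d.pornovoisines.com'
                       '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4')
_SERVER_NUMBERS = (1, 2)

def build_video_url(num):
    # Same idea as the classmethod above: pick one of the two streaming hosts at random.
    return _VIDEO_URL_TEMPLATE % (random.choice(_SERVER_NUMBERS), num)

print(build_video_url('1285'))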
youtube_dl/extractor/primesharetv.py (69 lines, Normal file)
@@ -0,0 +1,69 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
)
from ..utils import ExtractorError


class PrimeShareTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'

    _TEST = {
        'url': 'http://primeshare.tv/download/238790B611',
        'md5': 'b92d9bf5461137c36228009f31533fbc',
        'info_dict': {
            'id': '238790B611',
            'ext': 'mp4',
            'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        if '>File not exist<' in webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

        headers = {
            'Referer': url,
            'Content-Type': 'application/x-www-form-urlencoded',
        }

        wait_time = int(self._search_regex(
            r'var\s+cWaitTime\s*=\s*(\d+)',
            webpage, 'wait time', default=7)) + 1
        self._sleep(wait_time, video_id)

        req = compat_urllib_request.Request(
            url, compat_urllib_parse.urlencode(fields), headers)
        video_page = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._search_regex(
            r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
            video_page, 'video url')

        title = self._html_search_regex(
            r'<h1>Watch\s*(?: )?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?: )?\s*<strong>',
            video_page, 'title')

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': 'mp4',
        }
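Note: the hidden-field scraping above boils down to a single re.findall over the page; a small self-contained sketch of the same technique, run against a made-up HTML snippet:

import re

SAMPLE_HTML = '''
<input type="hidden" name="hash" value="abc123">
<input type="hidden" name="referer" id="ref" value="">
'''

# Same verbose pattern shape as the extractor: capture name/value pairs of the
# hidden inputs and turn them into a dict that can be re-posted as form data.
fields = dict(re.findall(r'''(?x)<input\s+
    type="hidden"\s+
    name="([^"]+)"\s+
    (?:id="[^"]+"\s+)?
    value="([^"]*)"
''', SAMPLE_HTML))

print(fields)  # {'hash': 'abc123', 'referer': ''}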
@@ -10,6 +10,7 @@ from ..compat import (
 )
 from ..utils import (
     unified_strdate,
+    int_or_none,
 )
 
 
@@ -24,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor):
         'info_dict': {
             'id': '2104602',
             'ext': 'mp4',
-            'title': 'Staffel 2, Episode 18 - Jahresrückblick',
+            'title': 'Episode 18 - Staffel 2',
             'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
             'upload_date': '20131231',
             'duration': 5845.04,
@@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor):
         urls_sources = urls_sources.values()
 
         def fix_bitrate(bitrate):
+            bitrate = int_or_none(bitrate)
+            if not bitrate:
+                return None
             return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
 
         for source in urls_sources:
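Note: the bitrate normalization added above can be exercised on its own; a minimal sketch, with int_or_none replaced by a plain int conversion so it runs standalone:

def fix_bitrate(bitrate):
    # As in the change above: sources report bitrate inconsistently in bps or kbps,
    # so exact multiples of 1000 are scaled down to kbps and falsy values dropped.
    try:
        bitrate = int(bitrate)
    except (TypeError, ValueError):
        return None
    if not bitrate:
        return None
    return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate

print(fix_bitrate('2500000'))  # 2500
print(fix_bitrate(128))        # 128
print(fix_bitrate(None))       # None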
youtube_dl/extractor/qqmusic.py (170 lines, Normal file)
@@ -0,0 +1,170 @@
# coding: utf-8
from __future__ import unicode_literals

import random
import time
import re

from .common import InfoExtractor
from ..utils import (
    strip_jsonp,
    unescapeHTML,
)
from ..compat import compat_urllib_request


class QQMusicIE(InfoExtractor):
    _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
    _TESTS = [{
        'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
        'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
        'info_dict': {
            'id': '004295Et37taLD',
            'ext': 'm4a',
            'title': '可惜没如果',
            'upload_date': '20141227',
            'creator': '林俊杰',
            'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
        }
    }]

    # Reference: m_r_GetRUin() in top_player.js
    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
    @staticmethod
    def m_r_get_ruin():
        curMs = int(time.time() * 1000) % 1000
        return int(round(random.random() * 2147483647) * curMs % 1E10)

    def _real_extract(self, url):
        mid = self._match_id(url)

        detail_info_page = self._download_webpage(
            'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
            mid, note='Download song detail info',
            errnote='Unable to get song detail info', encoding='gbk')

        song_name = self._html_search_regex(
            r"songname:\s*'([^']+)'", detail_info_page, 'song name')

        publish_time = self._html_search_regex(
            r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
            'publish time', default=None)
        if publish_time:
            publish_time = publish_time.replace('-', '')

        singer = self._html_search_regex(
            r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)

        lrc_content = self._html_search_regex(
            r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
            detail_info_page, 'LRC lyrics', default=None)

        guid = self.m_r_get_ruin()

        vkey = self._download_json(
            'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
            mid, note='Retrieve vkey', errnote='Unable to get vkey',
            transform_source=strip_jsonp)['key']
        song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)

        return {
            'id': mid,
            'url': song_url,
            'title': song_name,
            'upload_date': publish_time,
            'creator': singer,
            'description': lrc_content,
        }


class QQPlaylistBaseIE(InfoExtractor):
    @staticmethod
    def qq_static_url(category, mid):
        return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)

    @classmethod
    def get_entries_from_page(cls, page):
        entries = []

        for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
            song_mid = unescapeHTML(item).split('|')[-5]
            entries.append(cls.url_result(
                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
                song_mid))

        return entries


class QQMusicSingerIE(QQPlaylistBaseIE):
    _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
    _TEST = {
        'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
        'info_dict': {
            'id': '001BLpXF2DyJe2',
            'title': '林俊杰',
            'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
        },
        'playlist_count': 12,
    }

    def _real_extract(self, url):
        mid = self._match_id(url)

        singer_page = self._download_webpage(
            self.qq_static_url('singer', mid), mid, 'Download singer page')

        entries = self.get_entries_from_page(singer_page)

        singer_name = self._html_search_regex(
            r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
            default=None)

        singer_id = self._html_search_regex(
            r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
            default=None)

        singer_desc = None

        if singer_id:
            req = compat_urllib_request.Request(
                'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
            req.add_header(
                'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
            singer_desc_page = self._download_xml(
                req, mid, 'Donwload singer description XML')

            singer_desc = singer_desc_page.find('./data/info/desc').text

        return self.playlist_result(entries, mid, singer_name, singer_desc)


class QQMusicAlbumIE(QQPlaylistBaseIE):
    _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'

    _TEST = {
        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
        'info_dict': {
            'id': '000gXCTb2AhRR1',
            'title': '我们都是这样长大的',
            'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
        },
        'playlist_count': 4,
    }

    def _real_extract(self, url):
        mid = self._match_id(url)

        album_page = self._download_webpage(
            self.qq_static_url('album', mid), mid, 'Download album page')

        entries = self.get_entries_from_page(album_page)

        album_name = self._html_search_regex(
            r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
            default=None)

        album_detail = self._html_search_regex(
            r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
            album_page, 'album details', default=None)

        return self.playlist_result(entries, mid, album_name, album_detail)
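Note: the GUID helper and stream URL shape above can be tried on their own; a minimal standalone sketch using only the standard library (build_stream_url and its placeholder vkey are illustrative, not part of the extractor):

import random
import time

def m_r_get_ruin():
    # Mirror of the helper above: the millisecond remainder scaled by a random
    # 31-bit value, reduced modulo 1e10 so it fits the guid parameter of the vkey request.
    cur_ms = int(time.time() * 1000) % 1000
    return int(round(random.random() * 2147483647) * cur_ms % 1E10)

def build_stream_url(mid, vkey, guid):
    # Same URL shape as song_url above; mid and vkey are assumed inputs here.
    return 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)

guid = m_r_get_ruin()
print(guid)
print(build_stream_url('004295Et37taLD', '<vkey>', guid))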
youtube_dl/extractor/radiojavan.py (67 lines, Normal file)
@@ -0,0 +1,67 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import(
    unified_strdate,
    str_to_int,
)


class RadioJavanIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
        'info_dict': {
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
            'thumbnail': 're:^https?://.*\.jpe?g$',
            'upload_date': '20150215',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        formats = [{
            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
            'format_id': '%sp' % height,
            'height': int(height),
        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        upload_date = unified_strdate(self._search_regex(
            r'class="date_added">Date added: ([^<]+)<',
            webpage, 'upload date', fatal=False))

        view_count = str_to_int(self._search_regex(
            r'class="views">Plays: ([\d,]+)',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) likes',
            webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) dislikes',
            webpage, 'dislike count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }
@@ -13,7 +13,7 @@ from ..utils import (
 
 
 class RaiIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
+    _VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
     _TESTS = [
         {
             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
@@ -62,34 +62,78 @@ class RaiIE(InfoExtractor):
                 'description': 'Edizione delle ore 20:30 ',
             }
         },
+        {
+            'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
+            'md5': '02b64456f7cc09f96ff14e7dd489017e',
+            'info_dict': {
+                'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
+                'ext': 'flv',
+                'title': 'Il Candidato - Primo episodio: "Le Primarie"',
+                'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
+                'uploader': 'RaiTre',
+            }
+        }
     ]
 
+    def _extract_relinker_url(self, webpage):
+        return self._proto_relative_url(self._search_regex(
+            [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'],
+            webpage, 'relinker url', default=None))
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        host = mobj.group('host')
 
-        media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
+        webpage = self._download_webpage(url, video_id)
 
-        title = media.get('name')
-        description = media.get('desc')
-        thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
-        duration = parse_duration(media.get('length'))
-        uploader = media.get('author')
-        upload_date = unified_strdate(media.get('date'))
+        relinker_url = self._extract_relinker_url(webpage)
 
-        formats = []
+        if not relinker_url:
+            iframe_path = self._search_regex(
+                r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
+                webpage, 'iframe')
+            webpage = self._download_webpage(
+                '%s/%s' % (host, iframe_path), video_id)
+            relinker_url = self._extract_relinker_url(webpage)
 
-        for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
-            media_url = media.get(format_id)
-            if not media_url:
-                continue
-            formats.append({
+        relinker = self._download_json(
+            '%s&output=47' % relinker_url, video_id)
+
+        media_url = relinker['video'][0]
+        ct = relinker.get('ct')
+        if ct == 'f4m':
+            formats = self._extract_f4m_formats(
+                media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id)
+        else:
+            formats = [{
                 'url': media_url,
-                'format_id': format_id,
-                'ext': 'mp4',
-            })
+                'format_id': ct,
+            }]
 
-        subtitles = self.extract_subtitles(video_id, url)
+        json_link = self._html_search_meta(
+            'jsonlink', webpage, 'JSON link', default=None)
+        if json_link:
+            media = self._download_json(
+                host + json_link, video_id, 'Downloading video JSON')
+            title = media.get('name')
+            description = media.get('desc')
+            thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
+            duration = parse_duration(media.get('length'))
+            uploader = media.get('author')
+            upload_date = unified_strdate(media.get('date'))
+        else:
+            title = (self._search_regex(
+                r'var\s+videoTitolo\s*=\s*"(.+?)";',
+                webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"')
+            description = self._og_search_description(webpage)
+            thumbnail = self._og_search_thumbnail(webpage)
+            duration = None
+            uploader = self._html_search_meta('Editore', webpage, 'uploader')
+            upload_date = unified_strdate(self._html_search_meta(
+                'item-date', webpage, 'upload date', default=None))
+
+        subtitles = self.extract_subtitles(video_id, webpage)
 
         return {
             'id': video_id,
@@ -103,8 +147,7 @@ class RaiIE(InfoExtractor):
             'subtitles': subtitles,
         }
 
-    def _get_subtitles(self, video_id, url):
-        webpage = self._download_webpage(url, video_id)
+    def _get_subtitles(self, video_id, webpage):
         subtitles = {}
         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
         if m:
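Note: the title fallback introduced above (page-defined JS variable first, Open Graph title otherwise) can be sketched without the extractor framework; the sample HTML below is made up for illustration:

import re

webpage = '<meta property="og:title" content="TG1 ore 20:00"/>'

title = None
m = re.search(r'var\s+videoTitolo\s*=\s*"(.+?)";', webpage)
if m:
    title = m.group(1)
if title is None:
    # Fall back to the og:title meta tag, as the extractor does via _og_search_title.
    m = re.search(r'<meta property="og:title" content="([^"]+)"', webpage)
    title = m.group(1) if m else None
if title:
    title = title.replace('\\"', '"')
print(title)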
@@ -1,17 +1,19 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import ExtractorError
 
 
 class RedTubeIE(InfoExtractor):
     _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.redtube.com/66418',
+        'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
         'info_dict': {
             'id': '66418',
             'ext': 'mp4',
-            "title": "Sucked on a toilet",
-            "age_limit": 18,
+            'title': 'Sucked on a toilet',
+            'age_limit': 18,
         }
     }
 
@@ -19,6 +21,9 @@ class RedTubeIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
+            raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+
         video_url = self._html_search_regex(
             r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
         video_title = self._html_search_regex(
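Note: the tombstone check added above is a plain membership test over the page; a standalone sketch with a made-up page string:

# Minimal sketch of the removal check; the sample page string is illustrative only.
page = '<div class="video-deleted-info">This video has been removed</div>'

markers = ['video-deleted-info', '>This video has been removed']
if any(marker in page for marker in markers):
    print('Video has been removed')
else:
    print('Video still available')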
@@ -8,8 +8,10 @@ import time
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import (
+    ExtractorError,
     float_or_none,
     remove_end,
+    std_headers,
     struct_unpack,
 )
 
@@ -84,13 +86,22 @@ class RTVEALaCartaIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _real_initialize(self):
+        user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
+        manager_info = self._download_json(
+            'http://www.rtve.es/odin/loki/' + user_agent_b64,
+            None, 'Fetching manager info')
+        self._manager = manager_info['manager']
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         info = self._download_json(
             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
             video_id)['page']['items'][0]
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
+        if info['state'] == 'DESPU':
+            raise ExtractorError('The video is no longer available', expected=True)
+        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
         png = self._download_webpage(png_url, video_id, 'Downloading url information')
         video_url = _decrypt_url(png)
         if not video_url.endswith('.f4m'):
@@ -127,6 +138,47 @@ class RTVEALaCartaIE(InfoExtractor):
             for s in subs)
 
 
+class RTVEInfantilIE(InfoExtractor):
+    IE_NAME = 'rtve.es:infantil'
+    IE_DESC = 'RTVE infantil'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+
+    _TESTS = [{
+        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
+        'md5': '915319587b33720b8e0357caaa6617e6',
+        'info_dict': {
+            'id': '3040283',
+            'ext': 'mp4',
+            'title': 'Maneras de vivir',
+            'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+            'duration': 357.958,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        info = self._download_json(
+            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+            video_id)['page']['items'][0]
+
+        webpage = self._download_webpage(url, video_id)
+        vidplayer_id = self._search_regex(
+            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
+
+        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
+        png = self._download_webpage(png_url, video_id, 'Downloading url information')
+        video_url = _decrypt_url(png)
+
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'title': info['title'],
+            'url': video_url,
+            'thumbnail': info.get('image'),
+            'duration': float_or_none(info.get('duration'), scale=1000),
+        }
+
+
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'
     IE_DESC = 'RTVE.es live streams'
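Note: the new _real_initialize above only builds a lookup URL from the base64-encoded User-Agent; a standalone sketch of that construction, assuming a stand-in User-Agent string and leaving the network call commented out:

import base64

USER_AGENT = 'Mozilla/5.0 (compatible; example)'  # stand-in for std_headers['User-Agent']

# Same URL construction as _real_initialize: the User-Agent is base64-encoded and
# appended to the odin/loki endpoint, whose JSON response carries the 'manager'
# value later interpolated into the thumbnail PNG URL.
user_agent_b64 = base64.b64encode(USER_AGENT.encode('utf-8')).decode('utf-8')
manager_url = 'http://www.rtve.es/odin/loki/' + user_agent_b64
print(manager_url)

# The actual extractor fetches this with _download_json and then builds:
# png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (manager, video_id)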
youtube_dl/extractor/safari.py (157 lines, Normal file)
@@ -0,0 +1,157 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from .brightcove import BrightcoveIE

from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
)
from ..utils import (
    ExtractorError,
    smuggle_url,
    std_headers,
)


class SafariBaseIE(InfoExtractor):
    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
    _NETRC_MACHINE = 'safari'

    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
    _API_FORMAT = 'json'

    LOGGED_IN = False

    def _real_initialize(self):
        # We only need to log in once for courses or individual videos
        if not self.LOGGED_IN:
            self._login()
            SafariBaseIE.LOGGED_IN = True

    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(
                self._ACCOUNT_CREDENTIALS_HINT,
                expected=True)

        headers = std_headers
        if 'Referer' not in headers:
            headers['Referer'] = self._LOGIN_URL

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            'Downloading login form')

        csrf = self._html_search_regex(
            r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
            login_page, 'csrf token')

        login_form = {
            'csrfmiddlewaretoken': csrf,
            'email': username,
            'password1': password,
            'login': 'Sign In',
            'next': '',
        }

        request = compat_urllib_request.Request(
            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
        login_page = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
            raise ExtractorError(
                'Login failed; make sure your credentials are correct and try again.',
                expected=True)

        self.to_screen('Login successful')


class SafariIE(SafariBaseIE):
    IE_NAME = 'safari'
    IE_DESC = 'safaribooksonline.com online video'
    _VALID_URL = r'''(?x)https?://
                        (?:www\.)?safaribooksonline\.com/
                            (?:
                                library/view/[^/]+|
                                api/v1/book
                            )/
                            (?P<course_id>\d+)/
                                (?:chapter(?:-content)?/)?
                            (?P<part>part\d+)\.html
    '''

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
        'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
        'info_dict': {
            'id': '2842601850001',
            'ext': 'mp4',
            'title': 'Introduction',
        },
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        course_id = mobj.group('course_id')
        part = mobj.group('part')

        webpage = self._download_webpage(
            '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
            part)

        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
        if not bc_url:
            raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)

        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')


class SafariCourseIE(SafariBaseIE):
    IE_NAME = 'safari:course'
    IE_DESC = 'safaribooksonline.com online courses'

    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
        'info_dict': {
            'id': '9780133392838',
            'title': 'Hadoop Fundamentals LiveLessons',
        },
        'playlist_count': 22,
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        course_id = self._match_id(url)

        course_json = self._download_json(
            '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
            course_id, 'Downloading course JSON')

        if 'chapters' not in course_json:
            raise ExtractorError(
                'No chapters found for course %s' % course_id, expected=True)

        entries = [
            self.url_result(chapter, 'Safari')
            for chapter in course_json['chapters']]

        course_title = course_json['title']

        return self.playlist_result(entries, course_id, course_title)
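Note: since SafariBaseIE logs in through the standard username/password options, the new extractor can be driven from the embedding API; a minimal sketch, assuming youtube-dl of this release is installed and that the placeholder credentials are valid:

from youtube_dl import YoutubeDL

# Credentials are placeholders; _login() above is fed by the regular
# 'username'/'password' options (or a 'safari' .netrc entry).
ydl_opts = {
    'username': 'user@example.com',
    'password': 'secret',
}

with YoutubeDL(ydl_opts) as ydl:
    # download=False just resolves the Brightcove URL / course playlist entries.
    info = ydl.extract_info(
        'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
        download=False)
    print(info.get('title'))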
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
         slideshare_obj = self._search_regex(
-            r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
+            r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
             webpage, 'slideshare object')
         info = json.loads(slideshare_obj)
         if info['slideshow']['type'] != 'video':
@@ -4,22 +4,87 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .common import compat_str
+from ..compat import (
+    compat_str,
+    compat_urllib_request
+)
+from ..utils import sanitize_url_path_consecutive_slashes
 
 
 class SohuIE(InfoExtractor):
     _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
 
-    _TEST = {
+    _TESTS = [{
+        'note': 'This video is available only in Mainland China',
         'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
-        'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
+        'md5': '29175c8cadd8b5cc4055001e85d6b372',
         'info_dict': {
             'id': '382479172',
             'ext': 'mp4',
             'title': 'MV:Far East Movement《The Illest》',
         },
-        'skip': 'Only available from China',
-    }
+        'params': {
+            'cn_verification_proxy': 'proxy.uku.im:8888'
+        }
+    }, {
+        'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
+        'md5': '699060e75cf58858dd47fb9c03c42cfb',
+        'info_dict': {
+            'id': '409385080',
+            'ext': 'mp4',
+            'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
+        }
+    }, {
+        'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
+        'md5': '9bf34be48f2f4dadcb226c74127e203c',
+        'info_dict': {
+            'id': '78693464',
+            'ext': 'mp4',
+            'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+        }
+    }, {
+        'note': 'Multipart video',
+        'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
+        'info_dict': {
+            'id': '78910339',
+        },
+        'playlist': [{
+            'md5': 'bdbfb8f39924725e6589c146bc1883ad',
+            'info_dict': {
+                'id': '78910339_part1',
+                'ext': 'mp4',
+                'duration': 294,
+                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            }
+        }, {
+            'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
+            'info_dict': {
+                'id': '78910339_part2',
+                'ext': 'mp4',
+                'duration': 300,
+                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            }
+        }, {
+            'md5': '8407e634175fdac706766481b9443450',
+            'info_dict': {
+                'id': '78910339_part3',
+                'ext': 'mp4',
+                'duration': 150,
+                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            }
+        }]
+    }, {
+        'note': 'Video with title containing dash',
+        'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
+        'info_dict': {
+            'id': '78932792',
+            'ext': 'mp4',
+            'title': 'youtube-dl testing video',
+        },
+        'params': {
+            'skip_download': True
+        }
+    }]
 
     def _real_extract(self, url):
 
@@ -29,8 +94,14 @@ class SohuIE(InfoExtractor):
             else:
                 base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
 
+            req = compat_urllib_request.Request(base_data_url + vid_id)
+
+            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+            if cn_verification_proxy:
+                req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
             return self._download_json(
-                base_data_url + vid_id, video_id,
+                req, video_id,
                 'Downloading JSON data for %s' % vid_id)
 
         mobj = re.match(self._VALID_URL, url)
@@ -38,10 +109,8 @@ class SohuIE(InfoExtractor):
         mytv = mobj.group('mytv') is not None
 
         webpage = self._download_webpage(url, video_id)
-        raw_title = self._html_search_regex(
-            r'(?s)<title>(.+?)</title>',
-            webpage, 'video title')
-        title = raw_title.partition('-')[0].strip()
+
+        title = self._og_search_title(webpage)
 
         vid = self._html_search_regex(
            r'var vid ?= ?["\'](\d+)["\']',
@@ -77,7 +146,9 @@ class SohuIE(InfoExtractor):
                     % (format_id, i + 1, part_count))
 
                 part_info = part_str.split('|')
-                video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+                video_url = sanitize_url_path_consecutive_slashes(
+                    '%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
 
                 formats.append({
                     'url': video_url,
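Note: the change above routes only the metadata request through the optional China-verification proxy by tagging it with the internal Ytdl-request-proxy header that youtube-dl's opener understands; a standalone sketch of the request tagging, using plain urllib instead of the compat wrappers:

try:
    from urllib.request import Request  # Python 3
except ImportError:
    from urllib2 import Request  # Python 2

# Value taken from the test's params above; any host:port proxy would do.
cn_verification_proxy = 'proxy.uku.im:8888'

req = Request('http://hot.vrs.sohu.com/vrs_flash.action?vid=382479172')
if cn_verification_proxy:
    # Only this single request is marked for proxying; the rest of the session is untouched.
    req.add_header('Ytdl-request-proxy', cn_verification_proxy)

print(req.headers)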
@@ -221,7 +221,12 @@ class SoundcloudIE(InfoExtractor):
             info_json_url += "&secret_token=" + token
         elif mobj.group('player'):
             query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-            return self.url_result(query['url'][0])
+            real_url = query['url'][0]
+            # If the token is in the query of the original url we have to
+            # manually add it
+            if 'secret_token' in query:
+                real_url += '?secret_token=' + query['secret_token'][0]
+            return self.url_result(real_url)
         else:
             # extract uploader (which is in the url)
             uploader = mobj.group('uploader')
@@ -242,7 +247,7 @@ class SoundcloudIE(InfoExtractor):
 
 
 class SoundcloudSetIE(SoundcloudIE):
-    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
     IE_NAME = 'soundcloud:set'
     _TESTS = [{
         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
@@ -287,7 +292,7 @@ class SoundcloudSetIE(SoundcloudIE):
 
 
 class SoundcloudUserIE(SoundcloudIE):
-    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
     IE_NAME = 'soundcloud:user'
     _TESTS = [{
        'url': 'https://soundcloud.com/the-concept-band',
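Note: the secret_token propagation added above is ordinary query-string handling; a standalone sketch with an illustrative player URL and token:

try:
    from urllib.parse import urlparse, parse_qs  # Python 3
except ImportError:
    from urlparse import urlparse, parse_qs  # Python 2

# Example player URL; the api.soundcloud.com target and the token are made-up values.
player_url = ('https://w.soundcloud.com/player/?url=https%3A%2F%2Fapi.soundcloud.com%2Ftracks%2F123456'
              '&secret_token=s-AbCdE')

query = parse_qs(urlparse(player_url).query)
real_url = query['url'][0]
# As in the change above: a private track's token is only present on the player URL,
# so it has to be re-attached to the resolved track URL by hand.
if 'secret_token' in query:
    real_url += '?secret_token=' + query['secret_token'][0]
print(real_url)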
youtube_dl/extractor/spankbang.py (60 lines, Normal file)
@@ -0,0 +1,60 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class SpankBangIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
    _TEST = {
        'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
        'md5': '1cc433e1d6aa14bc376535b8679302f7',
        'info_dict': {
            'id': '3vvn',
            'ext': 'mp4',
            'title': 'fantasy solo',
            'description': 'dillion harper masturbates on a bed',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'silly2587',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        stream_key = self._html_search_regex(
            r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
            webpage, 'stream key')

        formats = [{
            'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
            'ext': 'mp4',
            'format_id': '%sp' % height,
            'height': int(height),
        } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
        self._sort_formats(formats)

        title = self._html_search_regex(
            r'(?s)<h1>(.+?)</h1>', webpage, 'title')
        description = self._search_regex(
            r'class="desc"[^>]*>([^<]+)',
            webpage, 'description', default=None)
        thumbnail = self._og_search_thumbnail(webpage)
        uploader = self._search_regex(
            r'class="user"[^>]*>([^<]+)',
            webpage, 'uploader', fatal=False)

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'formats': formats,
            'age_limit': age_limit,
        }
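Note: the format assembly above is a list comprehension over the advertised heights; a standalone sketch, fed with a made-up page snippet and placeholder identifiers:

import re

webpage = '<span class="q_240p">240p</span><span class="q_480p">480p</span>'
video_id, stream_key = '3vvn', 'example-key'  # placeholders

formats = [{
    'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
    'ext': 'mp4',
    'format_id': '%sp' % height,
    'height': int(height),
} for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]

# Roughly what _sort_formats does for this simple case: order by height.
for f in sorted(formats, key=lambda f: f['height']):
    print(f['format_id'], f['url'])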
@@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor
 
 class SpikeIE(MTVServicesInfoExtractor):
     _VALID_URL = r'''(?x)https?://
-        (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
+        (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+|
         m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
         '''
     _TEST = {
youtube_dl/extractor/srf.py (104 lines, Normal file)
@@ -0,0 +1,104 @@
# coding: utf-8
from __future__ import unicode_literals

import re
from .common import InfoExtractor
from ..utils import (
    determine_ext,
    parse_iso8601,
    xpath_text,
)


class SrfIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})'
    _TESTS = [{
        'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
        'md5': '4cd93523723beff51bb4bee974ee238d',
        'info_dict': {
            'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
            'display_id': 'snowden-beantragt-asyl-in-russland',
            'ext': 'm4v',
            'upload_date': '20130701',
            'title': 'Snowden beantragt Asyl in Russland',
            'timestamp': 1372713995,
        }
    }, {
        # No Speichern (Save) button
        'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
        'md5': 'd97e236e80d1d24729e5d0953d276a4f',
        'info_dict': {
            'id': '677f5829-e473-4823-ac83-a1087fe97faa',
            'display_id': 'jaguar-xk120-shadow-und-tornado-dampflokomotive',
            'ext': 'flv',
            'upload_date': '20130710',
            'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
            'timestamp': 1373493600,
        },
    }, {
        'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
        'only_matching': True,
    }, {
        'url': 'https://tp.srgssr.ch/p/flash?urn=urn:srf:ais:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        display_id = re.match(self._VALID_URL, url).group('display_id') or video_id

        video_data = self._download_xml(
            'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id,
            display_id)

        title = xpath_text(
            video_data, './AssetMetadatas/AssetMetadata/title', fatal=True)
        thumbnails = [{
            'url': s.text
        } for s in video_data.findall('.//ImageRepresentation/url')]
        timestamp = parse_iso8601(xpath_text(video_data, './createdDate'))
        # The <duration> field in XML is different from the exact duration, skipping

        formats = []
        for item in video_data.findall('./Playlists/Playlist') + video_data.findall('./Downloads/Download'):
            for url_node in item.findall('url'):
                quality = url_node.attrib['quality']
                full_url = url_node.text
                original_ext = determine_ext(full_url)
                format_id = '%s-%s' % (quality, item.attrib['protocol'])
                if original_ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        full_url + '?hdcore=3.4.0', display_id, f4m_id=format_id))
                elif original_ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        full_url, display_id, 'mp4', m3u8_id=format_id))
                else:
                    formats.append({
                        'url': full_url,
                        'ext': original_ext,
                        'format_id': format_id,
                        'quality': 0 if 'HD' in quality else -1,
                        'preference': 1,
                    })

        self._sort_formats(formats)

        subtitles = {}
        subtitles_data = video_data.find('Subtitles')
        if subtitles_data is not None:
            subtitles_list = [{
                'url': sub.text,
                'ext': determine_ext(sub.text),
            } for sub in subtitles_data]
            if subtitles_list:
                subtitles['de'] = subtitles_list

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'subtitles': subtitles,
        }
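Note: the format collection above walks the integrationlayer XML playlist by playlist; a standalone sketch of the same walk over a made-up XML snippet that only mimics the elements the loop reads:

import xml.etree.ElementTree as ET

SAMPLE = '''<Video>
  <Playlists>
    <Playlist protocol="HTTP-HDS">
      <url quality="SD">http://example.invalid/stream.f4m</url>
      <url quality="HD">http://example.invalid/stream_hd.f4m</url>
    </Playlist>
  </Playlists>
</Video>'''

video_data = ET.fromstring(SAMPLE)
formats = []
for item in video_data.findall('./Playlists/Playlist'):
    for url_node in item.findall('url'):
        quality = url_node.attrib['quality']
        formats.append({
            'url': url_node.text,
            # format_id and quality ranking mirror the extractor's choices.
            'format_id': '%s-%s' % (quality, item.attrib['protocol']),
            'quality': 0 if 'HD' in quality else -1,
        })
print(formats)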
Some files were not shown because too many files have changed in this diff.