Compare commits
611 Commits
2015.02.02
...
2015.03.24
Author | SHA1 | Date | |
---|---|---|---|
48c971e073 | |||
f5e2efbbf0 | |||
b0872c19ea | |||
9f790b9901 | |||
93f787070f | |||
f9544f6e8f | |||
336d19044c | |||
7866c9e173 | |||
1a4123de04 | |||
cf2e2eb1c0 | |||
2051acdeb2 | |||
cefdf970cc | |||
a1d0aa7b88 | |||
49aeedb8cb | |||
ef249a2cd7 | |||
a09141548a | |||
5379a2d40d | |||
c9450c7ab1 | |||
faa1b5c292 | |||
393d9fc6d2 | |||
4e6a228689 | |||
179d6678b1 | |||
85698c5086 | |||
a7d9ded45d | |||
531980d89c | |||
1887ecd4d6 | |||
cd32c2caba | |||
1c9a1457fc | |||
038b0eb1da | |||
f20bf146e2 | |||
01218f919b | |||
2684871bc1 | |||
ccf3960eec | |||
eecc0685c9 | |||
2ed849eccf | |||
3378d67a18 | |||
f3c0c667a6 | |||
0ae8bbac2d | |||
cbc3cfcab4 | |||
b30ef07c6c | |||
73900846b1 | |||
d1dc7e3991 | |||
3073a6d5e9 | |||
aae53774f2 | |||
7a757b7194 | |||
fa8ce26904 | |||
2c2c06e359 | |||
ee580538fa | |||
c3c5c31517 | |||
ed9a25dd61 | |||
9ef4f12b53 | |||
84f8101606 | |||
b1337948eb | |||
98f02fdde2 | |||
048fdc2292 | |||
2ca1c5aa9f | |||
674fb0fcc5 | |||
00bfe40e4d | |||
cd459b1d49 | |||
92a4793b3c | |||
dc03a42537 | |||
219da6bb68 | |||
0499cd866e | |||
13047f4135 | |||
af69cab21d | |||
d41a3fa1b4 | |||
733be371af | |||
576904bce6 | |||
cf47794f09 | |||
c06a9f8730 | |||
2e90dff2c2 | |||
90183a46d8 | |||
b68eedba23 | |||
d5b559393b | |||
1de4ac1385 | |||
39aa42ffbb | |||
ec1b9577ba | |||
3b4444f99a | |||
613b2d9dc6 | |||
8f4cc22455 | |||
7c42327e0e | |||
873383e9bd | |||
8508557e77 | |||
4d1652484f | |||
88cf6fb368 | |||
e7db87f700 | |||
2cb434e53e | |||
cd65491c30 | |||
082b1155a3 | |||
9202b1b787 | |||
a7e01c438d | |||
05be67e77d | |||
85741b9986 | |||
f247a199fe | |||
29171bc2d2 | |||
7be5a62ed7 | |||
3647136f24 | |||
13598940e3 | |||
0eb365868e | |||
28c6411e49 | |||
bba3fc7960 | |||
fcd877013e | |||
ba1d4c0488 | |||
517bcca299 | |||
1b53778175 | |||
b7a0304d92 | |||
545315a985 | |||
3f4327520c | |||
4a34f69ea6 | |||
fb7e68833c | |||
486dd09e0b | |||
054b99a330 | |||
65c5e044c7 | |||
11984c7467 | |||
3946864c8a | |||
b84037013e | |||
1dbfc62d75 | |||
d7d79106c7 | |||
1138491631 | |||
71705fa70d | |||
602814adab | |||
3a77719c5a | |||
7e195d0e92 | |||
e04793401d | |||
a3fbd18824 | |||
c6052b8c14 | |||
c792b5011f | |||
32aaeca775 | |||
1593194c63 | |||
614a7e1e23 | |||
2ebfeacabc | |||
f5d8f58a17 | |||
937daef4a7 | |||
dd77f14c64 | |||
c36cbe5a8a | |||
41b2194f86 | |||
d1e2e8f583 | |||
47fe42e1ab | |||
4c60393854 | |||
f848215dfc | |||
dcca581967 | |||
d475b3384c | |||
dd7831fe94 | |||
cc08b11d16 | |||
8bba753cca | |||
43d6280d0a | |||
e5a11a2293 | |||
f18ef2d144 | |||
1bb5c511a5 | |||
d55de57b67 | |||
a2aaf4dbc6 | |||
bdf6eee0ae | |||
8b910bda0c | |||
24993e3b39 | |||
11101076a1 | |||
f838875726 | |||
28778d6bae | |||
1132eae56d | |||
d34e79492d | |||
ab205b9dc8 | |||
7dcad95d4f | |||
8a48223a7b | |||
d47ae7f620 | |||
135c9c42bf | |||
0bf79ac455 | |||
98998cded6 | |||
14137b5781 | |||
a172d96292 | |||
23ba76bc0e | |||
61e00a9775 | |||
d1508cd68d | |||
9c85b5376d | |||
3c6f245083 | |||
f207019ce5 | |||
bd05aa4e24 | |||
8dc9d361c2 | |||
d0e958c71c | |||
a0bb7c5593 | |||
7feddd9fc7 | |||
55969016e9 | |||
9609f02e3c | |||
5c7495a194 | |||
5ee6fc974e | |||
c2ebea6580 | |||
12a129ec6d | |||
f28fe66970 | |||
123397317c | |||
dc570c4951 | |||
22d3628319 | |||
50c9949d7a | |||
376817c6d4 | |||
63fc800057 | |||
e0d0572b73 | |||
7fde87c77d | |||
938c3f65b6 | |||
2461f79d2a | |||
499bfcbfd0 | |||
07490f8017 | |||
91410c9bfa | |||
a7440261c5 | |||
76c73715fb | |||
c75f0b361a | |||
295df4edb9 | |||
562ceab13d | |||
2f0f6578c3 | |||
30cbd4e0d6 | |||
549e58069c | |||
7594be85ff | |||
3630034609 | |||
4e01501bbf | |||
1aa5172f56 | |||
f7e2ee8fa6 | |||
66dc9a3701 | |||
31bd39256b | |||
003c69a84b | |||
0134901108 | |||
eee6293d57 | |||
8237bec4f0 | |||
29cad7ad13 | |||
0d103de3b0 | |||
a0090691d0 | |||
6c87c2eea8 | |||
58c2ec6ab3 | |||
df5ae3eb16 | |||
efda2d7854 | |||
e143f5dae9 | |||
48218cdb97 | |||
e9fade72f3 | |||
0f2c0d335b | |||
40b077bc7e | |||
a931092cb3 | |||
bd3749ed69 | |||
4ffbf77886 | |||
781a7ef60a | |||
5b2949ee0b | |||
a0d646135a | |||
7862ad88b7 | |||
f3bff94cf9 | |||
0eba1e1782 | |||
e3216b82bf | |||
da419e2332 | |||
0d97ef43be | |||
1a2313a6f2 | |||
250a9bdfe2 | |||
6317a3e9da | |||
7ab7c9e932 | |||
e129c5bc0d | |||
2e241242a3 | |||
9724e5d336 | |||
63a562f95e | |||
5c340b0387 | |||
1c6510f57a | |||
2a15a98a6a | |||
72a406e7aa | |||
feccc3ff37 | |||
265bfa2c79 | |||
8faf9b9b41 | |||
84be7c230c | |||
3e675fabe0 | |||
cd5b4b0bc2 | |||
7ef822021b | |||
9a48926a57 | |||
13cd97f3df | |||
183139340b | |||
1c69bca258 | |||
c10ea454dc | |||
9504fc21b5 | |||
13d8fbef30 | |||
b8988b63a6 | |||
5eaaeb7c31 | |||
c4f8c453ae | |||
6f4ba54079 | |||
637570326b | |||
37f885650c | |||
c8c34ccb20 | |||
e765ed3a9c | |||
677063594e | |||
59c7cbd482 | |||
570311610e | |||
41b264e77c | |||
df4bd0d53f | |||
7f09a662a0 | |||
4f3b21e1c7 | |||
54233c9080 | |||
db8e13ef71 | |||
5a42414b9c | |||
9c665ab72e | |||
b665ba6aa6 | |||
ec5913b5cd | |||
25ac63ed71 | |||
99209c2916 | |||
1fbaa0a521 | |||
3037b91e05 | |||
ffdf972b91 | |||
459e5fbd5f | |||
bfc993cc91 | |||
4432db35d9 | |||
591ab1dff9 | |||
5bca2424bc | |||
bd61a9e770 | |||
3438e7acd2 | |||
09c200acf2 | |||
716889cab1 | |||
409693984f | |||
04e8c11080 | |||
80af2b73ab | |||
3cc57f9645 | |||
a65d4e7f14 | |||
b531cfc019 | |||
543ec2136b | |||
93b5071f73 | |||
ddc369f073 | |||
1b40dc92eb | |||
fcc3e6138b | |||
9fe6ef7ab2 | |||
c010af6f19 | |||
35b7982303 | |||
f311cfa231 | |||
80970e531b | |||
b7bb76df05 | |||
98c70d6fc7 | |||
ab84349b16 | |||
03091e372f | |||
4d17184817 | |||
e086e0eb6c | |||
314368c822 | |||
c5181ab410 | |||
ea5152cae1 | |||
255fca5eea | |||
4aeccadf4e | |||
93540ee10e | |||
8fb3ac3649 | |||
77b2986b5b | |||
62b013df0d | |||
fad6768bd1 | |||
a78125f925 | |||
a00a8bcc8a | |||
1e9a9e167d | |||
3da0db62e6 | |||
e14ced7918 | |||
ab9d02f53b | |||
a461a11989 | |||
1bd838608f | |||
365577f567 | |||
50efb383f0 | |||
5da6bd0083 | |||
5e9a033e6e | |||
fb7cb6823e | |||
dd0a58f5f0 | |||
a21420389e | |||
6140baf4e1 | |||
8fc642eb5b | |||
e66e1a0046 | |||
d5c69f1da4 | |||
f13b1e7d7f | |||
5c8a3f862a | |||
8807f1277f | |||
a3b9157f49 | |||
b88ba05356 | |||
b74d505577 | |||
9e2d7dca87 | |||
d236b37ac9 | |||
e880c66bd8 | |||
383456aa29 | |||
1a13940c8d | |||
3d54788495 | |||
71d53ace2f | |||
f37e3f99f0 | |||
bd03ffc16e | |||
1ac1af9b47 | |||
3bf5705316 | |||
1c2528c8a3 | |||
7bd15b1a03 | |||
6b961a85fd | |||
7707004043 | |||
a025d3c5a5 | |||
c460bdd56b | |||
b81a359eb6 | |||
d61aefb24c | |||
d305dd73a3 | |||
93a16ba238 | |||
4f7cea6c53 | |||
afbdd3acc3 | |||
85d5866177 | |||
9789d7535d | |||
d8443cd3f7 | |||
d47c26e168 | |||
01561da142 | |||
0af25f784b | |||
b9b42f2ea0 | |||
311c393838 | |||
18c1c42405 | |||
37dd5d4629 | |||
81975f4693 | |||
b8b928d5cb | |||
3eff81fbf7 | |||
785521bf4f | |||
6d1a55a521 | |||
9cad27008b | |||
11e611a7fa | |||
72c1f8de06 | |||
6e99868e4c | |||
4d278fde64 | |||
f21e915fb9 | |||
6f53c63df6 | |||
1def5f359e | |||
15ec669374 | |||
a3fa5da496 | |||
30965ac66a | |||
09ab40b7d1 | |||
edab9dbf4d | |||
9868ea4936 | |||
85920dd01d | |||
fa15607773 | |||
a91a2c1a83 | |||
16e7711e22 | |||
5cda4eda72 | |||
98f000409f | |||
bd7fe0cf66 | |||
48246541da | |||
4a8d4a53b1 | |||
4cd95bcbc3 | |||
be24c8697f | |||
0d93378887 | |||
4069766c52 | |||
7010577720 | |||
8ac27a68e6 | |||
46312e0b46 | |||
f9216ed6ad | |||
65bf37ef83 | |||
f740fae2a4 | |||
fbc503d696 | |||
662435f728 | |||
163d966707 | |||
85729c51af | |||
360e1ca5cc | |||
a1f2a06b34 | |||
c84dd8a90d | |||
65469a7f8b | |||
6b597516c1 | |||
b5857f62e2 | |||
a504ced097 | |||
1db5fbcfe3 | |||
59b8ab5834 | |||
a568180441 | |||
85e80f71cd | |||
bfa6bdcd8b | |||
03cd72b007 | |||
5bfd430f81 | |||
73fac4e911 | |||
8fb474fb17 | |||
f813928e4b | |||
b9c7a97318 | |||
9fb2f1cd6d | |||
6ca7732d5e | |||
b0ab0fac49 | |||
a294bce82f | |||
76d1466b08 | |||
1888d3f7b3 | |||
c2787701cc | |||
52e1d0ccc4 | |||
10e3c4c221 | |||
68f2d273bf | |||
7c86c21662 | |||
ae1580d790 | |||
3215c50f25 | |||
36f73e8044 | |||
a4f3d779db | |||
d9aa2b784d | |||
cffcbc02de | |||
9347fddbfc | |||
037e9437e4 | |||
36e7a4ca2e | |||
ae6423d704 | |||
7105440cec | |||
c80b9cd280 | |||
171ca612af | |||
c3d64fc1b3 | |||
7c24ce225d | |||
08b38d5401 | |||
024c53694d | |||
7e6011101f | |||
c40feaba77 | |||
5277f09dfc | |||
2d30521ab9 | |||
050fa43561 | |||
f36f92f4da | |||
124f3bc67d | |||
d304209a85 | |||
8367d3f3cb | |||
c56d7d899d | |||
ea5db8469e | |||
3811c567e7 | |||
8708d76425 | |||
054fe3cc40 | |||
af0d11f244 | |||
9650885be9 | |||
596ac6e31f | |||
612ee37365 | |||
442c37b7a9 | |||
04bbe41330 | |||
8f84f57183 | |||
6a78740211 | |||
c0e1a415fd | |||
bf8f082a90 | |||
2f543a2142 | |||
7e5db8c930 | |||
f7a211dcc8 | |||
845734773d | |||
347de4931c | |||
8829650513 | |||
c73fae1e2e | |||
834bf069d2 | |||
c06a9fa34f | |||
753fad4adc | |||
34814eb66e | |||
3a5bcd0326 | |||
99c2398bc6 | |||
28f1272870 | |||
f18e3a2fc0 | |||
c4c5dc27cb | |||
2caf182f37 | |||
43f244b6d5 | |||
1309b396d0 | |||
ba61796458 | |||
3255fe7141 | |||
e98b8e79ea | |||
196121c51b | |||
5269028951 | |||
f7bc056b5a | |||
a0f7198544 | |||
dd8930684e | |||
bdb186f3b0 | |||
64f9baa084 | |||
b29231c040 | |||
6128bf07a9 | |||
2ec19e9558 | |||
9ddb6925bf | |||
12931e1c6e | |||
41c23b0da5 | |||
2578ab19e4 | |||
d87ec897e9 | |||
3bd4bffb1c | |||
c36b09a502 | |||
641eb10d34 | |||
955c5505e7 | |||
69319969de | |||
a14292e848 | |||
5d678df64a | |||
8ca8cbe2bd | |||
ba322d8209 | |||
2f38289b79 | |||
f23a3ca699 | |||
77d2b106cc | |||
c0e46412e9 | |||
0161353d7d | |||
2b4ecde2c8 | |||
b3a286d69d | |||
467d3c9a0c | |||
ad5747bad1 | |||
d6eb66ed3c | |||
7f2a9f1b49 | |||
1e1896f2de | |||
c831973366 | |||
1a2548d9e9 | |||
3900eec27c | |||
a02d212638 | |||
9c91a8fa70 | |||
41469f335e | |||
67ce4f8820 | |||
bc63d56cca | |||
c893d70805 | |||
3ee6e02564 | |||
e3aaace400 | |||
300753a069 | |||
f13b88c616 | |||
60ca389c64 | |||
1b0f3919c1 | |||
6a348cf7d5 | |||
9e91449c8d | |||
25e5ebf382 | |||
7dfc356625 | |||
58ba6c0160 | |||
f076b63821 | |||
12f0454cd6 | |||
cd7342755f | |||
9bb8e0a3f9 | |||
1a6373ef39 | |||
f6c24009be | |||
d862042301 | |||
23d9ded655 | |||
4c1a017e69 | |||
ee623d9247 | |||
330537d08a | |||
2cf0ecac7b | |||
d200b11c7e | |||
d0eca21021 | |||
c1147c05e1 | |||
55898ad2cf | |||
a465808592 | |||
5c4862bad4 | |||
995029a142 | |||
a57b562cff | |||
531572578e | |||
3a4cca687f | |||
7d3d06a16c | |||
c21b1fbeeb | |||
f920ce295e | |||
7a7bd19c45 | |||
8f4b58d70e | |||
3fd45e03bf |
@ -2,6 +2,7 @@ language: python
|
||||
python:
|
||||
- "2.6"
|
||||
- "2.7"
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
before_install:
|
||||
|
9
AUTHORS
9
AUTHORS
@ -108,3 +108,12 @@ Enam Mijbah Noor
|
||||
David Luhmer
|
||||
Shaya Goldberg
|
||||
Paul Hartmann
|
||||
Frans de Jonge
|
||||
Robin de Rooij
|
||||
Ryan Schmidt
|
||||
Leslie P. Polzer
|
||||
Duncan Keall
|
||||
Alexander Mamay
|
||||
Devin J. Pohly
|
||||
Eduardo Ferro Aldama
|
||||
Jeff Buchbinder
|
||||
|
@ -1,4 +1,6 @@
|
||||
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
**Please include the full output of youtube-dl when run with `-v`**.
|
||||
|
||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||
|
||||
@ -16,7 +18,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
|
||||
|
||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||
|
||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
||||
|
||||
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
|
||||
### Are you using the latest version?
|
||||
|
||||
@ -122,7 +126,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
|
8
Makefile
8
Makefile
@ -1,10 +1,8 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
|
||||
|
||||
cleanall: clean
|
||||
rm -f youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||
find -name "*.pyc" -delete
|
||||
|
||||
PREFIX ?= /usr/local
|
||||
BINDIR ?= $(PREFIX)/bin
|
||||
@ -46,7 +44,7 @@ test:
|
||||
ot: offlinetest
|
||||
|
||||
offlinetest: codetest
|
||||
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
|
||||
nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
|
||||
|
||||
tar: youtube-dl.tar.gz
|
||||
|
||||
|
433
README.md
433
README.md
@ -47,189 +47,109 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
# OPTIONS
|
||||
-h, --help print this help text and exit
|
||||
--version print program version and exit
|
||||
-U, --update update this program to latest version. Make
|
||||
sure that you have sufficient permissions
|
||||
(run with sudo if needed)
|
||||
-i, --ignore-errors continue on download errors, for example to
|
||||
skip unavailable videos in a playlist
|
||||
--abort-on-error Abort downloading of further videos (in the
|
||||
playlist or the command line) if an error
|
||||
occurs
|
||||
-U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
|
||||
-i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist
|
||||
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
|
||||
--dump-user-agent display the current browser identification
|
||||
--list-extractors List all supported extractors and the URLs
|
||||
they would handle
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
extractors
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||
example "gvsearch2:" downloads two videos
|
||||
from google videos for youtube-dl "large
|
||||
apple". Use the value "auto" to let
|
||||
youtube-dl guess ("auto_warning" to emit a
|
||||
warning when guessing). "error" just throws
|
||||
an error. The default value "fixup_error"
|
||||
repairs broken URLs, but emits an error if
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: Do not read the user
|
||||
configuration in ~/.config/youtube-
|
||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||
on Windows)
|
||||
--flat-playlist Do not extract the videos of a playlist,
|
||||
only list them.
|
||||
--list-extractors List all supported extractors and the URLs they would handle
|
||||
--extractor-descriptions Output descriptions of all supported extractors
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
|
||||
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
|
||||
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
|
||||
in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
|
||||
--flat-playlist Do not extract the videos of a playlist, only list them.
|
||||
--no-color Do not emit color codes in output.
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
||||
an empty string (--proxy "") for direct
|
||||
connection
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||
--source-address IP Client-side IP address to bind to
|
||||
(experimental)
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
(experimental)
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
(experimental)
|
||||
--source-address IP Client-side IP address to bind to (experimental)
|
||||
-4, --force-ipv4 Make all connections via IPv4 (experimental)
|
||||
-6, --force-ipv6 Make all connections via IPv6 (experimental)
|
||||
--cn-verification-proxy URL Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the option is
|
||||
not present) is used for the actual downloading. (experimental)
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER playlist video to end at (default is last)
|
||||
--playlist-items ITEM_SPEC playlist video items to download. Specify
|
||||
indices of the videos in the playlist
|
||||
separated by commas like: "--playlist-items
|
||||
1,2,5,8" if you want to download videos
|
||||
indexed 1, 2, 5, 8 in the playlist. You can
|
||||
specify range: "--playlist-items
|
||||
1-3,7,10-13", it will download the videos
|
||||
at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||
--match-title REGEX download only matching titles (regex or
|
||||
caseless sub-string)
|
||||
--reject-title REGEX skip download for matching titles (regex or
|
||||
caseless sub-string)
|
||||
--playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
|
||||
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
|
||||
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||
--match-title REGEX download only matching titles (regex or caseless sub-string)
|
||||
--reject-title REGEX skip download for matching titles (regex or caseless sub-string)
|
||||
--max-downloads NUMBER Abort after downloading NUMBER files
|
||||
--min-filesize SIZE Do not download any videos smaller than
|
||||
SIZE (e.g. 50k or 44.6m)
|
||||
--max-filesize SIZE Do not download any videos larger than SIZE
|
||||
(e.g. 50k or 44.6m)
|
||||
--min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
|
||||
--max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
|
||||
--date DATE download only videos uploaded in this date
|
||||
--datebefore DATE download only videos uploaded on or before
|
||||
this date (i.e. inclusive)
|
||||
--dateafter DATE download only videos uploaded on or after
|
||||
this date (i.e. inclusive)
|
||||
--min-views COUNT Do not download any videos with less than
|
||||
COUNT views
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--no-playlist If the URL refers to a video and a
|
||||
playlist, download only the video.
|
||||
--age-limit YEARS download only videos suitable for the given
|
||||
age
|
||||
--download-archive FILE Download only videos not listed in the
|
||||
archive file. Record the IDs of all
|
||||
downloaded videos in it.
|
||||
--include-ads Download advertisements as well
|
||||
(experimental)
|
||||
--datebefore DATE download only videos uploaded on or before this date (i.e. inclusive)
|
||||
--dateafter DATE download only videos uploaded on or after this date (i.e. inclusive)
|
||||
--min-views COUNT Do not download any videos with less than COUNT views
|
||||
--max-views COUNT Do not download any videos with more than COUNT views
|
||||
--match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present,
|
||||
!key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
|
||||
a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
|
||||
operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
|
||||
functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 &
|
||||
dislike_count <? 50 & description" .
|
||||
--no-playlist If the URL refers to a video and a playlist, download only the video.
|
||||
--yes-playlist If the URL refers to a video and a playlist, download the playlist.
|
||||
--age-limit YEARS download only videos suitable for the given age
|
||||
--download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
|
||||
--include-ads Download advertisements as well (experimental)
|
||||
|
||||
## Download Options:
|
||||
-r, --rate-limit LIMIT maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES number of retries (default is 10), or
|
||||
"infinite".
|
||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer do not automatically adjust the buffer
|
||||
size. By default, the buffer size is
|
||||
automatically resized from an initial value
|
||||
of SIZE.
|
||||
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES number of retries (default is 10), or "infinite".
|
||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024)
|
||||
--no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize (experimental) set file xattribute
|
||||
ytdl.filesize with expected filesize
|
||||
--external-downloader COMMAND (experimental) Use the specified external
|
||||
downloader. Currently supports
|
||||
aria2c,curl,wget
|
||||
--xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize
|
||||
--hls-prefer-native (experimental) Use the native HLS downloader instead of ffmpeg.
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
|
||||
--external-downloader-args ARGS Give these arguments to the external downloader.
|
||||
|
||||
## Filesystem Options:
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||
stdin)
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
||||
--id use only video ID in file name
|
||||
-o, --output TEMPLATE output filename template. Use %(title)s to
|
||||
get the title, %(uploader)s for the
|
||||
uploader name, %(uploader_id)s for the
|
||||
uploader nickname if different,
|
||||
%(autonumber)s to get an automatically
|
||||
incremented number, %(ext)s for the
|
||||
filename extension, %(format)s for the
|
||||
format description (like "22 - 1280x720" or
|
||||
"HD"), %(format_id)s for the unique id of
|
||||
the format (like Youtube's itags: "137"),
|
||||
%(upload_date)s for the upload date
|
||||
(YYYYMMDD), %(extractor)s for the provider
|
||||
(youtube, metacafe, etc), %(id)s for the
|
||||
video id, %(playlist_title)s,
|
||||
%(playlist_id)s, or %(playlist)s (=title if
|
||||
present, ID otherwise) for the playlist the
|
||||
video is in, %(playlist_index)s for the
|
||||
position in the playlist. %(height)s and
|
||||
%(width)s for the width and height of the
|
||||
video format. %(resolution)s for a textual
|
||||
description of the resolution of the video
|
||||
format. %% for a literal percent. Use - to
|
||||
output to stdout. Can also be used to
|
||||
download to a different directory, for
|
||||
example with -o '/my/downloads/%(uploader)s
|
||||
/%(title)s-%(id)s.%(ext)s' .
|
||||
--autonumber-size NUMBER Specifies the number of digits in
|
||||
%(autonumber)s when it is present in output
|
||||
filename template or --auto-number option
|
||||
is given
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
-A, --auto-number [deprecated; use -o
|
||||
"%(autonumber)s-%(title)s.%(ext)s" ] number
|
||||
downloaded files starting from 00000
|
||||
-t, --title [deprecated] use title in file name
|
||||
(default)
|
||||
-o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
|
||||
nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
|
||||
the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"),
|
||||
%(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
|
||||
%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
|
||||
%(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
|
||||
%(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
|
||||
Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
|
||||
--restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
|
||||
-A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000
|
||||
-t, --title [deprecated] use title in file name (default)
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-w, --no-overwrites do not overwrite files
|
||||
-c, --continue force resume of partially downloaded files.
|
||||
By default, youtube-dl will resume
|
||||
downloads if possible.
|
||||
--no-continue do not resume partially downloaded files
|
||||
(restart from beginning)
|
||||
--no-part do not use .part files - write directly
|
||||
into output file
|
||||
--no-mtime do not use the Last-modified header to set
|
||||
the file modification time
|
||||
--write-description write video description to a .description
|
||||
file
|
||||
-c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
|
||||
--no-continue do not resume partially downloaded files (restart from beginning)
|
||||
--no-part do not use .part files - write directly into output file
|
||||
--no-mtime do not use the Last-modified header to set the file modification time
|
||||
--write-description write video description to a .description file
|
||||
--write-info-json write video metadata to a .info.json file
|
||||
--write-annotations write video annotations to a .annotation
|
||||
file
|
||||
--load-info FILE json file containing the video information
|
||||
(created with the "--write-info-json" option)
|
||||
--cookies FILE file to read cookies from and dump cookie
|
||||
jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
--write-annotations write video annotations to a .annotation file
|
||||
--load-info FILE json file containing the video information (created with the "--write-info-json" option)
|
||||
--cookies FILE file to read cookies from and dump cookie jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
|
||||
or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
|
||||
change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Thumbnail images:
|
||||
--write-thumbnail write thumbnail image to disk
|
||||
--write-all-thumbnails write all thumbnail image formats to disk
|
||||
--list-thumbnails Simulate and list all available thumbnail
|
||||
formats
|
||||
--list-thumbnails Simulate and list all available thumbnail formats
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet activates quiet mode
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate do not download the video and do not write
|
||||
anything to disk
|
||||
-s, --simulate do not download the video and do not write anything to disk
|
||||
--skip-download do not download the video
|
||||
-g, --get-url simulate, quiet but print URL
|
||||
-e, --get-title simulate, quiet but print title
|
||||
@ -239,148 +159,87 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--get-duration simulate, quiet but print video length
|
||||
--get-filename simulate, quiet but print output filename
|
||||
--get-format simulate, quiet but print output format
|
||||
-j, --dump-json simulate, quiet but print JSON information.
|
||||
See --output for a description of available
|
||||
keys.
|
||||
-J, --dump-single-json simulate, quiet but print JSON information
|
||||
for each command-line argument. If the URL
|
||||
refers to a playlist, dump the whole
|
||||
playlist information in a single line.
|
||||
--print-json Be quiet and print the video information as
|
||||
JSON (video is still being downloaded).
|
||||
-j, --dump-json simulate, quiet but print JSON information. See --output for a description of available keys.
|
||||
-J, --dump-single-json simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
|
||||
information in a single line.
|
||||
--print-json Be quiet and print the video information as JSON (video is still being downloaded).
|
||||
--newline output progress bar as new lines
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
-v, --verbose print various debugging information
|
||||
--dump-intermediate-pages print downloaded pages to debug problems
|
||||
(very verbose)
|
||||
--write-pages Write downloaded intermediary pages to
|
||||
files in the current directory to debug
|
||||
problems
|
||||
--dump-pages print downloaded pages to debug problems (very verbose)
|
||||
--write-pages Write downloaded intermediary pages to files in the current directory to debug problems
|
||||
--print-traffic Display sent and read HTTP traffic
|
||||
-C, --call-home Contact the youtube-dl server for
|
||||
debugging.
|
||||
--no-call-home Do NOT contact the youtube-dl server for
|
||||
debugging.
|
||||
-C, --call-home Contact the youtube-dl server for debugging.
|
||||
--no-call-home Do NOT contact the youtube-dl server for debugging.
|
||||
|
||||
## Workarounds:
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer URL specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
||||
separated by a colon ':'. You can use this
|
||||
option multiple times
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
--sleep-interval SECONDS Number of seconds to sleep before each
|
||||
download.
|
||||
--referer URL specify a custom referer, use if the video access is restricted to one domain
|
||||
--add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
|
||||
--bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
|
||||
--sleep-interval SECONDS Number of seconds to sleep before each download.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes, as in -f 22/17/18
|
||||
. Instead of format codes, you can select
|
||||
by extension for the extensions aac, m4a,
|
||||
mp3, mp4, ogg, wav, webm. You can also use
|
||||
the special names "best", "bestvideo",
|
||||
"bestaudio", "worst". You can filter the
|
||||
video results by putting a condition in
|
||||
brackets, as in -f "best[height=720]" (or
|
||||
-f "[filesize>10M]"). This works for
|
||||
filesize, height, width, tbr, abr, vbr, and
|
||||
fps and the comparisons <, <=, >, >=, =, !=
|
||||
. Formats for which the value is not known
|
||||
are excluded unless you put a question mark
|
||||
(?) after the operator. You can combine
|
||||
format filters, so -f "[height <=?
|
||||
720][tbr>500]" selects up to 720p videos
|
||||
(or videos where the height is not known)
|
||||
with a bitrate of at least 500 KBit/s. By
|
||||
default, youtube-dl will pick the best
|
||||
quality. Use commas to download multiple
|
||||
audio formats, such as -f
|
||||
136/137/mp4/bestvideo,140/m4a/bestaudio.
|
||||
You can merge the video and audio of two
|
||||
formats into a single file using -f <video-
|
||||
format>+<audio-format> (requires ffmpeg or
|
||||
avconv), for example -f
|
||||
-f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by
|
||||
extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio",
|
||||
"worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
|
||||
This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec,
|
||||
vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a
|
||||
question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos
|
||||
(or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality.
|
||||
Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio
|
||||
of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f
|
||||
bestvideo+bestaudio.
|
||||
--all-formats download all available video formats
|
||||
--prefer-free-formats prefer free video formats unless a specific
|
||||
one is requested
|
||||
--prefer-free-formats prefer free video formats unless a specific one is requested
|
||||
--max-quality FORMAT highest quality format to download
|
||||
-F, --list-formats list all available formats
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifest on
|
||||
YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g.
|
||||
bestvideo+bestaudio), output to given
|
||||
container format. One of mkv, mp4, ogg,
|
||||
webm, flv.Ignored if no merge is required
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
|
||||
merge is required
|
||||
|
||||
## Subtitle Options:
|
||||
--write-sub write subtitle file
|
||||
--write-auto-sub write automatic subtitle file (youtube
|
||||
only)
|
||||
--all-subs downloads all the available subtitles of
|
||||
the video
|
||||
--write-auto-sub write automatic subtitle file (youtube only)
|
||||
--all-subs downloads all the available subtitles of the video
|
||||
--list-subs lists all available subtitles for the video
|
||||
--sub-format FORMAT subtitle format (default=srt) ([sbv/vtt]
|
||||
youtube only)
|
||||
--sub-lang LANGS languages of the subtitles to download
|
||||
(optional) separated by commas, use IETF
|
||||
language tags like 'en,pt'
|
||||
--sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best"
|
||||
--sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
|
||||
|
||||
## Authentication Options:
|
||||
-u, --username USERNAME login with this account ID
|
||||
-p, --password PASSWORD account password. If this option is left
|
||||
out, youtube-dl will ask interactively.
|
||||
-p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively.
|
||||
-2, --twofactor TWOFACTOR two-factor auth code
|
||||
-n, --netrc use .netrc authentication data
|
||||
--video-password PASSWORD video password (vimeo, smotri)
|
||||
|
||||
## Post-processing Options:
|
||||
-x, --extract-audio convert video files to audio-only files
|
||||
(requires ffmpeg or avconv and ffprobe or
|
||||
avprobe)
|
||||
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a",
|
||||
"opus", or "wav"; "best" by default
|
||||
--audio-quality QUALITY ffmpeg/avconv audio quality specification,
|
||||
insert a value between 0 (better) and 9
|
||||
(worse) for VBR or a specific bitrate like
|
||||
128K (default 5)
|
||||
--recode-video FORMAT Encode the video to another format if
|
||||
necessary (currently supported:
|
||||
mp4|flv|ogg|webm|mkv)
|
||||
-k, --keep-video keeps the video file on disk after the
|
||||
post-processing; the video is erased by
|
||||
default
|
||||
--no-post-overwrites do not overwrite post-processed files; the
|
||||
post-processed files are overwritten by
|
||||
default
|
||||
--embed-subs embed subtitles in the video (only for mp4
|
||||
videos)
|
||||
-x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
|
||||
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
||||
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K
|
||||
(default 5)
|
||||
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
|
||||
-k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default
|
||||
--no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default
|
||||
--embed-subs embed subtitles in the video (only for mp4 videos)
|
||||
--embed-thumbnail embed thumbnail in the audio as cover art
|
||||
--add-metadata write metadata to the video file
|
||||
--xattrs write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the
|
||||
file. One of never (do nothing), warn (only
|
||||
emit a warning), detect_or_warn(the
|
||||
default; fix file if we can, warn
|
||||
otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||
postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors
|
||||
--exec CMD Execute a command on the file after
|
||||
downloading, similar to find's -exec
|
||||
syntax. Example: --exec 'adb push {}
|
||||
/sdcard/Music/ && rm {}'
|
||||
--metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
|
||||
parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
|
||||
%(title)s" matches a title like "Coldplay - Paradise"
|
||||
--xattrs write metadata to the video file's xattrs (using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
|
||||
fix file if we can, warn otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
|
||||
--ffmpeg-location PATH Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.
|
||||
--exec CMD Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm
|
||||
{}'
|
||||
--convert-subtitles FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt)
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
@ -490,11 +349,19 @@ If you want to play the video on a machine that is not running youtube-dl, you c
|
||||
|
||||
### ERROR: no fmt_url_map or conn information found in video info
|
||||
|
||||
youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
|
||||
YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||
|
||||
### ERROR: unable to download video ###
|
||||
|
||||
youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
|
||||
YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||
|
||||
### ExtractorError: Could not find JS function u'OF'
|
||||
|
||||
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||
|
||||
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
||||
|
||||
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
||||
|
||||
### SyntaxError: Non-ASCII character ###
|
||||
|
||||
@ -532,9 +399,29 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
|
||||
youtube-dl -- -wNyEUrxzFU
|
||||
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
|
||||
|
||||
### Can you add support for this anime video site, or site which shows current movies for free?
|
||||
|
||||
As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
|
||||
|
||||
A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included into youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization.
|
||||
|
||||
Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
|
||||
|
||||
### How can I speed up work on my issue?
|
||||
|
||||
(Also known as: Help, my important issue is not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
|
||||
|
||||
First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
|
||||
|
||||
Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
|
||||
|
||||
If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
|
||||
|
||||
Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
|
||||
|
||||
### How can I detect whether a given URL is supported by youtube-dl?
|
||||
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
|
||||
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
||||
|
||||
@ -631,6 +518,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
|
||||
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
|
||||
|
||||
```python
|
||||
from __future__ import unicode_literals
|
||||
import youtube_dl
|
||||
|
||||
ydl_opts = {}
|
||||
@ -643,6 +531,7 @@ Most likely, you'll want to use various options. For a list of what can be done,
|
||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||
|
||||
```python
|
||||
from __future__ import unicode_literals
|
||||
import youtube_dl
|
||||
|
||||
|
||||
@ -700,7 +589,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
|
||||
|
||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||
|
||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
||||
|
||||
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
|
||||
### Are you using the latest version?
|
||||
|
||||
@ -728,7 +619,7 @@ In particular, every site support request issue should only pertain to services
|
||||
|
||||
### Is anyone going to need the feature?
|
||||
|
||||
Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||
|
||||
### Is your question about youtube-dl?
|
||||
|
||||
|
@ -45,12 +45,12 @@ for test in get_testcases():
|
||||
|
||||
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
|
||||
|
||||
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
|
||||
or test['info_dict']['age_limit'] != 18):
|
||||
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
|
||||
test['info_dict']['age_limit'] != 18):
|
||||
print('\nPotential missing age_limit check: {0}'.format(test['name']))
|
||||
|
||||
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
|
||||
and test['info_dict']['age_limit'] == 18):
|
||||
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
|
||||
test['info_dict']['age_limit'] == 18):
|
||||
print('\nPotential false negative: {0}'.format(test['name']))
|
||||
|
||||
else:
|
||||
|
42
devscripts/generate_aes_testdata.py
Normal file
42
devscripts/generate_aes_testdata.py
Normal file
@ -0,0 +1,42 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import codecs
|
||||
import subprocess
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.utils import intlist_to_bytes
|
||||
from youtube_dl.aes import aes_encrypt, key_expansion
|
||||
|
||||
secret_msg = b'Secret message goes here'
|
||||
|
||||
|
||||
def hex_str(int_list):
|
||||
return codecs.encode(intlist_to_bytes(int_list), 'hex')
|
||||
|
||||
|
||||
def openssl_encode(algo, key, iv):
|
||||
cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
|
||||
prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
out, _ = prog.communicate(secret_msg)
|
||||
return out
|
||||
|
||||
iv = key = [0x20, 0x15] + 14 * [0]
|
||||
|
||||
r = openssl_encode('aes-128-cbc', key, iv)
|
||||
print('aes_cbc_decrypt')
|
||||
print(repr(r))
|
||||
|
||||
password = key
|
||||
new_key = aes_encrypt(password, key_expansion(password))
|
||||
r = openssl_encode('aes-128-ctr', new_key, iv)
|
||||
print('aes_decrypt_text 16')
|
||||
print(repr(r))
|
||||
|
||||
password = key + 16 * [0]
|
||||
new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
|
||||
r = openssl_encode('aes-256-ctr', new_key, iv)
|
||||
print('aes_decrypt_text 32')
|
||||
print(repr(r))
|
@ -35,7 +35,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us
|
||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
||||
|
||||
/bin/echo -e "\n### First of all, testing..."
|
||||
make cleanall
|
||||
make clean
|
||||
if $skip_tests ; then
|
||||
echo 'SKIPPING TESTS'
|
||||
else
|
||||
@ -45,9 +45,9 @@ fi
|
||||
/bin/echo -e "\n### Changing version in version.py..."
|
||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||
|
||||
/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
|
||||
make README.md
|
||||
git add README.md youtube_dl/version.py
|
||||
/bin/echo -e "\n### Committing documentation and youtube_dl/version.py..."
|
||||
make README.md CONTRIBUTING.md supportedsites
|
||||
git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py
|
||||
git commit -m "release $version"
|
||||
|
||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||
|
@ -1,4 +1,5 @@
|
||||
# Supported sites
|
||||
- **1tv**: Первый канал
|
||||
- **1up.com**
|
||||
- **220.ro**
|
||||
- **24video**
|
||||
@ -9,16 +10,22 @@
|
||||
- **8tracks**
|
||||
- **9gag**
|
||||
- **abc.net.au**
|
||||
- **Abc7News**
|
||||
- **AcademicEarth:Course**
|
||||
- **AddAnime**
|
||||
- **AdobeTV**
|
||||
- **AdultSwim**
|
||||
- **Aftenposten**
|
||||
- **Aftonbladet**
|
||||
- **AirMozilla**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
- **Aparat**
|
||||
- **AppleDailyAnimationNews**
|
||||
- **AppleDailyRealtimeNews**
|
||||
- **AppleTrailers**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
@ -30,14 +37,17 @@
|
||||
- **arte.tv:ddc**
|
||||
- **arte.tv:embed**
|
||||
- **arte.tv:future**
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
- **audiomack**
|
||||
- **AUEngine**
|
||||
- **audiomack:album**
|
||||
- **Azubu**
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **BeatportPro**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
- **Bet**
|
||||
@ -53,14 +63,19 @@
|
||||
- **Brightcove**
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**
|
||||
- **Camdemy**
|
||||
- **CamdemyFolder**
|
||||
- **Canal13cl**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **CBS**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSSports**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
- **Chilloutzone**
|
||||
- **chirbit**
|
||||
- **chirbit:profile**
|
||||
- **Cinchcast**
|
||||
- **Cinemassacre**
|
||||
- **clipfish**
|
||||
@ -71,8 +86,10 @@
|
||||
- **cmt.com**
|
||||
- **CNET**
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
- **CollegeHumor**
|
||||
- **CollegeRama**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
@ -82,32 +99,40 @@
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **dailymotion**
|
||||
- **dailymotion:playlist**
|
||||
- **dailymotion:user**
|
||||
- **daum.net**
|
||||
- **DBTV**
|
||||
- **DctpTv**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
- **Discovery**
|
||||
- **divxstage**: DivxStage
|
||||
- **Dotsub**
|
||||
- **DouyuTV**
|
||||
- **DRBonanza**
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
- **DRTV**
|
||||
- **Dump**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **EaglePlatform**
|
||||
- **EbaumsWorld**
|
||||
- **EchoMsk**
|
||||
- **eHow**
|
||||
- **Einthusan**
|
||||
- **eitb.tv**
|
||||
- **EllenTV**
|
||||
- **EllenTV:clips**
|
||||
- **ElPais**: El País
|
||||
- **Embedly**
|
||||
- **EMPFlix**
|
||||
- **Engadget**
|
||||
- **Eporner**
|
||||
- **EroProfile**
|
||||
- **Escapist**
|
||||
- **EveryonesMixtape**
|
||||
- **exfm**: ex.fm
|
||||
@ -120,9 +145,9 @@
|
||||
- **fernsehkritik.tv:postecke**
|
||||
- **Firedrive**
|
||||
- **Firstpost**
|
||||
- **firsttv**: Видеоархив - Первый канал
|
||||
- **Flickr**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Foxgay**
|
||||
- **FoxNews**
|
||||
- **france2.fr:generation-quoi**
|
||||
@ -140,9 +165,11 @@
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gametrailers**
|
||||
- **Gazeta**
|
||||
- **GDCVault**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
- **GiantBomb**
|
||||
- **Giga**
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GodTube**
|
||||
@ -153,9 +180,15 @@
|
||||
- **Grooveshark**
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
- **HearThisAt**
|
||||
- **Heise**
|
||||
- **HellPorno**
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HistoricFilms**
|
||||
- **History**
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HornBunny**
|
||||
- **HostingBulk**
|
||||
- **HotNewHipHop**
|
||||
@ -167,6 +200,7 @@
|
||||
- **ign.com**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
- **Ina**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
@ -181,7 +215,10 @@
|
||||
- **Jove**
|
||||
- **jpopsuki.tv**
|
||||
- **Jukebox**
|
||||
- **Kaltura**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
- **keek**
|
||||
- **KeezMovies**
|
||||
- **KhanAcademy**
|
||||
@ -191,10 +228,15 @@
|
||||
- **Ku6**
|
||||
- **la7.tv**
|
||||
- **Laola1Tv**
|
||||
- **Letv**
|
||||
- **LetvPlaylist**
|
||||
- **LetvTv**
|
||||
- **Libsyn**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
- **LiveLeak**
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **LnkGo**
|
||||
- **lrt.lt**
|
||||
- **lynda**: lynda.com videos
|
||||
- **lynda:course**: lynda.com online courses
|
||||
@ -203,6 +245,7 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **Malemotion**
|
||||
- **MDR**
|
||||
- **media.ccc.de**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
- **Mgoon**
|
||||
@ -235,6 +278,8 @@
|
||||
- **MySpass**
|
||||
- **myvideo**
|
||||
- **MyVidster**
|
||||
- **n-tv.de**
|
||||
- **NationalGeographic**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
@ -242,11 +287,16 @@
|
||||
- **ndr**: NDR.de - Mediathek
|
||||
- **NDTV**
|
||||
- **NerdCubedFeed**
|
||||
- **Nerdist**
|
||||
- **Netzkino**
|
||||
- **Newgrounds**
|
||||
- **Newstube**
|
||||
- **NextMedia**
|
||||
- **NextMediaActionNews**
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **nhl.com**
|
||||
- **nhl.com:news**: NHL news
|
||||
- **nhl.com:videocenter**: NHL videocenter category
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
@ -257,39 +307,53 @@
|
||||
- **Nowness**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo.nl**
|
||||
- **npo.nl:live**
|
||||
- **npo.nl:radio**
|
||||
- **npo.nl:radio:fragment**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKTV**
|
||||
- **NTV**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
- **ocw.mit.edu**
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **Ooyala**
|
||||
- **OpenFilm**
|
||||
- **orf:fm4**: radio FM4
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **ORFFM4**: radio FM4
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
- **PBS**
|
||||
- **Phoenix**
|
||||
- **Photobucket**
|
||||
- **Pladform**
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **Playvid**
|
||||
- **Playwire**
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
- **PornHd**
|
||||
- **PornHub**
|
||||
- **PornHubPlaylist**
|
||||
- **Pornotube**
|
||||
- **PornoXO**
|
||||
- **PrimeShareTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
- **radiobremen**
|
||||
- **radiofrance**
|
||||
- **Rai**
|
||||
- **RBMARadio**
|
||||
@ -300,18 +364,23 @@
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **RTBF**
|
||||
- **Rte**
|
||||
- **rtl.nl**: rtl.nl and rtlxl.nl
|
||||
- **RTL2**
|
||||
- **RTLnow**
|
||||
- **rtlxl.nl**
|
||||
- **RTP**
|
||||
- **RTS**: RTS.ch
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **RUHD**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channels
|
||||
- **rutube:embed**: Rutube embedded videos
|
||||
- **rutube:movie**: Rutube movies
|
||||
- **rutube:person**: Rutube person videos
|
||||
- **RUTV**: RUTV.RU
|
||||
- **Sandia**: Sandia National Laboratories
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@ -339,7 +408,8 @@
|
||||
- **soundcloud:playlist**
|
||||
- **soundcloud:set**
|
||||
- **soundcloud:user**
|
||||
- **Soundgasm**
|
||||
- **soundgasm**
|
||||
- **soundgasm:profile**
|
||||
- **southpark.cc.com**
|
||||
- **southpark.de**
|
||||
- **Space**
|
||||
@ -351,12 +421,15 @@
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **SRMediathek**: Süddeutscher Rundfunk
|
||||
- **SRMediathek**: Saarländischer Rundfunk
|
||||
- **SSA**
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
- **SunPorno**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SWRMediathek**
|
||||
- **Syfy**
|
||||
- **SztvHu**
|
||||
@ -375,7 +448,9 @@
|
||||
- **TeleBruxelles**
|
||||
- **telecinco.es**
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
- **TenPlay**
|
||||
- **TestTube**
|
||||
- **TF1**
|
||||
- **TheOnion**
|
||||
- **ThePlatform**
|
||||
@ -401,13 +476,23 @@
|
||||
- **Turbo**
|
||||
- **Tutv**
|
||||
- **tv.dfb.de**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvp.pl**
|
||||
- **tvp.pl:Series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Twitch**
|
||||
- **Tweakers**
|
||||
- **twitch:bookmarks**
|
||||
- **twitch:chapter**
|
||||
- **twitch:past_broadcasts**
|
||||
- **twitch:profile**
|
||||
- **twitch:stream**
|
||||
- **twitch:video**
|
||||
- **twitch:vod**
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **Ultimedia**
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **ustream**
|
||||
@ -433,6 +518,9 @@
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidme**
|
||||
- **Vidzi**
|
||||
- **vier**
|
||||
- **vier:videos**
|
||||
- **Viewster**
|
||||
- **viki**
|
||||
- **vimeo**
|
||||
- **vimeo:album**
|
||||
@ -460,11 +548,13 @@
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **WDRMaus**: Sendung mit der Maus
|
||||
- **WebOfStories**
|
||||
- **Weibo**
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **WorldStarHipHop**
|
||||
- **wrzuta.pl**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XHamster**
|
||||
@ -472,8 +562,14 @@
|
||||
- **XNXX**
|
||||
- **XTube**
|
||||
- **XTubeUser**: XTube user profile
|
||||
- **Xuite**
|
||||
- **XVideos**
|
||||
- **XXXYMovies**
|
||||
- **Yahoo**: Yahoo screen and movies
|
||||
- **Yam**
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YesJapan**
|
||||
- **Ynet**
|
||||
- **YouJizz**
|
||||
@ -491,9 +587,9 @@
|
||||
- **youtube:search_url**: YouTube.com search URLs
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **Zapiks**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3:album**: mp3.zing.vn albums
|
||||
|
@ -3,4 +3,4 @@ universal = True
|
||||
|
||||
[flake8]
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
|
||||
ignore = E501
|
||||
ignore = E402,E501,E731
|
||||
|
@ -103,6 +103,26 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
self.assertTrue(
|
||||
match_rex.match(got),
|
||||
'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||
elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
|
||||
got = got_dict.get(info_field)
|
||||
start_str = expected[len('startswith:'):]
|
||||
self.assertTrue(
|
||||
isinstance(got, compat_str),
|
||||
'Expected a %s object, but got %s for field %s' % (
|
||||
compat_str.__name__, type(got).__name__, info_field))
|
||||
self.assertTrue(
|
||||
got.startswith(start_str),
|
||||
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
|
||||
elif isinstance(expected, compat_str) and expected.startswith('contains:'):
|
||||
got = got_dict.get(info_field)
|
||||
contains_str = expected[len('contains:'):]
|
||||
self.assertTrue(
|
||||
isinstance(got, compat_str),
|
||||
'Expected a %s object, but got %s for field %s' % (
|
||||
compat_str.__name__, type(got).__name__, info_field))
|
||||
self.assertTrue(
|
||||
contains_str in got,
|
||||
'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
|
||||
elif isinstance(expected, type):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(isinstance(got, expected),
|
||||
@ -153,12 +173,14 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
info_dict_str += ''.join(
|
||||
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||
for k, v in test_info_dict.items() if k not in missing_keys)
|
||||
info_dict_str += '\n'
|
||||
|
||||
if info_dict_str:
|
||||
info_dict_str += '\n'
|
||||
info_dict_str += ''.join(
|
||||
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
|
||||
for k in missing_keys)
|
||||
write_string(
|
||||
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
|
||||
'\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
|
||||
self.assertFalse(
|
||||
missing_keys,
|
||||
'Missing keys in test definition: %s' % (
|
||||
|
@ -28,7 +28,7 @@
|
||||
"retries": 10,
|
||||
"simulate": false,
|
||||
"subtitleslang": null,
|
||||
"subtitlesformat": "srt",
|
||||
"subtitlesformat": "best",
|
||||
"test": true,
|
||||
"updatetime": true,
|
||||
"usenetrc": false,
|
||||
@ -39,5 +39,6 @@
|
||||
"writesubtitles": false,
|
||||
"allsubtitles": false,
|
||||
"listssubtitles": false,
|
||||
"socket_timeout": 20
|
||||
"socket_timeout": 20,
|
||||
"fixup": "never"
|
||||
}
|
||||
|
@ -13,6 +13,10 @@ import copy
|
||||
from test.helper import FakeYDL, assertRegexpMatches
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.postprocessor.common import PostProcessor
|
||||
from youtube_dl.utils import match_filter_func
|
||||
|
||||
TEST_URL = 'http://localhost/sample.mp4'
|
||||
|
||||
|
||||
class YDL(FakeYDL):
|
||||
@ -45,8 +49,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{'ext': 'webm', 'height': 460, 'url': 'x'},
|
||||
{'ext': 'mp4', 'height': 460, 'url': 'y'},
|
||||
{'ext': 'webm', 'height': 460, 'url': TEST_URL},
|
||||
{'ext': 'mp4', 'height': 460, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
yie = YoutubeIE(ydl)
|
||||
@ -59,8 +63,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{'ext': 'webm', 'height': 720, 'url': 'a'},
|
||||
{'ext': 'mp4', 'height': 1080, 'url': 'b'},
|
||||
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
|
||||
{'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
|
||||
]
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
@ -73,9 +77,9 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{'ext': 'webm', 'height': 720, 'url': '_'},
|
||||
{'ext': 'mp4', 'height': 720, 'url': '_'},
|
||||
{'ext': 'flv', 'height': 720, 'url': '_'},
|
||||
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
|
||||
{'ext': 'mp4', 'height': 720, 'url': TEST_URL},
|
||||
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
|
||||
]
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
@ -87,8 +91,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{'ext': 'flv', 'height': 720, 'url': '_'},
|
||||
{'ext': 'webm', 'height': 720, 'url': '_'},
|
||||
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
|
||||
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
|
||||
]
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
@ -132,10 +136,10 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_format_selection(self):
|
||||
formats = [
|
||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
|
||||
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
|
||||
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
|
||||
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
|
||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
||||
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
|
||||
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
|
||||
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
@ -166,10 +170,10 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_format_selection_audio(self):
|
||||
formats = [
|
||||
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
|
||||
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
|
||||
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
|
||||
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
@ -184,8 +188,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
self.assertEqual(downloaded['format_id'], 'audio-low')
|
||||
|
||||
formats = [
|
||||
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
|
||||
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
|
||||
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
||||
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
@ -227,9 +231,9 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_format_selection_video(self):
|
||||
formats = [
|
||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
|
||||
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
|
||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
@ -336,6 +340,67 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'G')
|
||||
|
||||
|
||||
class TestYoutubeDL(unittest.TestCase):
|
||||
def test_subtitles(self):
|
||||
def s_formats(lang, autocaption=False):
|
||||
return [{
|
||||
'ext': ext,
|
||||
'url': 'http://localhost/video.%s.%s' % (lang, ext),
|
||||
'_auto': autocaption,
|
||||
} for ext in ['vtt', 'srt', 'ass']]
|
||||
subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
|
||||
auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
|
||||
info_dict = {
|
||||
'id': 'test',
|
||||
'title': 'Test',
|
||||
'url': 'http://localhost/video.mp4',
|
||||
'subtitles': subtitles,
|
||||
'automatic_captions': auto_captions,
|
||||
'extractor': 'TEST',
|
||||
}
|
||||
|
||||
def get_info(params={}):
|
||||
params.setdefault('simulate', True)
|
||||
ydl = YDL(params)
|
||||
ydl.report_warning = lambda *args, **kargs: None
|
||||
return ydl.process_video_result(info_dict, download=False)
|
||||
|
||||
result = get_info()
|
||||
self.assertFalse(result.get('requested_subtitles'))
|
||||
self.assertEqual(result['subtitles'], subtitles)
|
||||
self.assertEqual(result['automatic_captions'], auto_captions)
|
||||
|
||||
result = get_info({'writesubtitles': True})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertTrue(subs)
|
||||
self.assertEqual(set(subs.keys()), set(['en']))
|
||||
self.assertTrue(subs['en'].get('data') is None)
|
||||
self.assertEqual(subs['en']['ext'], 'ass')
|
||||
|
||||
result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertEqual(subs['en']['ext'], 'srt')
|
||||
|
||||
result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertTrue(subs)
|
||||
self.assertEqual(set(subs.keys()), set(['es', 'fr']))
|
||||
|
||||
result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertTrue(subs)
|
||||
self.assertEqual(set(subs.keys()), set(['es', 'pt']))
|
||||
self.assertFalse(subs['es']['_auto'])
|
||||
self.assertTrue(subs['pt']['_auto'])
|
||||
|
||||
result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
|
||||
subs = result['requested_subtitles']
|
||||
self.assertTrue(subs)
|
||||
self.assertEqual(set(subs.keys()), set(['es', 'pt']))
|
||||
self.assertTrue(subs['es']['_auto'])
|
||||
self.assertTrue(subs['pt']['_auto'])
|
||||
|
||||
def test_add_extra_info(self):
|
||||
test_dict = {
|
||||
'extractor': 'Foo',
|
||||
@ -370,5 +435,102 @@ class TestFormatSelection(unittest.TestCase):
|
||||
'vbr': 10,
|
||||
}), '^\s*10k$')
|
||||
|
||||
def test_postprocessors(self):
|
||||
filename = 'post-processor-testfile.mp4'
|
||||
audiofile = filename + '.mp3'
|
||||
|
||||
class SimplePP(PostProcessor):
|
||||
def run(self, info):
|
||||
with open(audiofile, 'wt') as f:
|
||||
f.write('EXAMPLE')
|
||||
info['filepath']
|
||||
return False, info
|
||||
|
||||
def run_pp(params):
|
||||
with open(filename, 'wt') as f:
|
||||
f.write('EXAMPLE')
|
||||
ydl = YoutubeDL(params)
|
||||
ydl.add_post_processor(SimplePP())
|
||||
ydl.post_process(filename, {'filepath': filename})
|
||||
|
||||
run_pp({'keepvideo': True})
|
||||
self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
|
||||
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
||||
os.unlink(filename)
|
||||
os.unlink(audiofile)
|
||||
|
||||
run_pp({'keepvideo': False})
|
||||
self.assertFalse(os.path.exists(filename), '%s exists' % filename)
|
||||
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
||||
os.unlink(audiofile)
|
||||
|
||||
def test_match_filter(self):
|
||||
class FilterYDL(YDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(FilterYDL, self).__init__(*args, **kwargs)
|
||||
self.params['simulate'] = True
|
||||
|
||||
def process_info(self, info_dict):
|
||||
super(YDL, self).process_info(info_dict)
|
||||
|
||||
def _match_entry(self, info_dict, incomplete):
|
||||
res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
|
||||
if res is None:
|
||||
self.downloaded_info_dicts.append(info_dict)
|
||||
return res
|
||||
|
||||
first = {
|
||||
'id': '1',
|
||||
'url': TEST_URL,
|
||||
'title': 'one',
|
||||
'extractor': 'TEST',
|
||||
'duration': 30,
|
||||
'filesize': 10 * 1024,
|
||||
}
|
||||
second = {
|
||||
'id': '2',
|
||||
'url': TEST_URL,
|
||||
'title': 'two',
|
||||
'extractor': 'TEST',
|
||||
'duration': 10,
|
||||
'description': 'foo',
|
||||
'filesize': 5 * 1024,
|
||||
}
|
||||
videos = [first, second]
|
||||
|
||||
def get_videos(filter_=None):
|
||||
ydl = FilterYDL({'match_filter': filter_})
|
||||
for v in videos:
|
||||
ydl.process_ie_result(v, download=True)
|
||||
return [v['id'] for v in ydl.downloaded_info_dicts]
|
||||
|
||||
res = get_videos()
|
||||
self.assertEqual(res, ['1', '2'])
|
||||
|
||||
def f(v):
|
||||
if v['id'] == '1':
|
||||
return None
|
||||
else:
|
||||
return 'Video id is not 1'
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func('duration < 30')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['2'])
|
||||
|
||||
f = match_filter_func('description = foo')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['2'])
|
||||
|
||||
f = match_filter_func('description =? foo')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1', '2'])
|
||||
|
||||
f = match_filter_func('filesize > 5KiB')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
55
test/test_aes.py
Normal file
55
test/test_aes.py
Normal file
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
|
||||
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
|
||||
import base64
|
||||
|
||||
# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py'
|
||||
|
||||
|
||||
class TestAES(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.key = self.iv = [0x20, 0x15] + 14 * [0]
|
||||
self.secret_msg = b'Secret message goes here'
|
||||
|
||||
def test_encrypt(self):
|
||||
msg = b'message'
|
||||
key = list(range(16))
|
||||
encrypted = aes_encrypt(bytes_to_intlist(msg), key)
|
||||
decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
|
||||
self.assertEqual(decrypted, msg)
|
||||
|
||||
def test_cbc_decrypt(self):
|
||||
data = bytes_to_intlist(
|
||||
b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
|
||||
)
|
||||
decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||
|
||||
def test_decrypt_text(self):
|
||||
password = intlist_to_bytes(self.key).decode('utf-8')
|
||||
encrypted = base64.b64encode(
|
||||
intlist_to_bytes(self.iv[:8]) +
|
||||
b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
|
||||
)
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 16))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
password = intlist_to_bytes(self.key).decode('utf-8')
|
||||
encrypted = base64.b64encode(
|
||||
intlist_to_bytes(self.iv[:8]) +
|
||||
b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
|
||||
)
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -104,11 +104,11 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||
|
||||
def test_vimeo_matching(self):
|
||||
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
|
||||
self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
|
||||
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
|
||||
self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
|
||||
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
|
||||
self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
|
||||
self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
|
||||
self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
|
||||
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
|
||||
|
||||
# https://github.com/rg3/youtube-dl/issues/1930
|
||||
|
@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import unittest
|
||||
@ -27,5 +29,12 @@ class TestExecution(unittest.TestCase):
|
||||
def test_main_exec(self):
|
||||
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||
|
||||
def test_cmdline_umlauts(self):
|
||||
p = subprocess.Popen(
|
||||
[sys.executable, 'youtube_dl/__main__.py', 'ä', '--version'],
|
||||
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
|
||||
_, stderr = p.communicate()
|
||||
self.assertFalse(stderr)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -8,7 +8,7 @@ import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase):
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
proxy_name = name
|
||||
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
|
||||
return HTTPTestRequestHandler
|
||||
|
||||
|
||||
class TestProxy(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('normal'))
|
||||
self.port = self.proxy.socket.getsockname()[1]
|
||||
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
|
||||
self.proxy_thread.daemon = True
|
||||
self.proxy_thread.start()
|
||||
|
||||
self.cn_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('cn'))
|
||||
self.cn_port = self.cn_proxy.socket.getsockname()[1]
|
||||
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
|
||||
self.cn_proxy_thread.daemon = True
|
||||
self.cn_proxy_thread.start()
|
||||
|
||||
def test_proxy(self):
|
||||
cn_proxy = 'localhost:{0}'.format(self.cn_port)
|
||||
ydl = YoutubeDL({
|
||||
'proxy': 'localhost:{0}'.format(self.port),
|
||||
'cn_verification_proxy': cn_proxy,
|
||||
})
|
||||
url = 'http://foo.com/bar'
|
||||
response = ydl.urlopen(url).read().decode('utf-8')
|
||||
self.assertEqual(response, 'normal: {0}'.format(url))
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Ytdl-request-proxy', cn_proxy)
|
||||
response = ydl.urlopen(req).read().decode('utf-8')
|
||||
self.assertEqual(response, 'cn: {0}'.format(url))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self.assertEqual(jsi.call_function('f'), -11)
|
||||
|
||||
def test_comments(self):
|
||||
'Skipping: Not yet fully implemented'
|
||||
return
|
||||
jsi = JSInterpreter('''
|
||||
function x() {
|
||||
var x = /* 1 + */ 2;
|
||||
@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 52)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function f() {
|
||||
var x = "/*";
|
||||
var y = 1 /* comment */ + 2;
|
||||
return y;
|
||||
}
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('f'), 3)
|
||||
|
||||
def test_precedence(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() {
|
||||
|
26
test/test_netrc.py
Normal file
26
test/test_netrc.py
Normal file
@ -0,0 +1,26 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
gen_extractors,
|
||||
)
|
||||
|
||||
|
||||
class TestNetRc(unittest.TestCase):
|
||||
def test_netrc_present(self):
|
||||
for ie in gen_extractors():
|
||||
if not hasattr(ie, '_login'):
|
||||
continue
|
||||
self.assertTrue(
|
||||
hasattr(ie, '_NETRC_MACHINE'),
|
||||
'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
17
test/test_postprocessors.py
Normal file
17
test/test_postprocessors.py
Normal file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.postprocessor import MetadataFromTitlePP
|
||||
|
||||
|
||||
class TestMetadataFromTitle(unittest.TestCase):
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||
self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
|
@ -18,6 +18,15 @@ from youtube_dl.extractor import (
|
||||
VimeoIE,
|
||||
WallaIE,
|
||||
CeskaTelevizeIE,
|
||||
LyndaIE,
|
||||
NPOIE,
|
||||
ComedyCentralIE,
|
||||
NRKTVIE,
|
||||
RaiIE,
|
||||
VikiIE,
|
||||
ThePlatformIE,
|
||||
RTVEALaCartaIE,
|
||||
FunnyOrDieIE,
|
||||
)
|
||||
|
||||
|
||||
@ -27,42 +36,38 @@ class BaseTestSubtitles(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.DL = FakeYDL()
|
||||
self.ie = self.IE(self.DL)
|
||||
self.ie = self.IE()
|
||||
self.DL.add_info_extractor(self.ie)
|
||||
|
||||
def getInfoDict(self):
|
||||
info_dict = self.ie.extract(self.url)
|
||||
info_dict = self.DL.extract_info(self.url, download=False)
|
||||
return info_dict
|
||||
|
||||
def getSubtitles(self):
|
||||
info_dict = self.getInfoDict()
|
||||
return info_dict['subtitles']
|
||||
subtitles = info_dict['requested_subtitles']
|
||||
if not subtitles:
|
||||
return subtitles
|
||||
for sub_info in subtitles.values():
|
||||
if sub_info.get('data') is None:
|
||||
uf = self.DL.urlopen(sub_info['url'])
|
||||
sub_info['data'] = uf.read().decode('utf-8')
|
||||
return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
|
||||
|
||||
|
||||
class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
url = 'QRS8MkLhQmM'
|
||||
IE = YoutubeIE
|
||||
|
||||
def test_youtube_no_writesubtitles(self):
|
||||
self.DL.params['writesubtitles'] = False
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
|
||||
def test_youtube_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||
|
||||
def test_youtube_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['it']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||
|
||||
def test_youtube_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 13)
|
||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||
for lang in ['it', 'fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
def test_youtube_subtitles_sbv_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
@ -76,12 +81,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||
|
||||
def test_youtube_list_subtitles(self):
|
||||
self.DL.expect_warning('Video doesn\'t have automatic captions')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_youtube_automatic_captions(self):
|
||||
self.url = '8YoUxe5ncPo'
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
@ -103,55 +102,22 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
def test_youtube_multiple_langs(self):
|
||||
self.url = 'QRS8MkLhQmM'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['it', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.dailymotion.com/video/xczg00'
|
||||
IE = DailymotionIE
|
||||
|
||||
def test_no_writesubtitles(self):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['fr']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 5)
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_automatic_captions(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslang'] = ['en']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) == 0)
|
||||
self.assertTrue(len(subtitles.keys()) >= 6)
|
||||
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
||||
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
||||
for lang in ['es', 'fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
@ -159,61 +125,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
def test_multiple_langs(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['es', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestTedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
||||
IE = TEDIE
|
||||
|
||||
def test_no_writesubtitles(self):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['fr']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) >= 28)
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_automatic_captions(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslang'] = ['en']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) == 0)
|
||||
|
||||
def test_multiple_langs(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['es', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
|
||||
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
|
||||
for lang in ['es', 'fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
|
||||
@ -221,14 +147,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
|
||||
url = 'http://blip.tv/a/a-6603250'
|
||||
IE = BlipTVIE
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
@ -240,39 +159,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vimeo.com/76979871'
|
||||
IE = VimeoIE
|
||||
|
||||
def test_no_writesubtitles(self):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['fr']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_automatic_captions(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslang'] = ['en']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(len(subtitles.keys()) == 0)
|
||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
@ -280,27 +173,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
def test_multiple_langs(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['es', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestWallaSubtitles(BaseTestSubtitles):
|
||||
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
||||
IE = WallaIE
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writesubtitles'] = True
|
||||
@ -315,26 +194,20 @@ class TestWallaSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||
IE = CeskaTelevizeIE
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['cs']))
|
||||
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
|
||||
self.assertTrue(len(subtitles['cs']) > 20000)
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
@ -342,7 +215,122 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
class TestLyndaSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
|
||||
IE = LyndaIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
|
||||
|
||||
|
||||
class TestNPOSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
|
||||
IE = NPOIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['nl']))
|
||||
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
|
||||
|
||||
|
||||
class TestMTVSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
|
||||
IE = ComedyCentralIE
|
||||
|
||||
def getInfoDict(self):
|
||||
return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
|
||||
|
||||
|
||||
class TestNRKSubtitles(BaseTestSubtitles):
|
||||
url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
|
||||
IE = NRKTVIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['no']))
|
||||
self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
|
||||
|
||||
|
||||
class TestRaiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||
IE = RaiIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['it']))
|
||||
self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
|
||||
|
||||
|
||||
class TestVikiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
|
||||
IE = VikiIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
|
||||
|
||||
|
||||
class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||
# from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
|
||||
# (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
|
||||
url = 'theplatform:JFUjUE1_ehvq'
|
||||
IE = ThePlatformIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||
|
||||
|
||||
class TestRtveSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
|
||||
IE = RTVEALaCartaIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
print('Skipping, only available from Spain')
|
||||
return
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['es']))
|
||||
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||
|
||||
|
||||
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
|
||||
IE = FunnyOrDieIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -34,8 +34,8 @@ def _make_testfunc(testfile):
|
||||
def test_func(self):
|
||||
as_file = os.path.join(TEST_DIR, testfile)
|
||||
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
|
||||
if ((not os.path.exists(swf_file))
|
||||
or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
|
||||
if ((not os.path.exists(swf_file)) or
|
||||
os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
|
||||
# Recompile
|
||||
try:
|
||||
subprocess.check_call([
|
||||
|
@ -17,13 +17,22 @@ IGNORED_FILES = [
|
||||
'buildserver.py',
|
||||
]
|
||||
|
||||
IGNORED_DIRS = [
|
||||
'.git',
|
||||
'.tox',
|
||||
]
|
||||
|
||||
from test.helper import assertRegexpMatches
|
||||
|
||||
|
||||
class TestUnicodeLiterals(unittest.TestCase):
|
||||
def test_all_files(self):
|
||||
for dirpath, _, filenames in os.walk(rootDir):
|
||||
for dirpath, dirnames, filenames in os.walk(rootDir):
|
||||
for ignore_dir in IGNORED_DIRS:
|
||||
if ignore_dir in dirnames:
|
||||
# If we remove the directory from dirnames os.walk won't
|
||||
# recurse into it
|
||||
dirnames.remove(ignore_dir)
|
||||
for basename in filenames:
|
||||
if not basename.endswith('.py'):
|
||||
continue
|
||||
|
@ -24,6 +24,7 @@ from youtube_dl.utils import (
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
InAdvancePagedList,
|
||||
@ -38,6 +39,8 @@ from youtube_dl.utils import (
|
||||
parse_iso8601,
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url_path_consecutive_slashes,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
@ -52,7 +55,9 @@ from youtube_dl.utils import (
|
||||
urlencode_postdata,
|
||||
version_tuple,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
render_table,
|
||||
match_str,
|
||||
)
|
||||
|
||||
|
||||
@ -85,6 +90,11 @@ class TestUtil(unittest.TestCase):
|
||||
sanitize_filename('New World record at 0:12:34'),
|
||||
'New World record at 0_12_34')
|
||||
|
||||
self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
|
||||
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
|
||||
self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
|
||||
self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
|
||||
|
||||
forbidden = '"\0\\/'
|
||||
for fc in forbidden:
|
||||
for fbc in forbidden:
|
||||
@ -125,6 +135,62 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
|
||||
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
|
||||
|
||||
def test_sanitize_path(self):
|
||||
if sys.platform != 'win32':
|
||||
return
|
||||
|
||||
self.assertEqual(sanitize_path('abc'), 'abc')
|
||||
self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
|
||||
self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
|
||||
self.assertEqual(sanitize_path('abc|def'), 'abc#def')
|
||||
self.assertEqual(sanitize_path('<>:"|?*'), '#######')
|
||||
self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
|
||||
self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
|
||||
|
||||
self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
|
||||
self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
|
||||
|
||||
self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
|
||||
self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
|
||||
self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
|
||||
self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
|
||||
|
||||
self.assertEqual(
|
||||
sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
|
||||
'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
|
||||
|
||||
self.assertEqual(
|
||||
sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
|
||||
'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
|
||||
self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
|
||||
self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
|
||||
self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
|
||||
|
||||
self.assertEqual(sanitize_path('../abc'), '..\\abc')
|
||||
self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
|
||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||
|
||||
def test_sanitize_url_path_consecutive_slashes(self):
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname//'),
|
||||
'http://hostname/')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/'),
|
||||
'http://hostname/')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
|
||||
'http://hostname/abc/')
|
||||
|
||||
def test_ordered_set(self):
|
||||
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
|
||||
self.assertEqual(orderedSet([]), [])
|
||||
@ -156,6 +222,9 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(
|
||||
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
|
||||
'20141126')
|
||||
self.assertEqual(
|
||||
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
|
||||
'20150202')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = '''<root>
|
||||
@ -183,6 +252,17 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
||||
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
||||
|
||||
def test_xpath_text(self):
|
||||
testxml = '''<root>
|
||||
<div>
|
||||
<p>Foo</p>
|
||||
</div>
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
|
||||
self.assertTrue(xpath_text(doc, 'div/bar') is None)
|
||||
self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
|
||||
|
||||
def test_smuggle_url(self):
|
||||
data = {"ö": "ö", "abc": [3]}
|
||||
url = 'https://foo.bar/baz?x=y#a'
|
||||
@ -238,6 +318,9 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('5 s'), 5)
|
||||
self.assertEqual(parse_duration('3 min'), 180)
|
||||
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
||||
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
@ -364,6 +447,10 @@ class TestUtil(unittest.TestCase):
|
||||
"playlist":[{"controls":{"all":null}}]
|
||||
}''')
|
||||
|
||||
inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
|
||||
json_code = js_to_json(inp)
|
||||
self.assertEqual(json.loads(json_code), json.loads(inp))
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
@ -371,6 +458,16 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{"abc": true}')
|
||||
self.assertEqual(json.loads(on), {'abc': True})
|
||||
|
||||
# Ignore JavaScript code as well
|
||||
on = js_to_json('''{
|
||||
"x": 1,
|
||||
y: "a",
|
||||
z: some.code
|
||||
}''')
|
||||
d = json.loads(on)
|
||||
self.assertEqual(d['x'], 1)
|
||||
self.assertEqual(d['y'], 'a')
|
||||
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
@ -444,6 +541,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
'123 4\n'
|
||||
'9999 51')
|
||||
|
||||
def test_match_str(self):
|
||||
self.assertRaises(ValueError, match_str, 'xy>foobar', {})
|
||||
self.assertFalse(match_str('xy', {'x': 1200}))
|
||||
self.assertTrue(match_str('!xy', {'x': 1200}))
|
||||
self.assertTrue(match_str('x', {'x': 1200}))
|
||||
self.assertFalse(match_str('!x', {'x': 1200}))
|
||||
self.assertTrue(match_str('x', {'x': 0}))
|
||||
self.assertFalse(match_str('x>0', {'x': 0}))
|
||||
self.assertFalse(match_str('x>0', {}))
|
||||
self.assertTrue(match_str('x>?0', {}))
|
||||
self.assertTrue(match_str('x>1K', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>2K', {'x': 1200}))
|
||||
self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
|
||||
self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
|
||||
self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
|
||||
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 90, 'description': 'foo'}))
|
||||
self.assertTrue(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
|
||||
self.assertFalse(match_str(
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 10}))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -8,11 +8,11 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import io
|
||||
import re
|
||||
import string
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.compat import compat_str, compat_urlretrieve
|
||||
|
||||
@ -64,6 +64,12 @@ _TESTS = [
|
||||
'js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
|
||||
'js',
|
||||
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
|
||||
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
|
||||
)
|
||||
]
|
||||
|
||||
@ -88,7 +94,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
|
||||
if not os.path.exists(fn):
|
||||
compat_urlretrieve(url, fn)
|
||||
|
||||
ie = YoutubeIE()
|
||||
ydl = FakeYDL()
|
||||
ie = YoutubeIE(ydl)
|
||||
if stype == 'js':
|
||||
with io.open(fn, encoding='utf-8') as testf:
|
||||
jscode = testf.read()
|
||||
|
7
tox.ini
7
tox.ini
@ -1,8 +1,11 @@
|
||||
[tox]
|
||||
envlist = py26,py27,py33
|
||||
envlist = py26,py27,py33,py34
|
||||
[testenv]
|
||||
deps =
|
||||
nose
|
||||
coverage
|
||||
commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html
|
||||
defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
|
||||
--exclude test_subtitles.py --exclude test_write_annotations.py
|
||||
--exclude test_youtube_lists.py
|
||||
commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
|
||||
# test.test_download:TestDownload.test_NowVideo
|
||||
|
@ -4,8 +4,10 @@
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import collections
|
||||
import contextlib
|
||||
import datetime
|
||||
import errno
|
||||
import fileinput
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
@ -28,6 +30,7 @@ from .compat import (
|
||||
compat_basestring,
|
||||
compat_cookiejar,
|
||||
compat_expanduser,
|
||||
compat_get_terminal_size,
|
||||
compat_http_client,
|
||||
compat_kwargs,
|
||||
compat_str,
|
||||
@ -46,18 +49,19 @@ from .utils import (
|
||||
ExtractorError,
|
||||
format_bytes,
|
||||
formatSeconds,
|
||||
get_term_width,
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
PagedList,
|
||||
parse_filesize,
|
||||
PerRequestProxyHandler,
|
||||
PostProcessingError,
|
||||
platform_name,
|
||||
preferredencoding,
|
||||
render_table,
|
||||
SameFileError,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
std_headers,
|
||||
subtitles_filename,
|
||||
takewhile_inclusive,
|
||||
@ -154,7 +158,7 @@ class YoutubeDL(object):
|
||||
allsubtitles: Downloads all the subtitles of the video
|
||||
(requires writesubtitles or writeautomaticsub)
|
||||
listsubtitles: Lists all available subtitles for the video
|
||||
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
|
||||
subtitlesformat: The format code for subtitles
|
||||
subtitleslangs: List of languages of the subtitles to download
|
||||
keepvideo: Keep the video file after post-processing
|
||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||
@ -181,6 +185,8 @@ class YoutubeDL(object):
|
||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||
At the moment, this is only supported by YouTube.
|
||||
proxy: URL of the proxy server to use
|
||||
cn_verification_proxy: URL of the proxy to use for IP address verification
|
||||
on Chinese sites. (Experimental)
|
||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||
support, using fridibi
|
||||
@ -199,18 +205,25 @@ class YoutubeDL(object):
|
||||
postprocessor.
|
||||
progress_hooks: A list of functions that get called on download
|
||||
progress, with a dictionary with the entries
|
||||
* status: One of "downloading" and "finished".
|
||||
* status: One of "downloading", "error", or "finished".
|
||||
Check this first and ignore unknown values.
|
||||
|
||||
If status is one of "downloading" or "finished", the
|
||||
If status is one of "downloading", or "finished", the
|
||||
following properties may also be present:
|
||||
* filename: The final filename (always present)
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* downloaded_bytes: Bytes on disk
|
||||
* total_bytes: Size of the whole file, None if unknown
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* total_bytes_estimate: Guess of the eventual file size,
|
||||
None if unavailable.
|
||||
* elapsed: The number of seconds since download started.
|
||||
* eta: The estimated time in seconds, None if unknown
|
||||
* speed: The download speed in bytes/second, None if
|
||||
unknown
|
||||
* fragment_index: The counter of the currently
|
||||
downloaded video fragment.
|
||||
* fragment_count: The number of fragments (= individual
|
||||
files that will be merged)
|
||||
|
||||
Progress hooks are guaranteed to be called at least once
|
||||
(with status "finished") if the download is successful.
|
||||
@ -225,16 +238,25 @@ class YoutubeDL(object):
|
||||
call_home: Boolean, true iff we are allowed to contact the
|
||||
youtube-dl servers for debugging.
|
||||
sleep_interval: Number of seconds to sleep before each download.
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
listformats: Print an overview of available video formats and exit.
|
||||
list_thumbnails: Print a table of all thumbnails and exit.
|
||||
match_filter: A function that gets called with the info_dict of
|
||||
every video.
|
||||
If it returns a message, the video is ignored.
|
||||
If it returns None, the video is downloaded.
|
||||
match_filter_func in utils.py is one example for this.
|
||||
no_color: Do not emit color codes in output.
|
||||
|
||||
The following options determine which downloader is picked:
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
None or unset for standard (built-in) downloader.
|
||||
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
the downloader (see youtube_dl/downloader/common.py):
|
||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||
xattr_set_filesize.
|
||||
xattr_set_filesize, external_downloader_args.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
@ -268,7 +290,7 @@ class YoutubeDL(object):
|
||||
try:
|
||||
import pty
|
||||
master, slave = pty.openpty()
|
||||
width = get_term_width()
|
||||
width = compat_get_terminal_size().columns
|
||||
if width is None:
|
||||
width_args = []
|
||||
else:
|
||||
@ -292,8 +314,8 @@ class YoutubeDL(object):
|
||||
raise
|
||||
|
||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||
and not params.get('restrictfilenames', False)):
|
||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
|
||||
not params.get('restrictfilenames', False)):
|
||||
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
||||
self.report_warning(
|
||||
'Assuming --restrict-filenames since file system encoding '
|
||||
@ -301,8 +323,10 @@ class YoutubeDL(object):
|
||||
'Set the LC_ALL environment variable to fix this.')
|
||||
self.params['restrictfilenames'] = True
|
||||
|
||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||
self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
if isinstance(params.get('outtmpl'), bytes):
|
||||
self.report_warning(
|
||||
'Parameter outtmpl is bytes, but should be a unicode string. '
|
||||
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
@ -485,7 +509,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
if self.params.get('no_warnings'):
|
||||
return
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
@ -497,7 +521,7 @@ class YoutubeDL(object):
|
||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
||||
in red if stderr is a tty file.
|
||||
'''
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = '\033[0;31mERROR:\033[0m'
|
||||
else:
|
||||
_msg_header = 'ERROR:'
|
||||
@ -541,7 +565,7 @@ class YoutubeDL(object):
|
||||
if v is not None)
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
|
||||
tmpl = compat_expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
# Temporary fix for #4787
|
||||
@ -554,7 +578,7 @@ class YoutubeDL(object):
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
|
||||
def _match_entry(self, info_dict):
|
||||
def _match_entry(self, info_dict, incomplete):
|
||||
""" Returns None iff the file should be downloaded """
|
||||
|
||||
video_title = info_dict.get('title', info_dict.get('id', 'video'))
|
||||
@ -583,9 +607,17 @@ class YoutubeDL(object):
|
||||
if max_views is not None and view_count > max_views:
|
||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
||||
return 'Skipping "%s" because it is age restricted' % title
|
||||
return 'Skipping "%s" because it is age restricted' % video_title
|
||||
if self.in_download_archive(info_dict):
|
||||
return '%s has already been recorded in archive' % video_title
|
||||
|
||||
if not incomplete:
|
||||
match_filter = self.params.get('match_filter')
|
||||
if match_filter is not None:
|
||||
ret = match_filter(info_dict)
|
||||
if ret is not None:
|
||||
return ret
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
@ -600,7 +632,7 @@ class YoutubeDL(object):
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
extra_info is a dict containing the extra values to add to each result
|
||||
'''
|
||||
'''
|
||||
|
||||
if ie_key:
|
||||
ies = [self.get_info_extractor(ie_key)]
|
||||
@ -779,7 +811,7 @@ class YoutubeDL(object):
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
reason = self._match_entry(entry)
|
||||
reason = self._match_entry(entry, incomplete=True)
|
||||
if reason is not None:
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
@ -826,27 +858,44 @@ class YoutubeDL(object):
|
||||
'!=': operator.ne,
|
||||
}
|
||||
operator_rex = re.compile(r'''(?x)\s*\[
|
||||
(?P<key>width|height|tbr|abr|vbr|filesize|fps)
|
||||
(?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
||||
\]$
|
||||
''' % '|'.join(map(re.escape, OPERATORS.keys())))
|
||||
m = operator_rex.search(format_spec)
|
||||
if m:
|
||||
try:
|
||||
comparison_value = int(m.group('value'))
|
||||
except ValueError:
|
||||
comparison_value = parse_filesize(m.group('value'))
|
||||
if comparison_value is None:
|
||||
comparison_value = parse_filesize(m.group('value') + 'B')
|
||||
if comparison_value is None:
|
||||
raise ValueError(
|
||||
'Invalid value %r in format specification %r' % (
|
||||
m.group('value'), format_spec))
|
||||
op = OPERATORS[m.group('op')]
|
||||
|
||||
if not m:
|
||||
STR_OPERATORS = {
|
||||
'=': operator.eq,
|
||||
'!=': operator.ne,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)\s*\[
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9_-]+)
|
||||
\s*\]$
|
||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||
m = str_operator_rex.search(format_spec)
|
||||
if m:
|
||||
comparison_value = m.group('value')
|
||||
op = STR_OPERATORS[m.group('op')]
|
||||
|
||||
if not m:
|
||||
raise ValueError('Invalid format specification %r' % format_spec)
|
||||
|
||||
try:
|
||||
comparison_value = int(m.group('value'))
|
||||
except ValueError:
|
||||
comparison_value = parse_filesize(m.group('value'))
|
||||
if comparison_value is None:
|
||||
comparison_value = parse_filesize(m.group('value') + 'B')
|
||||
if comparison_value is None:
|
||||
raise ValueError(
|
||||
'Invalid value %r in format specification %r' % (
|
||||
m.group('value'), format_spec))
|
||||
op = OPERATORS[m.group('op')]
|
||||
|
||||
def _filter(f):
|
||||
actual_value = f.get(m.group('key'))
|
||||
if actual_value is None:
|
||||
@ -920,27 +969,9 @@ class YoutubeDL(object):
|
||||
return res
|
||||
|
||||
def _calc_cookies(self, info_dict):
|
||||
class _PseudoRequest(object):
|
||||
def __init__(self, url):
|
||||
self.url = url
|
||||
self.headers = {}
|
||||
self.unverifiable = False
|
||||
|
||||
def add_unredirected_header(self, k, v):
|
||||
self.headers[k] = v
|
||||
|
||||
def get_full_url(self):
|
||||
return self.url
|
||||
|
||||
def is_unverifiable(self):
|
||||
return self.unverifiable
|
||||
|
||||
def has_header(self, h):
|
||||
return h in self.headers
|
||||
|
||||
pr = _PseudoRequest(info_dict['url'])
|
||||
pr = compat_urllib_request.Request(info_dict['url'])
|
||||
self.cookiejar.add_cookie_header(pr)
|
||||
return pr.headers.get('Cookie')
|
||||
return pr.get_header('Cookie')
|
||||
|
||||
def process_video_result(self, info_dict, download=True):
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
@ -964,9 +995,11 @@ class YoutubeDL(object):
|
||||
thumbnails.sort(key=lambda t: (
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
t.get('id'), t.get('url')))
|
||||
for t in thumbnails:
|
||||
for i, t in enumerate(thumbnails):
|
||||
if 'width' in t and 'height' in t:
|
||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||
if t.get('id') is None:
|
||||
t['id'] = '%d' % i
|
||||
|
||||
if thumbnails and 'thumbnail' not in info_dict:
|
||||
info_dict['thumbnail'] = thumbnails[-1]['url']
|
||||
@ -983,6 +1016,15 @@ class YoutubeDL(object):
|
||||
info_dict['timestamp'])
|
||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||
|
||||
if self.params.get('listsubtitles', False):
|
||||
if 'automatic_captions' in info_dict:
|
||||
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
|
||||
self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
|
||||
return
|
||||
info_dict['requested_subtitles'] = self.process_subtitles(
|
||||
info_dict['id'], info_dict.get('subtitles'),
|
||||
info_dict.get('automatic_captions'))
|
||||
|
||||
# This extractors handle format selection themselves
|
||||
if info_dict['extractor'] in ['Youku']:
|
||||
if download:
|
||||
@ -1046,8 +1088,7 @@ class YoutubeDL(object):
|
||||
if req_format is None:
|
||||
req_format = 'best'
|
||||
formats_to_download = []
|
||||
# The -1 is for supporting YoutubeIE
|
||||
if req_format in ('-1', 'all'):
|
||||
if req_format == 'all':
|
||||
formats_to_download = formats
|
||||
else:
|
||||
for rfstr in req_format.split(','):
|
||||
@ -1074,7 +1115,8 @@ class YoutubeDL(object):
|
||||
else self.params['merge_output_format'])
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'format': '%s+%s' % (formats_info[0].get('format'),
|
||||
formats_info[1].get('format')),
|
||||
'format_id': '%s+%s' % (formats_info[0].get('format_id'),
|
||||
formats_info[1].get('format_id')),
|
||||
'width': formats_info[0].get('width'),
|
||||
@ -1110,6 +1152,55 @@ class YoutubeDL(object):
|
||||
info_dict.update(formats_to_download[-1])
|
||||
return info_dict
|
||||
|
||||
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
|
||||
"""Select the requested subtitles and their format"""
|
||||
available_subs = {}
|
||||
if normal_subtitles and self.params.get('writesubtitles'):
|
||||
available_subs.update(normal_subtitles)
|
||||
if automatic_captions and self.params.get('writeautomaticsub'):
|
||||
for lang, cap_info in automatic_captions.items():
|
||||
if lang not in available_subs:
|
||||
available_subs[lang] = cap_info
|
||||
|
||||
if (not self.params.get('writesubtitles') and not
|
||||
self.params.get('writeautomaticsub') or not
|
||||
available_subs):
|
||||
return None
|
||||
|
||||
if self.params.get('allsubtitles', False):
|
||||
requested_langs = available_subs.keys()
|
||||
else:
|
||||
if self.params.get('subtitleslangs', False):
|
||||
requested_langs = self.params.get('subtitleslangs')
|
||||
elif 'en' in available_subs:
|
||||
requested_langs = ['en']
|
||||
else:
|
||||
requested_langs = [list(available_subs.keys())[0]]
|
||||
|
||||
formats_query = self.params.get('subtitlesformat', 'best')
|
||||
formats_preference = formats_query.split('/') if formats_query else []
|
||||
subs = {}
|
||||
for lang in requested_langs:
|
||||
formats = available_subs.get(lang)
|
||||
if formats is None:
|
||||
self.report_warning('%s subtitles not available for %s' % (lang, video_id))
|
||||
continue
|
||||
for ext in formats_preference:
|
||||
if ext == 'best':
|
||||
f = formats[-1]
|
||||
break
|
||||
matches = list(filter(lambda f: f['ext'] == ext, formats))
|
||||
if matches:
|
||||
f = matches[-1]
|
||||
break
|
||||
else:
|
||||
f = formats[-1]
|
||||
self.report_warning(
|
||||
'No subtitle format found matching "%s" for language %s, '
|
||||
'using %s' % (formats_query, lang, f['ext']))
|
||||
subs[lang] = f
|
||||
return subs
|
||||
|
||||
def process_info(self, info_dict):
|
||||
"""Process a single resolved IE result."""
|
||||
|
||||
@ -1124,13 +1215,10 @@ class YoutubeDL(object):
|
||||
if len(info_dict['title']) > 200:
|
||||
info_dict['title'] = info_dict['title'][:197] + '...'
|
||||
|
||||
# Keep for backwards compatibility
|
||||
info_dict['stitle'] = info_dict['title']
|
||||
|
||||
if 'format' not in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
|
||||
reason = self._match_entry(info_dict)
|
||||
reason = self._match_entry(info_dict, incomplete=False)
|
||||
if reason is not None:
|
||||
self.to_screen('[download] ' + reason)
|
||||
return
|
||||
@ -1172,7 +1260,7 @@ class YoutubeDL(object):
|
||||
return
|
||||
|
||||
try:
|
||||
dn = os.path.dirname(encodeFilename(filename))
|
||||
dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
|
||||
if dn and not os.path.exists(dn):
|
||||
os.makedirs(dn)
|
||||
except (OSError, IOError) as err:
|
||||
@ -1212,15 +1300,23 @@ class YoutubeDL(object):
|
||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||
self.params.get('writeautomaticsub')])
|
||||
|
||||
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
if subtitles_are_requested and info_dict.get('requested_subtitles'):
|
||||
# subtitles download errors are already managed as troubles in relevant IE
|
||||
# that way it will silently go on when used with unsupporting IE
|
||||
subtitles = info_dict['subtitles']
|
||||
sub_format = self.params.get('subtitlesformat', 'srt')
|
||||
for sub_lang in subtitles.keys():
|
||||
sub = subtitles[sub_lang]
|
||||
if sub is None:
|
||||
continue
|
||||
subtitles = info_dict['requested_subtitles']
|
||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||
for sub_lang, sub_info in subtitles.items():
|
||||
sub_format = sub_info['ext']
|
||||
if sub_info.get('data') is not None:
|
||||
sub_data = sub_info['data']
|
||||
else:
|
||||
try:
|
||||
sub_data = ie._download_webpage(
|
||||
sub_info['url'], info_dict['id'], note=False)
|
||||
except ExtractorError as err:
|
||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||
(sub_lang, compat_str(err.cause)))
|
||||
continue
|
||||
try:
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||
@ -1228,7 +1324,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
subfile.write(sub)
|
||||
subfile.write(sub_data)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
return
|
||||
@ -1261,7 +1357,7 @@ class YoutubeDL(object):
|
||||
downloaded = []
|
||||
success = True
|
||||
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
|
||||
if not merger._executable:
|
||||
if not merger.available:
|
||||
postprocessors = []
|
||||
self.report_warning('You have requested multiple '
|
||||
'formats but ffmpeg or avconv are not installed.'
|
||||
@ -1340,8 +1436,8 @@ class YoutubeDL(object):
|
||||
"""Download a given list of URLs."""
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
if (len(url_list) > 1 and
|
||||
'%' not in outtmpl
|
||||
and self.params.get('max_downloads') != 1):
|
||||
'%' not in outtmpl and
|
||||
self.params.get('max_downloads') != 1):
|
||||
raise SameFileError(outtmpl)
|
||||
|
||||
for url in url_list:
|
||||
@ -1360,8 +1456,11 @@ class YoutubeDL(object):
|
||||
return self._download_retcode
|
||||
|
||||
def download_with_info_file(self, info_filename):
|
||||
with io.open(info_filename, 'r', encoding='utf-8') as f:
|
||||
info = json.load(f)
|
||||
with contextlib.closing(fileinput.FileInput(
|
||||
[info_filename], mode='r',
|
||||
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
||||
# FileInput doesn't have a read method, we can't call json.load
|
||||
info = json.loads('\n'.join(f))
|
||||
try:
|
||||
self.process_ie_result(info, download=True)
|
||||
except DownloadError:
|
||||
@ -1508,30 +1607,18 @@ class YoutubeDL(object):
|
||||
return res
|
||||
|
||||
def list_formats(self, info_dict):
|
||||
def line(format, idlen=20):
|
||||
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
|
||||
format['format_id'],
|
||||
format['ext'],
|
||||
self.format_resolution(format),
|
||||
self._format_note(format),
|
||||
))
|
||||
|
||||
formats = info_dict.get('formats', [info_dict])
|
||||
idlen = max(len('format code'),
|
||||
max(len(f['format_id']) for f in formats))
|
||||
formats_s = [
|
||||
line(f, idlen) for f in formats
|
||||
table = [
|
||||
[f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
|
||||
for f in formats
|
||||
if f.get('preference') is None or f['preference'] >= -1000]
|
||||
if len(formats) > 1:
|
||||
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
|
||||
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
||||
table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
|
||||
|
||||
header_line = line({
|
||||
'format_id': 'format code', 'ext': 'extension',
|
||||
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
|
||||
header_line = ['format code', 'extension', 'resolution', 'note']
|
||||
self.to_screen(
|
||||
'[info] Available formats for %s:\n%s\n%s' %
|
||||
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
||||
'[info] Available formats for %s:\n%s' %
|
||||
(info_dict['id'], render_table(header_line, table)))
|
||||
|
||||
def list_thumbnails(self, info_dict):
|
||||
thumbnails = info_dict.get('thumbnails')
|
||||
@ -1550,6 +1637,17 @@ class YoutubeDL(object):
|
||||
['ID', 'width', 'height', 'URL'],
|
||||
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
|
||||
|
||||
def list_subtitles(self, video_id, subtitles, name='subtitles'):
|
||||
if not subtitles:
|
||||
self.to_screen('%s has no %s' % (video_id, name))
|
||||
return
|
||||
self.to_screen(
|
||||
'Available %s for %s:' % (name, video_id))
|
||||
self.to_screen(render_table(
|
||||
['Language', 'formats'],
|
||||
[[lang, ', '.join(f['ext'] for f in reversed(formats))]
|
||||
for lang, formats in subtitles.items()]))
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
|
||||
@ -1611,7 +1709,7 @@ class YoutubeDL(object):
|
||||
self._write_string('[debug] Python version %s - %s\n' % (
|
||||
platform.python_version(), platform_name()))
|
||||
|
||||
exe_versions = FFmpegPostProcessor.get_versions()
|
||||
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||
exe_str = ', '.join(
|
||||
'%s %s' % (exe, v)
|
||||
@ -1666,13 +1764,14 @@ class YoutubeDL(object):
|
||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||
if 'http' in proxies and 'https' not in proxies:
|
||||
proxies['https'] = proxies['http']
|
||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||
proxy_handler = PerRequestProxyHandler(proxies)
|
||||
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
opener = compat_urllib_request.build_opener(
|
||||
https_handler, proxy_handler, cookie_processor, ydlh)
|
||||
proxy_handler, https_handler, cookie_processor, ydlh)
|
||||
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||
|
@ -9,6 +9,7 @@ import codecs
|
||||
import io
|
||||
import os
|
||||
import random
|
||||
import shlex
|
||||
import sys
|
||||
|
||||
|
||||
@ -23,9 +24,10 @@ from .compat import (
|
||||
)
|
||||
from .utils import (
|
||||
DateRange,
|
||||
DEFAULT_OUTTMPL,
|
||||
decodeOption,
|
||||
DEFAULT_OUTTMPL,
|
||||
DownloadError,
|
||||
match_filter_func,
|
||||
MaxDownloadsReached,
|
||||
preferredencoding,
|
||||
read_batch_urls,
|
||||
@ -169,6 +171,9 @@ def _real_main(argv=None):
|
||||
if opts.recodevideo is not None:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
||||
parser.error('invalid video recode format specified')
|
||||
if opts.convertsubtitles is not None:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
|
||||
parser.error('invalid subtitle format specified')
|
||||
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
@ -188,14 +193,14 @@ def _real_main(argv=None):
|
||||
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
||||
if opts.outtmpl is not None:
|
||||
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
|
||||
outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
|
||||
or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
|
||||
or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
|
||||
or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
|
||||
or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
|
||||
or (opts.useid and '%(id)s.%(ext)s')
|
||||
or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
|
||||
or DEFAULT_OUTTMPL)
|
||||
outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
|
||||
(opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
|
||||
(opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
|
||||
(opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
|
||||
(opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
|
||||
(opts.useid and '%(id)s.%(ext)s') or
|
||||
(opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
|
||||
DEFAULT_OUTTMPL)
|
||||
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
||||
parser.error('Cannot download a video and extract audio into the same'
|
||||
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||
@ -208,6 +213,11 @@ def _real_main(argv=None):
|
||||
# PostProcessors
|
||||
postprocessors = []
|
||||
# Add the metadata pp first, the other pps will copy it
|
||||
if opts.metafromtitle:
|
||||
postprocessors.append({
|
||||
'key': 'MetadataFromTitle',
|
||||
'titleformat': opts.metafromtitle
|
||||
})
|
||||
if opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||
if opts.extractaudio:
|
||||
@ -222,10 +232,14 @@ def _real_main(argv=None):
|
||||
'key': 'FFmpegVideoConvertor',
|
||||
'preferedformat': opts.recodevideo,
|
||||
})
|
||||
if opts.convertsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegSubtitlesConvertor',
|
||||
'format': opts.convertsubtitles,
|
||||
})
|
||||
if opts.embedsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
})
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
@ -247,6 +261,12 @@ def _real_main(argv=None):
|
||||
xattr # Confuse flake8
|
||||
except ImportError:
|
||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||
external_downloader_args = None
|
||||
if opts.external_downloader_args:
|
||||
external_downloader_args = shlex.split(opts.external_downloader_args)
|
||||
match_filter = (
|
||||
None if opts.match_filter is None
|
||||
else match_filter_func(opts.match_filter))
|
||||
|
||||
ydl_opts = {
|
||||
'usenetrc': opts.usenetrc,
|
||||
@ -344,6 +364,12 @@ def _real_main(argv=None):
|
||||
'list_thumbnails': opts.list_thumbnails,
|
||||
'playlist_items': opts.playlist_items,
|
||||
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||
'match_filter': match_filter,
|
||||
'no_color': opts.no_color,
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
'external_downloader_args': external_downloader_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
|
@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
||||
|
||||
import base64
|
||||
from math import ceil
|
||||
|
||||
@ -329,3 +327,5 @@ def inc(data):
|
||||
data[i] = data[i] + 1
|
||||
break
|
||||
return data
|
||||
|
||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
||||
|
@ -1,9 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import collections
|
||||
import getpass
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
@ -364,6 +366,33 @@ def workaround_optparse_bug9161():
|
||||
return real_add_option(self, *bargs, **bkwargs)
|
||||
optparse.OptionGroup.add_option = _compat_add_option
|
||||
|
||||
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
|
||||
compat_get_terminal_size = shutil.get_terminal_size
|
||||
else:
|
||||
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
|
||||
|
||||
def compat_get_terminal_size():
|
||||
columns = compat_getenv('COLUMNS', None)
|
||||
if columns:
|
||||
columns = int(columns)
|
||||
else:
|
||||
columns = None
|
||||
lines = compat_getenv('LINES', None)
|
||||
if lines:
|
||||
lines = int(lines)
|
||||
else:
|
||||
lines = None
|
||||
|
||||
try:
|
||||
sp = subprocess.Popen(
|
||||
['stty', 'size'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = sp.communicate()
|
||||
lines, columns = map(int, out.split())
|
||||
except:
|
||||
pass
|
||||
return _terminal_size(columns, lines)
|
||||
|
||||
|
||||
__all__ = [
|
||||
'compat_HTTPError',
|
||||
@ -371,6 +400,7 @@ __all__ = [
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
'compat_expanduser',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
'compat_getpass',
|
||||
'compat_html_entities',
|
||||
|
@ -34,6 +34,9 @@ def get_suitable_downloader(info_dict, params={}):
|
||||
if ed.supports(info_dict):
|
||||
return ed
|
||||
|
||||
if protocol == 'm3u8' and params.get('hls_prefer_native'):
|
||||
return NativeHlsFD
|
||||
|
||||
return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
@ -42,6 +42,8 @@ class FileDownloader(object):
|
||||
max_filesize: Skip files larger than this size
|
||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||
(experimenatal)
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
@ -54,6 +56,7 @@ class FileDownloader(object):
|
||||
self.ydl = ydl
|
||||
self._progress_hooks = []
|
||||
self.params = params
|
||||
self.add_progress_hook(self.report_progress)
|
||||
|
||||
@staticmethod
|
||||
def format_seconds(seconds):
|
||||
@ -226,42 +229,64 @@ class FileDownloader(object):
|
||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||
self.to_console_title('youtube-dl ' + msg)
|
||||
|
||||
def report_progress(self, percent, data_len_str, speed, eta):
|
||||
"""Report download progress."""
|
||||
if self.params.get('noprogress', False):
|
||||
def report_progress(self, s):
|
||||
if s['status'] == 'finished':
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen('[download] Download completed')
|
||||
else:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
if s.get('elapsed') is not None:
|
||||
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||
msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
|
||||
else:
|
||||
msg_template = '100%% of %(_total_bytes_str)s'
|
||||
self._report_progress_status(
|
||||
msg_template % s, is_last_line=True)
|
||||
|
||||
if self.params.get('noprogress'):
|
||||
return
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
eta_str = 'Unknown ETA'
|
||||
if percent is not None:
|
||||
percent_str = self.format_percent(percent)
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
|
||||
msg = ('%s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
|
||||
if self.params.get('noprogress', False):
|
||||
if s['status'] != 'downloading':
|
||||
return
|
||||
downloaded_str = format_bytes(downloaded_data_len)
|
||||
speed_str = self.format_speed(speed)
|
||||
elapsed_str = FileDownloader.format_seconds(elapsed)
|
||||
msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen('[download] Download completed')
|
||||
if s.get('eta') is not None:
|
||||
s['_eta_str'] = self.format_eta(s['eta'])
|
||||
else:
|
||||
self._report_progress_status(
|
||||
('100%% of %s in %s' %
|
||||
(data_len_str, self.format_seconds(tot_time))),
|
||||
is_last_line=True)
|
||||
s['_eta_str'] = 'Unknown ETA'
|
||||
|
||||
if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
|
||||
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
|
||||
elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
|
||||
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
|
||||
else:
|
||||
if s.get('downloaded_bytes') == 0:
|
||||
s['_percent_str'] = self.format_percent(0)
|
||||
else:
|
||||
s['_percent_str'] = 'Unknown %'
|
||||
|
||||
if s.get('speed') is not None:
|
||||
s['_speed_str'] = self.format_speed(s['speed'])
|
||||
else:
|
||||
s['_speed_str'] = 'Unknown speed'
|
||||
|
||||
if s.get('total_bytes') is not None:
|
||||
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||
msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||
elif s.get('total_bytes_estimate') is not None:
|
||||
s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
|
||||
msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||
else:
|
||||
if s.get('downloaded_bytes') is not None:
|
||||
s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
|
||||
if s.get('elapsed'):
|
||||
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
|
||||
else:
|
||||
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
|
||||
else:
|
||||
msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
|
||||
|
||||
self._report_progress_status(msg_template % s)
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
@ -288,14 +313,14 @@ class FileDownloader(object):
|
||||
"""
|
||||
|
||||
nooverwrites_and_exists = (
|
||||
self.params.get('nooverwrites', False)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
self.params.get('nooverwrites', False) and
|
||||
os.path.exists(encodeFilename(filename))
|
||||
)
|
||||
|
||||
continuedl_and_exists = (
|
||||
self.params.get('continuedl', False)
|
||||
and os.path.isfile(encodeFilename(filename))
|
||||
and not self.params.get('nopart', False)
|
||||
self.params.get('continuedl', False) and
|
||||
os.path.isfile(encodeFilename(filename)) and
|
||||
not self.params.get('nopart', False)
|
||||
)
|
||||
|
||||
# Check file already present
|
||||
|
@ -51,6 +51,13 @@ class ExternalFD(FileDownloader):
|
||||
return []
|
||||
return [command_option, source_address]
|
||||
|
||||
def _configuration_args(self, default=[]):
|
||||
ex_args = self.params.get('external_downloader_args')
|
||||
if ex_args is None:
|
||||
return default
|
||||
assert isinstance(ex_args, list)
|
||||
return ex_args
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
""" Either overwrite this or implement _make_cmd """
|
||||
cmd = self._make_cmd(tmpfilename, info_dict)
|
||||
@ -75,10 +82,11 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
class CurlFD(ExternalFD):
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-o', tmpfilename]
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
@ -89,15 +97,16 @@ class WgetFD(ExternalFD):
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--bind-address')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
class Aria2cFD(ExternalFD):
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [
|
||||
self.exe, '-c',
|
||||
'--min-split-size', '1M', '--max-connection-per-server', '4']
|
||||
cmd = [self.exe, '-c']
|
||||
cmd += self._configuration_args([
|
||||
'--min-split-size', '1M', '--max-connection-per-server', '4'])
|
||||
dn = os.path.dirname(tmpfilename)
|
||||
if dn:
|
||||
cmd += ['--dir', dn]
|
||||
|
@ -1,4 +1,4 @@
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
@ -11,11 +11,11 @@ from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_error,
|
||||
)
|
||||
from ..utils import (
|
||||
struct_pack,
|
||||
struct_unpack,
|
||||
format_bytes,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
xpath_text,
|
||||
@ -122,7 +122,8 @@ class FlvReader(io.BytesIO):
|
||||
|
||||
self.read_unsigned_int() # BootstrapinfoVersion
|
||||
# Profile,Live,Update,Reserved
|
||||
self.read(1)
|
||||
flags = self.read_unsigned_char()
|
||||
live = flags & 0x20 != 0
|
||||
# time scale
|
||||
self.read_unsigned_int()
|
||||
# CurrentMediaTime
|
||||
@ -161,6 +162,7 @@ class FlvReader(io.BytesIO):
|
||||
return {
|
||||
'segments': segments,
|
||||
'fragments': fragments,
|
||||
'live': live,
|
||||
}
|
||||
|
||||
def read_bootstrap_info(self):
|
||||
@ -183,6 +185,10 @@ def build_fragments_list(boot_info):
|
||||
for segment, fragments_count in segment_run_table['segment_run']:
|
||||
for _ in range(fragments_count):
|
||||
res.append((segment, next(fragments_counter)))
|
||||
|
||||
if boot_info['live']:
|
||||
res = res[-2:]
|
||||
|
||||
return res
|
||||
|
||||
|
||||
@ -247,22 +253,43 @@ class F4mFD(FileDownloader):
|
||||
self.report_error('Unsupported DRM')
|
||||
return media
|
||||
|
||||
def _get_bootstrap_from_url(self, bootstrap_url):
|
||||
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
||||
return read_bootstrap_info(bootstrap)
|
||||
|
||||
def _update_live_fragments(self, bootstrap_url, latest_fragment):
|
||||
fragments_list = []
|
||||
retries = 30
|
||||
while (not fragments_list) and (retries > 0):
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
fragments_list = build_fragments_list(boot_info)
|
||||
fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
|
||||
if not fragments_list:
|
||||
# Retry after a while
|
||||
time.sleep(5.0)
|
||||
retries -= 1
|
||||
|
||||
if not fragments_list:
|
||||
self.report_error('Failed to update fragments')
|
||||
|
||||
return fragments_list
|
||||
|
||||
def _parse_bootstrap_node(self, node, base_url):
|
||||
if node.text is None:
|
||||
bootstrap_url = compat_urlparse.urljoin(
|
||||
base_url, node.attrib['url'])
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
else:
|
||||
bootstrap_url = None
|
||||
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
return (boot_info, bootstrap_url)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[download] Downloading f4m manifest')
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
self.report_destination(filename)
|
||||
http_dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
{
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit', None),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
@ -277,18 +304,13 @@ class F4mFD(FileDownloader):
|
||||
|
||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
||||
if bootstrap_node.text is None:
|
||||
bootstrap_url = compat_urlparse.urljoin(
|
||||
base_url, bootstrap_node.attrib['url'])
|
||||
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
||||
else:
|
||||
bootstrap = base64.b64decode(bootstrap_node.text)
|
||||
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
|
||||
live = boot_info['live']
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
metadata = base64.b64decode(metadata_node.text)
|
||||
metadata = base64.b64decode(metadata_node.text.encode('ascii'))
|
||||
else:
|
||||
metadata = None
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
|
||||
fragments_list = build_fragments_list(boot_info)
|
||||
if self.params.get('test', False):
|
||||
@ -298,64 +320,112 @@ class F4mFD(FileDownloader):
|
||||
# For some akamai manifests we'll need to add a query to the fragment url
|
||||
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||
|
||||
self.report_destination(filename)
|
||||
http_dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
{
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit', None),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||
|
||||
write_flv_header(dest_stream)
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
if not live:
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': 0,
|
||||
'frag_counter': 0,
|
||||
'frag_index': 0,
|
||||
'frag_count': total_frags,
|
||||
'filename': filename,
|
||||
'tmpfilename': tmpfilename,
|
||||
}
|
||||
start = time.time()
|
||||
|
||||
def frag_progress_hook(status):
|
||||
frag_total_bytes = status.get('total_bytes', 0)
|
||||
estimated_size = (state['downloaded_bytes'] +
|
||||
(total_frags - state['frag_counter']) * frag_total_bytes)
|
||||
if status['status'] == 'finished':
|
||||
def frag_progress_hook(s):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
return
|
||||
|
||||
frag_total_bytes = s.get('total_bytes', 0)
|
||||
if s['status'] == 'finished':
|
||||
state['downloaded_bytes'] += frag_total_bytes
|
||||
state['frag_counter'] += 1
|
||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
||||
byte_counter = state['downloaded_bytes']
|
||||
state['frag_index'] += 1
|
||||
|
||||
estimated_size = (
|
||||
(state['downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
time_now = time.time()
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
state['elapsed'] = time_now - start
|
||||
|
||||
if s['status'] == 'finished':
|
||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||
else:
|
||||
frag_downloaded_bytes = status['downloaded_bytes']
|
||||
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
||||
frag_total_bytes)
|
||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||
progress += frag_progress / float(total_frags)
|
||||
|
||||
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
|
||||
self.report_progress(progress, format_bytes(estimated_size),
|
||||
status.get('speed'), eta)
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
|
||||
state['speed'] = s.get('speed')
|
||||
self._hook_progress(state)
|
||||
|
||||
http_dl.add_progress_hook(frag_progress_hook)
|
||||
|
||||
frags_filenames = []
|
||||
for (seg_i, frag_i) in fragments_list:
|
||||
while fragments_list:
|
||||
seg_i, frag_i = fragments_list.pop(0)
|
||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||
url = base_url + name
|
||||
if akamai_pv:
|
||||
url += '?' + akamai_pv.strip(';')
|
||||
frag_filename = '%s-%s' % (tmpfilename, name)
|
||||
success = http_dl.download(frag_filename, {'url': url})
|
||||
if not success:
|
||||
return False
|
||||
with open(frag_filename, 'rb') as down:
|
||||
down_data = down.read()
|
||||
reader = FlvReader(down_data)
|
||||
while True:
|
||||
_, box_type, box_data = reader.read_box_info()
|
||||
if box_type == b'mdat':
|
||||
dest_stream.write(box_data)
|
||||
break
|
||||
frags_filenames.append(frag_filename)
|
||||
try:
|
||||
success = http_dl.download(frag_filename, {'url': url})
|
||||
if not success:
|
||||
return False
|
||||
with open(frag_filename, 'rb') as down:
|
||||
down_data = down.read()
|
||||
reader = FlvReader(down_data)
|
||||
while True:
|
||||
_, box_type, box_data = reader.read_box_info()
|
||||
if box_type == b'mdat':
|
||||
dest_stream.write(box_data)
|
||||
break
|
||||
if live:
|
||||
os.remove(frag_filename)
|
||||
else:
|
||||
frags_filenames.append(frag_filename)
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if live and (err.code == 404 or err.code == 410):
|
||||
# We didn't keep up with the live window. Continue
|
||||
# with the next available fragment.
|
||||
msg = 'Fragment %d unavailable' % frag_i
|
||||
self.report_warning(msg)
|
||||
fragments_list = []
|
||||
else:
|
||||
raise
|
||||
|
||||
if not fragments_list and live and bootstrap_url:
|
||||
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
||||
total_frags += len(fragments_list)
|
||||
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
||||
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
|
||||
self.report_warning(msg)
|
||||
|
||||
dest_stream.close()
|
||||
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
|
||||
|
||||
elapsed = time.time() - start
|
||||
self.try_rename(tmpfilename, filename)
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(frag_file)
|
||||
@ -366,6 +436,7 @@ class F4mFD(FileDownloader):
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': elapsed,
|
||||
})
|
||||
|
||||
return True
|
||||
|
@ -23,15 +23,14 @@ class HlsFD(FileDownloader):
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
program = ffpp._executable
|
||||
if program is None:
|
||||
if not ffpp.available:
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
ffpp.check_version()
|
||||
|
||||
args = [
|
||||
encodeArgument(opt)
|
||||
for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||
args.append(encodeFilename(tmpfilename, True))
|
||||
|
||||
retval = subprocess.call(args)
|
||||
@ -48,7 +47,7 @@ class HlsFD(FileDownloader):
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('%s exited with code %d' % (program, retval))
|
||||
self.report_error('%s exited with code %d' % (ffpp.basename, retval))
|
||||
return False
|
||||
|
||||
|
||||
|
@ -1,10 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
|
||||
from socket import error as SocketError
|
||||
import errno
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
@ -15,7 +14,6 @@ from ..utils import (
|
||||
ContentTooShortError,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
format_bytes,
|
||||
)
|
||||
|
||||
|
||||
@ -94,6 +92,8 @@ class HttpFD(FileDownloader):
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'downloaded_bytes': resume_len,
|
||||
'total_bytes': resume_len,
|
||||
})
|
||||
return True
|
||||
else:
|
||||
@ -102,7 +102,7 @@ class HttpFD(FileDownloader):
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
except SocketError as e:
|
||||
except socket.error as e:
|
||||
if e.errno != errno.ECONNRESET:
|
||||
# Connection reset is no problem, just retry
|
||||
raise
|
||||
@ -137,7 +137,6 @@ class HttpFD(FileDownloader):
|
||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = format_bytes(data_len)
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
@ -196,20 +195,19 @@ class HttpFD(FileDownloader):
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
eta = percent = None
|
||||
eta = None
|
||||
else:
|
||||
percent = self.calc_percent(byte_counter, data_len)
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
'elapsed': now - start,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
@ -221,7 +219,7 @@ class HttpFD(FileDownloader):
|
||||
return False
|
||||
if tmpfilename != '-':
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
@ -235,6 +233,7 @@ class HttpFD(FileDownloader):
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - start,
|
||||
})
|
||||
|
||||
return True
|
||||
|
@ -11,7 +11,6 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
get_exe_version,
|
||||
)
|
||||
|
||||
@ -51,23 +50,23 @@ class RtmpFD(FileDownloader):
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
|
||||
speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
|
||||
time_now = time.time()
|
||||
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
data_len_str = '~' + format_bytes(data_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes': data_len,
|
||||
'total_bytes_estimate': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
cursor_in_new_line = False
|
||||
else:
|
||||
# no percent for live streams
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||
@ -75,15 +74,15 @@ class RtmpFD(FileDownloader):
|
||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||
time_now = time.time()
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
cursor_in_new_line = False
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
@ -120,7 +119,9 @@ class RtmpFD(FileDownloader):
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrumpted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||
basic_args = [
|
||||
'rtmpdump', '--verbose', '-r', url,
|
||||
'-o', encodeFilename(tmpfilename, True)]
|
||||
if player_url is not None:
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
|
@ -6,7 +6,9 @@ from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .adobetv import AdobeTVIE
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aftenposten import AftenpostenIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .anitube import AnitubeIE
|
||||
@ -35,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .beatportpro import BeatportProIE
|
||||
from .bet import BetIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
@ -48,14 +51,24 @@ from .brightcove import BrightcoveIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
from .camdemy import (
|
||||
CamdemyIE,
|
||||
CamdemyFolderIE
|
||||
)
|
||||
from .canal13cl import Canal13clIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cbs import CBSIE
|
||||
from .cbsnews import CBSNewsIE
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .chirbit import (
|
||||
ChirbitIE,
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
@ -73,7 +86,7 @@ from .collegehumor import CollegeHumorIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .criterion import CriterionIE
|
||||
@ -94,6 +107,7 @@ from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import DouyuTVIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
@ -104,6 +118,7 @@ from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .divxstage import DivxStageIE
|
||||
from .dropbox import DropboxIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .echomsk import EchoMskIE
|
||||
from .ehow import EHowIE
|
||||
@ -115,6 +130,7 @@ from .ellentv import (
|
||||
EllenTVClipsIE,
|
||||
)
|
||||
from .elpais import ElPaisIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .empflix import EMPFlixIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
@ -137,6 +153,7 @@ from .fktv import (
|
||||
)
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .fourtube import FourTubeIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
@ -161,6 +178,7 @@ from .gameone import (
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .giantbomb import GiantBombIE
|
||||
@ -183,6 +201,7 @@ from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .history import HistoryIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hostingbulk import HostingBulkIE
|
||||
@ -197,6 +216,7 @@ from .imdb import (
|
||||
ImdbIE,
|
||||
ImdbListIE
|
||||
)
|
||||
from .imgur import ImgurIE
|
||||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
@ -212,6 +232,8 @@ from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
@ -223,6 +245,12 @@ from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .letv import (
|
||||
LetvIE,
|
||||
LetvTvIE,
|
||||
LetvPlaylistIE
|
||||
)
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import (
|
||||
@ -275,6 +303,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import NationalGeographicIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import (
|
||||
@ -285,6 +314,7 @@ from .ndr import NDRIE
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .nerdist import NerdistIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nextmedia import (
|
||||
@ -311,15 +341,20 @@ from .nowvideo import NowVideoIE
|
||||
from .npo import (
|
||||
NPOIE,
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
TegenlichtVproIE,
|
||||
)
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKPlaylistIE,
|
||||
NRKTVIE,
|
||||
)
|
||||
from .ntv import NTVIE
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
from .nytimes import NYTimesIE
|
||||
from .nuvid import NuvidIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .openfilm import OpenFilmIE
|
||||
@ -327,6 +362,7 @@ from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFOE1IE,
|
||||
ORFFM4IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
@ -334,18 +370,26 @@ from .pbs import PBSIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import PornHubIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
PornHubPlaylistIE,
|
||||
)
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .primesharetv import PrimeShareTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .puls4 import Puls4IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .quickvid import QuickVidIE
|
||||
from .r7 import R7IE
|
||||
from .radiode import RadioDeIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
@ -360,12 +404,12 @@ from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE
|
||||
from .rtlnl import RtlXlIE
|
||||
from .rtlnl import RtlNlIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
@ -375,6 +419,7 @@ from .rutube import (
|
||||
RutubePersonIE,
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .sandia import SandiaIE
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
@ -405,7 +450,10 @@ from .soundcloud import (
|
||||
SoundcloudUserIE,
|
||||
SoundcloudPlaylistIE
|
||||
)
|
||||
from .soundgasm import SoundgasmIE
|
||||
from .soundgasm import (
|
||||
SoundgasmIE,
|
||||
SoundgasmProfileIE
|
||||
)
|
||||
from .southpark import (
|
||||
SouthParkIE,
|
||||
SouthparkDeIE,
|
||||
@ -419,12 +467,14 @@ from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .svtplay import SVTPlayIE
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
@ -470,9 +520,11 @@ from .tumblr import TumblrIE
|
||||
from .tunein import TuneInIE
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tv4 import TV4IE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
@ -488,6 +540,7 @@ from .udemy import (
|
||||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .ultimedia import UltimediaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
@ -511,6 +564,7 @@ from .videoweed import VideoWeedIE
|
||||
from .vidme import VidmeIE
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewster import ViewsterIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoAlbumIE,
|
||||
@ -552,6 +606,7 @@ from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wrzuta import WrzutaIE
|
||||
from .wsj import WSJIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xhamster import XHamsterIE
|
||||
@ -565,6 +620,12 @@ from .yahoo import (
|
||||
YahooIE,
|
||||
YahooSearchIE,
|
||||
)
|
||||
from .yam import YamIE
|
||||
from .yandexmusic import (
|
||||
YandexMusicTrackIE,
|
||||
YandexMusicAlbumIE,
|
||||
YandexMusicPlaylistIE,
|
||||
)
|
||||
from .yesjapan import YesJapanIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
@ -588,6 +649,7 @@ from .youtube import (
|
||||
YoutubeUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
|
@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player = self._parse_json(
|
||||
@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):
|
||||
self._html_search_meta('datepublished', webpage, 'upload date'))
|
||||
|
||||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration')
|
||||
or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
|
||||
self._html_search_meta('duration', webpage, 'duration') or
|
||||
self._search_regex(
|
||||
r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
|
||||
|
@ -2,13 +2,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
xpath_text,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@ -38,6 +37,7 @@ class AdultSwimIE(InfoExtractor):
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
|
||||
'title': 'Rick and Morty - Pilot',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
}
|
||||
@ -55,9 +55,28 @@ class AdultSwimIE(InfoExtractor):
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'id': '-t8CamQlQ2aYZ49ItZCFog',
|
||||
'title': 'American Dad - Putting Francine Out of Business',
|
||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
|
||||
'info_dict': {
|
||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
|
||||
'ext': 'flv',
|
||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||
},
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
|
||||
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
|
||||
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -78,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
|
||||
for video in collection.get('videos'):
|
||||
if video.get('slug') == slug:
|
||||
return collection, video
|
||||
return None, None
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -88,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, episode_path)
|
||||
|
||||
# Extract the value of `bootstrappedData` from the Javascript in the page.
|
||||
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
|
||||
|
||||
try:
|
||||
bootstrappedData = json.loads(bootstrappedDataJS)
|
||||
except ValueError as ve:
|
||||
errmsg = '%s: Failed to parse JSON ' % episode_path
|
||||
raise ExtractorError(errmsg, cause=ve)
|
||||
bootstrapped_data = self._parse_json(self._search_regex(
|
||||
r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
|
||||
|
||||
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
|
||||
# NOTE: We are only downloading one video (the current one) not the playlist
|
||||
if is_playlist:
|
||||
collections = bootstrappedData['playlists']['collections']
|
||||
collections = bootstrapped_data['playlists']['collections']
|
||||
collection = self.find_collection_by_linkURL(collections, show_path)
|
||||
video_info = self.find_video_info(collection, episode_path)
|
||||
|
||||
show_title = video_info['showTitle']
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
else:
|
||||
collections = bootstrappedData['show']['collections']
|
||||
collections = bootstrapped_data['show']['collections']
|
||||
collection, video_info = self.find_collection_containing_video(collections, episode_path)
|
||||
|
||||
show = bootstrappedData['show']
|
||||
# Video wasn't found in the collections, let's try `slugged_video`.
|
||||
if video_info is None:
|
||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||
video_info = bootstrapped_data['slugged_video']
|
||||
else:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
|
||||
show = bootstrapped_data['show']
|
||||
show_title = show['title']
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
|
||||
|
98
youtube_dl/extractor/aftenposten.py
Normal file
98
youtube_dl/extractor/aftenposten.py
Normal file
@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class AftenpostenIE(InfoExtractor):
    """Extractor for aftenposten.no web TV videos.

    The numeric id in the page URL is used to fetch an Atom feed from
    frontend.xstream.dk, from which title, description, publication time,
    media URLs and splash images are read.
    """
    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
        'md5': 'fd828cd29774a729bf4d4425fe192972',
        'info_dict': {
            'id': '21039',
            'ext': 'mov',
            'title': 'TRAILER: "Sweatshop" - I can´t take any more',
            'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
            'timestamp': 1416927969,
            'upload_date': '20141125',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by *url*."""
        video_id = self._match_id(url)

        data = self._download_xml(
            'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)

        # Namespace prefixes used in the XPath expressions below.
        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        formats = []
        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
        for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
            media_url = media_content.get('url')
            if not media_url:
                continue
            # NOTE(review): presumably None when the element carries no
            # usable 'bitrate' attribute - confirm against int_or_none.
            tbr = int_or_none(media_content.get('bitrate'))
            # RTMP URLs are split into connection URL, application and play path.
            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if mobj:
                formats.append({
                    'url': mobj.group('url'),
                    'play_path': 'mp4:%s' % mobj.group('playpath'),
                    'app': mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # '%d' cannot format None, so use a plain id when the
                    # bitrate is unknown instead of raising TypeError.
                    'format_id': 'rtmp-%d' % tbr if tbr is not None else 'rtmp',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })
        self._sort_formats(formats)

        # The 'original' link is appended after sorting, so it always ends up
        # last in the list regardless of how the other formats were ordered.
        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        if link is not None:
            formats.append({
                'url': link.get('href'),
                'format_id': link.get('rel'),
            })

        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
@ -1,8 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@ -21,9 +19,7 @@ class AftonbladetIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.search(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('video_id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# find internal video meta data
|
||||
|
74
youtube_dl/extractor/airmozilla.py
Normal file
74
youtube_dl/extractor/airmozilla.py
Normal file
@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class AirMozillaIE(InfoExtractor):
    """Extractor for air.mozilla.org pages that embed a vid.ly player.

    Media sources and the poster image come from the ``jwconfig`` object in
    the vid.ly embed script; the remaining metadata is scraped from the
    air.mozilla.org page itself.
    """
    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
    _TEST = {
        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
        'md5': '2e3e7486ba5d180e829d453875b9b8bf',
        'info_dict': {
            'id': '6x4q2w',
            'ext': 'mp4',
            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
            'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
            'timestamp': 1422487800,
            'upload_date': '20150128',
            'location': 'SFO Commons',
            'duration': 3780,
            'view_count': int,
            'categories': ['Main'],
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the event page at *url*."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')

        # The player configuration lives in a separate embed script.
        embed_url = 'https://vid.ly/{0}/embed'.format(video_id)
        embed_script = self._download_webpage(embed_url, video_id)
        jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
        metadata = self._parse_json(jwconfig, video_id)

        formats = []
        for source in metadata['playlist'][0]['sources']:
            file_url = source['file']
            ext = source['type']
            format_id = self._search_regex(r'&format=(.*)$', source['file'], 'video format')
            label = source['label']
            formats.append({
                'url': file_url,
                'ext': ext,
                'format_id': format_id,
                'format': label,
                # The label is parsed as a height with any trailing 'p' removed.
                'height': int(source['label'].rstrip('p')),
            })
        self._sort_formats(formats)

        view_count_text = self._html_search_regex(
            r'Views since archived: ([0-9]+)',
            webpage, 'view count', fatal=False)
        view_count = int_or_none(view_count_text)

        timestamp_text = self._html_search_regex(
            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False)
        timestamp = parse_iso8601(timestamp_text)

        duration_text = self._search_regex(
            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
            webpage, 'duration', fatal=False)
        duration = parse_duration(duration_text)

        # Remaining fields are gathered in the same order as they appear in
        # the result dict.
        title = self._og_search_title(webpage)
        page_url = self._og_search_url(webpage)
        thumbnail = metadata['playlist'][0].get('image')
        description = self._og_search_description(webpage)
        location = self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None)
        categories = re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'url': page_url,
            'display_id': display_id,
            'thumbnail': thumbnail,
            'description': description,
            'timestamp': timestamp,
            'location': location,
            'duration': duration,
            'view_count': view_count,
            'categories': categories,
        }
|
@ -20,6 +20,7 @@ class AparatIE(InfoExtractor):
|
||||
'id': 'wP8On',
|
||||
'ext': 'mp4',
|
||||
'title': 'تیم گلکسی 11 - زومیت',
|
||||
'age_limit': 0,
|
||||
},
|
||||
# 'skip': 'Extremely unreliable',
|
||||
}
|
||||
@ -34,7 +35,8 @@ class AparatIE(InfoExtractor):
|
||||
video_id + '/vt/frame')
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
|
||||
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
|
||||
video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
|
||||
r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
|
||||
for i, video_url in enumerate(video_urls):
|
||||
req = HEADRequest(video_url)
|
||||
res = self._request_webpage(
|
||||
@ -46,7 +48,7 @@ class AparatIE(InfoExtractor):
|
||||
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -54,4 +56,5 @@ class AparatIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': self._family_friendly_search(webpage),
|
||||
}
|
||||
|
@ -11,9 +11,12 @@ from ..utils import (
|
||||
|
||||
|
||||
class AppleTrailersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_TESTS = [{
|
||||
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||
'info_dict': {
|
||||
'id': 'manofsteel',
|
||||
},
|
||||
"playlist": [
|
||||
{
|
||||
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
|
||||
@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_JSON_RE = r'iTunes.playURL\((.*?)\);'
|
||||
|
||||
|
@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor):
|
||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
|
||||
|
||||
if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
|
||||
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = parse_xml(webpage)
|
||||
if doc.tag == 'rss':
|
||||
|
@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
|
||||
formats.append(format)
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info_dict['formats'] = formats
|
||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
import time
|
||||
import hmac
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
@ -17,8 +17,9 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AtresPlayerIE(SubtitlesInfoExtractor):
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||
_NETRC_MACHINE = 'atresplayer'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
|
||||
@ -144,13 +145,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
|
||||
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
||||
|
||||
subtitles = {}
|
||||
subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
||||
if subtitle:
|
||||
subtitles['es'] = subtitle
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
||||
if subtitle_url:
|
||||
subtitles['es'] = [{
|
||||
'ext': 'srt',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -159,5 +159,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(video_id, subtitles),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor):
|
||||
'duration': int(info['length']),
|
||||
'view_count': int(info['views_total']),
|
||||
'uploader': info['username'],
|
||||
'uploader_id': info['uid'],
|
||||
'uploader_id': info['owner']['uid'],
|
||||
}
|
||||
|
||||
|
||||
|
@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
download_link = m_download.group(1)
|
||||
video_id = self._search_regex(
|
||||
r'var TralbumData = {.*?id: (?P<id>\d+),?$',
|
||||
webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
|
||||
r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
|
||||
webpage, 'video id')
|
||||
|
||||
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
||||
# We get the dictionary of the track from some javascript code
|
||||
info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
|
||||
info = json.loads(info)[0]
|
||||
all_info = self._parse_json(self._search_regex(
|
||||
r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
|
||||
info = all_info[0]
|
||||
# We pick mp3-320 for now, until format selection can be easily implemented.
|
||||
mp3_info = info['downloads']['mp3-320']
|
||||
# If we try to use this url it says the link has expired
|
||||
initial_url = mp3_info['url']
|
||||
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
|
||||
m_url = re.match(re_url, initial_url)
|
||||
m_url = re.match(
|
||||
r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
|
||||
initial_url)
|
||||
# We build the url we will use to get the final track url
|
||||
# This url is build in Bandcamp in the script download_bunde_*.js
|
||||
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
|
||||
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
|
||||
# If we could correctly generate the .rand field the url would be
|
||||
# in the "download_url" key
|
||||
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
|
||||
final_url = self._search_regex(
|
||||
r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -106,7 +109,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@ -130,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
],
|
||||
'info_dict': {
|
||||
'title': 'Jazz Format Mixtape vol.1',
|
||||
'id': 'jazz-format-mixtape-vol-1',
|
||||
'uploader_id': 'blazo',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
},
|
||||
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||
'skip': 'Bandcamp imposes download limits.'
|
||||
}, {
|
||||
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
|
||||
'info_dict': {
|
||||
'title': 'Hierophany of the Open Grave',
|
||||
'uploader_id': 'nightbringer',
|
||||
'id': 'hierophany-of-the-open-grave',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://dotscale.bandcamp.com',
|
||||
'info_dict': {
|
||||
'title': 'Loom',
|
||||
'id': 'dotscale',
|
||||
'uploader_id': 'dotscale',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('subdomain')
|
||||
title = mobj.group('title')
|
||||
display_id = title or playlist_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
uploader_id = mobj.group('subdomain')
|
||||
album_id = mobj.group('album_id')
|
||||
playlist_id = album_id or uploader_id
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||
if not tracks_paths:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
@ -165,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'uploader_id': uploader_id,
|
||||
'id': playlist_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
@ -2,12 +2,12 @@ from __future__ import unicode_literals
|
||||
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _extract_captions(self, media, programme_id):
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
for connection in self._extract_connections(media):
|
||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
||||
srt = ''
|
||||
|
||||
def _extract_text(p):
|
||||
if p.text is not None:
|
||||
stripped_text = p.text.strip()
|
||||
if stripped_text:
|
||||
return stripped_text
|
||||
return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
|
||||
for pos, p in enumerate(ps):
|
||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
|
||||
p.text.strip() if p.text is not None else '')
|
||||
subtitles[lang] = srt
|
||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
|
||||
subtitles[lang] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
{
|
||||
'data': srt,
|
||||
'ext': 'srt',
|
||||
},
|
||||
]
|
||||
return subtitles
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self._extract_captions(media, programme_id)
|
||||
subtitles = self.extract_subtitles(media, programme_id)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
@ -273,7 +288,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||
raise
|
||||
|
||||
# fallback to legacy playlist
|
||||
@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
else:
|
||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(programme_id, subtitles)
|
||||
return
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
103
youtube_dl/extractor/beatportpro.py
Normal file
103
youtube_dl/extractor/beatportpro.py
Normal file
@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class BeatportProIE(InfoExtractor):
    """Extractor for track pages on pro.beatport.com.

    Track data is read from the ``window.Playables`` JSON object embedded in
    the page; the preview streams listed there become the formats.
    """
    _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
        'md5': 'b3c34d8639a2f6a7f734382358478887',
        'info_dict': {
            'id': '5379371',
            'display_id': 'synesthesia-original-mix',
            'ext': 'mp4',
            'title': 'Froxic - Synesthesia (Original Mix)',
        },
    }, {
        'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
        'md5': 'e44c3025dfa38c6577fbaeb43da43514',
        'info_dict': {
            'id': '3756896',
            'display_id': 'love-and-war-original-mix',
            'ext': 'mp3',
            'title': 'Wolfgang Gartner - Love & War (Original Mix)',
        },
    }, {
        'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
        'md5': 'a1fd8e8046de3950fd039304c186c05f',
        'info_dict': {
            'id': '4991738',
            'display_id': 'birds-original-mix',
            'ext': 'mp4',
            'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the track page at *url*."""
        url_match = re.match(self._VALID_URL, url)
        track_id = url_match.group('id')
        display_id = url_match.group('display_id')

        webpage = self._download_webpage(url, display_id)

        playables_json = self._search_regex(
            r'window\.Playables\s*=\s*({.+?});', webpage,
            'playables info', flags=re.DOTALL)
        playables = self._parse_json(playables_json, track_id)

        # The page may list several tracks; pick the one named in the URL.
        wanted_id = int(track_id)
        track = next(t for t in playables['tracks'] if t['id'] == wanted_id)

        artist_names = ', '.join([artist['name'] for artist in track['artists']])
        title = artist_names + ' - ' + track['name']
        if track['mix']:
            title += ' (' + track['mix'] + ')'

        # Extra fields per preview container; mp4 gets the higher
        # 'preference' value. Other extensions get no extra fields.
        codec_details = {
            'mp3': {'preference': 0, 'acodec': 'mp3', 'abr': 96, 'asr': 44100},
            'mp4': {'preference': 1, 'acodec': 'aac', 'abr': 96, 'asr': 44100},
        }

        formats = []
        for ext, preview in track['preview'].items():
            preview_url = preview['url']
            if not preview_url:
                continue
            fmt = {
                'url': preview_url,
                'ext': ext,
                'format_id': ext,
                'vcodec': 'none',
            }
            fmt.update(codec_details.get(ext, {}))
            formats.append(fmt)
        self._sort_formats(formats)

        images = []
        for name, details in track['images'].items():
            image_url = details.get('url')
            # The 'dynamic' entry and entries without a URL are left out.
            if name != 'dynamic' and image_url:
                images.append({
                    'id': name,
                    'url': image_url,
                    'height': int_or_none(details.get('height')),
                    'width': int_or_none(details.get('width')),
                })

        return {
            'id': compat_str(track.get('id')) or track_id,
            'display_id': track.get('slug') or display_id,
            'title': title,
            'formats': formats,
            'thumbnails': images,
        }
|
@ -9,7 +9,7 @@ class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': '634526ae978711f6b748fe0dd6c11f57',
|
||||
'md5': '1bff67111adb785c51d1b42959ec10e5',
|
||||
'info_dict': {
|
||||
'id': '5416503',
|
||||
'ext': 'mp4',
|
||||
|
@ -1,40 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_start
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BlinkxIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||
IE_NAME = 'blinkx'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
|
||||
'md5': '2e9a07364af40163a908edbf10bb2492',
|
||||
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
|
||||
'md5': '337cf7a344663ec79bf93a526a2e06c7',
|
||||
'info_dict': {
|
||||
'id': '8aQUy7GV',
|
||||
'id': 'Da0Gw3xc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Police Car Rolls Away',
|
||||
'uploader': 'stupidvideos.com',
|
||||
'upload_date': '20131215',
|
||||
'timestamp': 1387068000,
|
||||
'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
|
||||
'duration': 14.886,
|
||||
'thumbnails': [{
|
||||
'width': 100,
|
||||
'height': 76,
|
||||
'resolution': '100x76',
|
||||
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
|
||||
}],
|
||||
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
|
||||
'uploader': 'IGN News',
|
||||
'upload_date': '20150217',
|
||||
'timestamp': 1424215740,
|
||||
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
|
||||
'duration': 47.743333,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, rl):
|
||||
m = re.match(self._VALID_URL, rl)
|
||||
video_id = m.group('id')
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = video_id[:8]
|
||||
|
||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
|
||||
@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor):
|
||||
elif m['type'] in ('flv', 'mp4'):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
|
||||
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
|
||||
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
|
||||
tbr = vbr + abr if vbr and abr else None
|
||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': m['link'],
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
'abr': int(m['abr']) // 1000,
|
||||
'vbr': int(m['vbr']) // 1000,
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'tbr': tbr,
|
||||
'width': int(m['w']),
|
||||
'height': int(m['h']),
|
||||
'width': int_or_none(m.get('w')),
|
||||
'height': int_or_none(m.get('h')),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
@ -18,7 +17,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class BlipTVIE(SubtitlesInfoExtractor):
|
||||
class BlipTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
|
||||
|
||||
_TESTS = [
|
||||
@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
categories = [category.text for category in item.findall('category')]
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
subtitles_urls = {}
|
||||
|
||||
media_group = item.find(media('group'))
|
||||
for media_content in media_group.findall(media('content')):
|
||||
@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
}
|
||||
lang = role.rpartition('-')[-1].strip().lower()
|
||||
langcode = LANGS.get(lang, lang)
|
||||
subtitles[langcode] = url
|
||||
subtitles_urls[langcode] = url
|
||||
elif media_type.startswith('video/'):
|
||||
formats.append({
|
||||
'url': real_url,
|
||||
@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
subtitles = self.extract_subtitles(video_id, subtitles_urls)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'subtitles': video_subtitles,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _download_subtitle_url(self, sub_lang, url):
|
||||
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||
# when we request with a common UA
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('User-Agent', 'youtube-dl')
|
||||
return self._download_webpage(req, None, note=False)
|
||||
def _get_subtitles(self, video_id, subtitles_urls):
|
||||
subtitles = {}
|
||||
for lang, url in subtitles_urls.items():
|
||||
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||
# when we request with a common UA
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('User-Agent', 'youtube-dl')
|
||||
subtitles[lang] = [{
|
||||
# The extension is 'srt' but it's actually an 'ass' file
|
||||
'ext': 'ass',
|
||||
'data': self._download_webpage(req, None, note=False),
|
||||
}]
|
||||
return subtitles
|
||||
|
||||
|
||||
class BlipTVUserIE(InfoExtractor):
|
||||
|
@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloombergIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||
@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
|
||||
f4m_url = self._search_regex(
|
||||
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
|
||||
'f4m url')
|
||||
|
@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
|
||||
'tbr': media['bitRate'],
|
||||
'width': media['width'],
|
||||
'height': media['height'],
|
||||
} for media in info['media']]
|
||||
} for media in info['media'] if media.get('mediaPurpose') == 'play']
|
||||
|
||||
if not formats:
|
||||
formats.append({
|
||||
|
@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||
'info_dict': {
|
||||
'title': 'Sealife',
|
||||
'id': '3550319591001',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
@ -108,7 +109,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
"""
|
||||
|
||||
# Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
|
||||
object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
|
||||
object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
|
||||
lambda m: m.group(1) + '/>', object_str)
|
||||
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
|
||||
object_str = object_str.replace('<--', '<!--')
|
||||
@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
playlist_info = json_data['videoList']
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||
|
||||
return self.playlist_result(videos, playlist_id=playlist_info['id'],
|
||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
|
@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'skip_download': True, # Got enough YouTube download tests
|
||||
},
|
||||
'info_dict': {
|
||||
'id': 'look-at-this-cute-dog-omg',
|
||||
'description': 're:Munchkin the Teddy Bear is back ?!',
|
||||
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
||||
},
|
||||
@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141124',
|
||||
'uploader_id': 'CindysMunchkin',
|
||||
'description': 're:© 2014 Munchkin the Shih Tzu',
|
||||
'uploader': 'Munchkin the Shih Tzu',
|
||||
'description': 're:© 2014 Munchkin the',
|
||||
'uploader': 're:^Munchkin the',
|
||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||
},
|
||||
}]
|
||||
|
153
youtube_dl/extractor/camdemy.py
Normal file
153
youtube_dl/extractor/camdemy.py
Normal file
@ -0,0 +1,153 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class CamdemyIE(InfoExtractor):
    """Extractor for single media pages on camdemy.com.

    Pages that only embed an external source are delegated to whichever
    extractor handles that source URL.
    """
    _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
    _TESTS = [{
        # single file
        'url': 'http://www.camdemy.com/media/5181/',
        'md5': '5a5562b6a98b37873119102e052e311b',
        'info_dict': {
            'id': '5181',
            'ext': 'mp4',
            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': '',
            'creator': 'ss11spring',
            'upload_date': '20130114',
            'timestamp': 1358154556,
            'view_count': int,
        }
    }, {
        # With non-empty description
        'url': 'http://www.camdemy.com/media/13885',
        'md5': '4576a3bb2581f86c61044822adbd1249',
        'info_dict': {
            'id': '13885',
            'ext': 'mp4',
            'title': 'EverCam + Camdemy QuickStart',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
            'creator': 'evercam',
            'upload_date': '20140620',
            'timestamp': 1403271569,
        }
    }, {
        # External source
        'url': 'http://www.camdemy.com/media/14842',
        'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
        'info_dict': {
            'id': '2vsYQzNIsJo',
            'ext': 'mp4',
            'upload_date': '20130211',
            'uploader': 'Hun Kim',
            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
            'uploader_id': 'hunkimtutorials',
            'title': 'Excel 2013 Tutorial - How to add Password Protection',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the media page, or a url_result for an external source."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Media hosted elsewhere only links to its origin; hand that URL over
        src_from = self._html_search_regex(
            r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
            'external source', default=None)
        if src_from:
            return self.url_result(src_from)

        # The page URL travels as a query parameter, so it has to be
        # URL-encoded: a raw '?', '&' or '#' in it would otherwise be parsed
        # as part of the oEmbed request itself.
        oembed_query = compat_urllib_parse.urlencode([
            ('format', 'json'),
            ('url', url),
        ])
        oembed_obj = self._download_json(
            'http://www.camdemy.com/oembed/?' + oembed_query, video_id)

        # The file list is looked up in a 'video/' folder relative to the thumbnail
        thumb_url = oembed_obj['thumbnail_url']
        video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
        file_list_doc = self._download_xml(
            compat_urlparse.urljoin(video_folder, 'fileList.xml'),
            video_id, 'Filelist XML')
        file_name = file_list_doc.find('./video/item/fileName').text
        video_url = compat_urlparse.urljoin(video_folder, file_name)

        # The posted time carries no zone on the page; it is parsed as UTC+8
        timestamp = parse_iso8601(self._html_search_regex(
            r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
            page, 'creation time', fatal=False),
            delimiter=' ', timezone=datetime.timedelta(hours=8))
        view_count = str_to_int(self._html_search_regex(
            r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
            page, 'view count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': oembed_obj['title'],
            'thumbnail': thumb_url,
            'description': self._html_search_meta('description', page),
            'creator': oembed_obj['author_name'],
            'duration': oembed_obj['duration'],
            'timestamp': timestamp,
            'view_count': view_count,
        }
|
||||
|
||||
|
||||
class CamdemyFolderIE(InfoExtractor):
    """Playlist extractor for camdemy.com folder pages."""
    # Dots in the host name are escaped so they only match a literal '.'
    _VALID_URL = r'http://www\.camdemy\.com/folder/(?P<id>\d+)'
    _TESTS = [{
        # links with trailing slash
        'url': 'http://www.camdemy.com/folder/450',
        'info_dict': {
            'id': '450',
            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
        },
        'playlist_mincount': 145
    }, {
        # links without trailing slash
        # and multi-page
        'url': 'http://www.camdemy.com/folder/853',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }, {
        # with displayMode parameter. For testing the codes to add parameters
        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }]

    def _real_extract(self, url):
        """Return a playlist with one url_result per media link found in the folder."""
        folder_id = self._match_id(url)

        # Add displayMode=list so that all links are displayed in a single page
        # (any displayMode already present in the URL is overridden)
        parsed_url = list(compat_urlparse.urlparse(url))
        query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
        query['displayMode'] = 'list'
        parsed_url[4] = compat_urllib_parse.urlencode(query)
        final_url = compat_urlparse.urlunparse(parsed_url)

        page = self._download_webpage(final_url, folder_id)
        media_paths = re.findall(r"href='(/media/\d+/?)'", page)

        entries = [
            self.url_result('http://www.camdemy.com' + media_path)
            for media_path in media_paths]

        # The folder title is read from the page's 'keywords' meta tag
        folder_title = self._html_search_meta('keywords', page)

        return self.playlist_result(entries, folder_id, folder_title)
|
@ -15,12 +15,13 @@ from ..utils import (
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
|
||||
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
|
||||
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
|
||||
_SITE_ID_MAP = {
|
||||
'canalplus.fr': 'cplus',
|
||||
'piwiplus.fr': 'teletoon',
|
||||
'd8.tv': 'd8',
|
||||
'itele.fr': 'itele',
|
||||
}
|
||||
|
||||
_TESTS = [{
|
||||
@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor):
|
||||
'upload_date': '20131108',
|
||||
},
|
||||
'skip': 'videos get deleted after a while',
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
|
||||
'md5': '65aa83ad62fe107ce29e564bb8712580',
|
||||
'info_dict': {
|
||||
'id': '1213714',
|
||||
'ext': 'flv',
|
||||
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
|
||||
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
|
||||
'upload_date': '20150211',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
real_id = self._search_regex(
|
||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||
|
30
youtube_dl/extractor/cbssports.py
Normal file
30
youtube_dl/extractor/cbssports.py
Normal file
@ -0,0 +1,30 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CBSSportsIE(InfoExtractor):
    """Extractor for cbssports.com video player pages; defers to ThePlatform."""
    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
        'info_dict': {
            'id': '_d5_GbO8p1sT',
            'ext': 'flv',
            'title': 'US Open flashbacks: 1990s',
            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
        },
    }

    def _real_extract(self, url):
        """Find the video in its section listing and return a ThePlatform url_result."""
        # _VALID_URL has exactly two groups, in the order: section, id
        section, video_id = re.match(self._VALID_URL, url).groups()

        # The json file contains the info of all the videos in the section
        section_videos = self._download_json(
            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
            video_id)

        # next() raises StopIteration when no entry carries this pcid
        platform_id = next(
            entry for entry in section_videos
            if entry['pcid'] == video_id)['pid']
        return self.url_result('theplatform:%s' % platform_id, 'ThePlatform')
|
99
youtube_dl/extractor/ccc.py
Normal file
99
youtube_dl/extractor/ccc.py
Normal file
@ -0,0 +1,99 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CCCIE(InfoExtractor):
    """Extractor for talk pages on media.ccc.de."""
    IE_NAME = 'media.ccc.de'
    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'

    _TEST = {
        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
        'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
        'info_dict': {
            'id': '20131228183',
            'ext': 'mp4',
            'title': 'Introduction to Processor Design',
            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
            'thumbnail': 're:^https?://.*\.jpg$',
            'view_count': int,
            'upload_date': '20131229',
        }
    }

    def _real_extract(self, url):
        """Scrape the talk page for metadata and every offered download format."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Format ids ordered from least to most preferred; the two lists
        # differ only in which container wins within each quality tier.
        if self._downloader.params.get('prefer_free_formats'):
            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
        else:
            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])

        title = self._html_search_regex(
            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r"(?s)<p class='description'>(.*?)</p>",
            webpage, 'description', fatal=False)
        upload_date = unified_strdate(self._html_search_regex(
            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
            webpage, 'upload date', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
            webpage, 'view count', fatal=False))

        matches = re.finditer(r'''(?xs)
            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
            <a\s+href='(?P<http_url>[^']+)'>\s*
            (?:
                .*?
                <a\s+href='(?P<torrent_url>[^']+\.torrent)'
            )?''', webpage)
        formats = []
        for m in matches:
            format_name = m.group('format')
            # The format id is the last directory component of the download
            # URL; it is None when the URL does not have that shape.
            format_id = self._search_regex(
                r'.*/([a-z0-9_-]+)/[^/]*$',
                m.group('http_url'), 'format id', default=None)

            # Guard against a missing format id: a substring test on None
            # would raise TypeError and abort the whole extraction.
            if format_id is None:
                vcodec = None
            elif 'h264' in format_id:
                vcodec = 'h264'
            elif format_id in ('mp3', 'opus'):
                vcodec = 'none'  # audio-only download
            else:
                vcodec = None

            formats.append({
                'format_id': format_id,
                'format': format_name,
                'url': m.group('http_url'),
                'vcodec': vcodec,
                'preference': preference(format_id),
            })

            if m.group('torrent_url'):
                formats.append({
                    'format_id': 'torrent-%s' % (format_name if format_id is None else format_id),
                    'format': '%s (torrent)' % format_name,
                    'proto': 'torrent',
                    'format_note': '(unsupported; will just download the .torrent file)',
                    'vcodec': vcodec,
                    # Always rank torrents below the direct downloads
                    'preference': -100 + preference(format_id),
                    'url': m.group('torrent_url'),
                })
        self._sort_formats(formats)

        thumbnail = self._html_search_regex(
            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'upload_date': upload_date,
            'formats': formats,
        }
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
@ -15,7 +15,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [
|
||||
@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
||||
subtitles = {}
|
||||
subs = item.get('subtitles')
|
||||
if subs:
|
||||
subtitles['cs'] = subs[0]['url']
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
|
||||
subtitles = self.extract_subtitles(episode_id, subs)
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, episode_id, subs):
|
||||
original_subtitles = self._download_webpage(
|
||||
subs[0]['url'], episode_id, 'Downloading subtitles')
|
||||
srt_subs = self._fix_subtitles(original_subtitles)
|
||||
return {
|
||||
'cs': [{
|
||||
'ext': 'srt',
|
||||
'data': srt_subs,
|
||||
}]
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _fix_subtitles(subtitles):
|
||||
""" Convert millisecond-based subtitles to SRT """
|
||||
if subtitles is None:
|
||||
return subtitles # subtitles not requested
|
||||
|
||||
def _msectotimecode(msec):
|
||||
""" Helper utility to convert milliseconds to timecode """
|
||||
@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
||||
else:
|
||||
yield line
|
||||
|
||||
fixed_subtitles = {}
|
||||
for k, v in subtitles.items():
|
||||
fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
|
||||
return fixed_subtitles
|
||||
return "\r\n".join(_fix_subtitle(subtitles))
|
||||
|
84
youtube_dl/extractor/chirbit.py
Normal file
84
youtube_dl/extractor/chirbit.py
Normal file
@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ChirbitIE(InfoExtractor):
    """Extractor for single audio posts on chirb.it."""
    IE_NAME = 'chirbit'
    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://chirb.it/PrIPv5',
        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
        'info_dict': {
            'id': 'PrIPv5',
            'ext': 'mp3',
            'title': 'Фасадстрой',
            'duration': 52,
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the canonical chirb.it page for the audio URL and its metadata."""
        audio_id = self._match_id(url)

        # Every accepted URL form is normalised to the plain post page
        page_url = 'http://chirb.it/%s' % audio_id
        webpage = self._download_webpage(page_url, audio_id)

        # Mandatory fields first: a failed search here aborts the extraction
        info = {'id': audio_id}
        info['url'] = self._search_regex(
            r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
        info['title'] = self._search_regex(
            r'itemprop="name">([^<]+)', webpage, 'title')

        # Optional fields: each search is non-fatal
        raw_duration = self._html_search_meta(
            'duration', webpage, 'duration', fatal=False)
        info['duration'] = parse_duration(raw_duration)

        raw_listens = self._search_regex(
            r'itemprop="playCount"\s*>(\d+)', webpage,
            'listen count', fatal=False)
        info['view_count'] = int_or_none(raw_listens)

        raw_comments = self._search_regex(
            r'>(\d+) Comments?:', webpage,
            'comment count', fatal=False)
        info['comment_count'] = int_or_none(raw_comments)

        return info
|
||||
|
||||
|
||||
class ChirbitProfileIE(InfoExtractor):
    """Playlist extractor for chirbit.com user profiles, based on the RSS feed."""
    IE_NAME = 'chirbit:profile'
    # The dot in the host name is escaped so it only matches a literal '.'
    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'info_dict': {
            'id': 'ScarletBeauty',
            'title': 'Chirbits by ScarletBeauty',
        },
        'playlist_mincount': 3,
    }

    def _real_extract(self, url):
        """Return a playlist with one Chirbit url_result per item link in the RSS feed."""
        profile_id = self._match_id(url)

        rss = self._download_xml(
            'http://chirbit.com/rss/%s' % profile_id, profile_id)

        entries = [
            self.url_result(audio_url.text, 'Chirbit')
            for audio_url in rss.findall('./channel/item/link')]

        title = rss.find('./channel/title').text

        return self.playlist_result(entries, profile_id, title)
|
@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
file_key = self._search_regex(
|
||||
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
|
||||
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
||||
webpage, 'file_key')
|
||||
|
||||
return self._extract_video(video_host, video_id, file_key)
|
||||
|
@ -250,6 +250,8 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = self._extract_subtitles(cdoc, guid)
|
||||
|
||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
||||
entries.append({
|
||||
'id': guid,
|
||||
@ -260,6 +262,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return {
|
||||
|
@ -27,7 +27,6 @@ from ..utils import (
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
@ -145,17 +144,25 @@ class InfoExtractor(object):
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
creator: The main artist who created the video.
|
||||
timestamp: UNIX timestamp of the moment the video became available.
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
location: Physical location where the video was filmed.
|
||||
subtitles: The subtitle file contents as a dictionary in the format
|
||||
{language: subtitles}.
|
||||
subtitles: The available subtitles as a dictionary in the format
|
||||
{language: subformats}. "subformats" is a list sorted from
|
||||
lower to higher preference, each element is a dictionary
|
||||
with the "ext" entry and one of:
|
||||
* "data": The subtitles file contents
|
||||
* "url": A url pointing to the subtitles file
|
||||
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
||||
automatically generated captions
|
||||
duration: Length of the video in seconds, as an integer.
|
||||
view_count: How many users have watched the video on the platform.
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
average_rating: Average rating given by users, the scale used depends on the webpage
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
properties (all but one of text or html optional):
|
||||
@ -263,8 +270,15 @@ class InfoExtractor(object):
|
||||
|
||||
def extract(self, url):
|
||||
"""Extracts URL information and returns it in list of dicts."""
|
||||
self.initialize()
|
||||
return self._real_extract(url)
|
||||
try:
|
||||
self.initialize()
|
||||
return self._real_extract(url)
|
||||
except ExtractorError:
|
||||
raise
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
raise ExtractorError('A network error has occured.', cause=e, expected=True)
|
||||
except (KeyError, StopIteration) as e:
|
||||
raise ExtractorError('An extractor error has occured.', cause=e)
|
||||
|
||||
def set_downloader(self, downloader):
|
||||
"""Sets the downloader for this IE."""
|
||||
@ -383,6 +397,16 @@ class InfoExtractor(object):
|
||||
if blocked_iframe:
|
||||
msg += ' Visit %s for more details' % blocked_iframe
|
||||
raise ExtractorError(msg, expected=True)
|
||||
if '<title>The URL you requested has been blocked</title>' in content[:512]:
|
||||
msg = (
|
||||
'Access to this webpage has been blocked by Indian censorship. '
|
||||
'Use a VPN or proxy server (with --proxy) to route around it.')
|
||||
block_msg = self._html_search_regex(
|
||||
r'</h1><p>(.*?)</p>',
|
||||
content, 'block message', default=None)
|
||||
if block_msg:
|
||||
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
return content
|
||||
|
||||
@ -506,7 +530,7 @@ class InfoExtractor(object):
|
||||
if mobj:
|
||||
break
|
||||
|
||||
if os.name != 'nt' and sys.stderr.isatty():
|
||||
if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
|
||||
_name = '\033[0;34m%s\033[0m' % name
|
||||
else:
|
||||
_name = name
|
||||
@ -655,6 +679,21 @@ class InfoExtractor(object):
|
||||
}
|
||||
return RATING_TABLE.get(rating.lower(), None)
|
||||
|
||||
def _family_friendly_search(self, html):
|
||||
# See http://schema.org/VideoObject
|
||||
family_friendly = self._html_search_meta('isFamilyFriendly', html)
|
||||
|
||||
if not family_friendly:
|
||||
return None
|
||||
|
||||
RATING_TABLE = {
|
||||
'1': 0,
|
||||
'true': 0,
|
||||
'0': 18,
|
||||
'false': 18,
|
||||
}
|
||||
return RATING_TABLE.get(family_friendly.lower(), None)
|
||||
|
||||
def _twitter_search_player(self, html):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
@ -704,15 +743,15 @@ class InfoExtractor(object):
|
||||
preference,
|
||||
f.get('language_preference') if f.get('language_preference') is not None else -1,
|
||||
f.get('quality') if f.get('quality') is not None else -1,
|
||||
f.get('tbr') if f.get('tbr') is not None else -1,
|
||||
f.get('filesize') if f.get('filesize') is not None else -1,
|
||||
f.get('vbr') if f.get('vbr') is not None else -1,
|
||||
f.get('height') if f.get('height') is not None else -1,
|
||||
f.get('width') if f.get('width') is not None else -1,
|
||||
ext_preference,
|
||||
f.get('tbr') if f.get('tbr') is not None else -1,
|
||||
f.get('vbr') if f.get('vbr') is not None else -1,
|
||||
f.get('abr') if f.get('abr') is not None else -1,
|
||||
audio_ext_preference,
|
||||
f.get('fps') if f.get('fps') is not None else -1,
|
||||
f.get('filesize') if f.get('filesize') is not None else -1,
|
||||
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
|
||||
f.get('source_preference') if f.get('source_preference') is not None else -1,
|
||||
f.get('format_id'),
|
||||
@ -728,10 +767,12 @@ class InfoExtractor(object):
|
||||
formats)
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video'):
|
||||
url = self._proto_relative_url(url, scheme='http:')
|
||||
# For now assume non HTTP(S) URLs always valid
|
||||
if not (url.startswith('http://') or url.startswith('https://')):
|
||||
return True
|
||||
try:
|
||||
self._request_webpage(
|
||||
HEADRequest(url), video_id,
|
||||
'Checking %s URL' % item)
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||
return True
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
@ -764,7 +805,7 @@ class InfoExtractor(object):
|
||||
self.to_screen(msg)
|
||||
time.sleep(timeout)
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id):
|
||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
|
||||
manifest = self._download_xml(
|
||||
manifest_url, video_id, 'Downloading f4m manifest',
|
||||
'Unable to download f4m manifest')
|
||||
@ -777,30 +818,32 @@ class InfoExtractor(object):
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
if manifest_version == '2.0':
|
||||
manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
|
||||
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
|
||||
(media_el.attrib.get('href') or media_el.attrib.get('url')))
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
format_id = 'f4m-%d' % (i if tbr is None else tbr)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(media_el.attrib.get('width')),
|
||||
'height': int_or_none(media_el.attrib.get('height')),
|
||||
'preference': preference,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None):
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None):
|
||||
|
||||
formats = [{
|
||||
'format_id': 'm3u8-meta',
|
||||
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': 'm3u8',
|
||||
'preference': -1,
|
||||
'preference': preference - 1 if preference else -1,
|
||||
'resolution': 'multiple',
|
||||
'format_note': 'Quality selection URL',
|
||||
}]
|
||||
@ -815,6 +858,7 @@ class InfoExtractor(object):
|
||||
note='Downloading m3u8 information',
|
||||
errnote='Failed to download m3u8 information')
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
||||
for line in m3u8_doc.splitlines():
|
||||
@ -825,6 +869,13 @@ class InfoExtractor(object):
|
||||
if v.startswith('"'):
|
||||
v = v[1:-1]
|
||||
last_info[m.group('key')] = v
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
last_media = {}
|
||||
for m in kv_rex.finditer(line):
|
||||
v = m.group('val')
|
||||
if v.startswith('"'):
|
||||
v = v[1:-1]
|
||||
last_media[m.group('key')] = v
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
@ -832,9 +883,13 @@ class InfoExtractor(object):
|
||||
formats.append({'url': format_url(line)})
|
||||
continue
|
||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media else None
|
||||
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
||||
f = {
|
||||
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(line.strip()),
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
@ -854,54 +909,78 @@ class InfoExtractor(object):
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
# TODO: improve extraction
|
||||
def _extract_smil_formats(self, smil_url, video_id):
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True):
|
||||
smil = self._download_xml(
|
||||
smil_url, video_id, 'Downloading SMIL file',
|
||||
'Unable to download SMIL file')
|
||||
'Unable to download SMIL file', fatal=fatal)
|
||||
if smil is False:
|
||||
assert not fatal
|
||||
return []
|
||||
|
||||
base = smil.find('./head/meta').get('base')
|
||||
|
||||
formats = []
|
||||
rtmp_count = 0
|
||||
for video in smil.findall('./body/switch/video'):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
if not proto:
|
||||
if base:
|
||||
if base.startswith('rtmp'):
|
||||
proto = 'rtmp'
|
||||
elif base.startswith('http'):
|
||||
proto = 'http'
|
||||
ext = video.get('ext')
|
||||
if proto == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
|
||||
elif proto == 'rtmp':
|
||||
rtmp_count += 1
|
||||
streamer = video.get('streamer') or base
|
||||
formats.append({
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
if smil.findall('./body/seq/video'):
|
||||
video = smil.findall('./body/seq/video')[0]
|
||||
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||
formats.extend(fmts)
|
||||
else:
|
||||
for video in smil.findall('./body/switch/video'):
|
||||
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||
formats.extend(fmts)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _parse_smil_video(self, video, video_id, base, rtmp_count):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
return ([], rtmp_count)
|
||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
if not proto:
|
||||
if base:
|
||||
if base.startswith('rtmp'):
|
||||
proto = 'rtmp'
|
||||
elif base.startswith('http'):
|
||||
proto = 'http'
|
||||
ext = video.get('ext')
|
||||
if proto == 'm3u8':
|
||||
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
|
||||
elif proto == 'rtmp':
|
||||
rtmp_count += 1
|
||||
streamer = video.get('streamer') or base
|
||||
return ([{
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}], rtmp_count)
|
||||
elif proto.startswith('http'):
|
||||
return ([{
|
||||
'url': base + src,
|
||||
'ext': ext or 'flv',
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}], rtmp_count)
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
@ -965,6 +1044,27 @@ class InfoExtractor(object):
|
||||
any_restricted = any_restricted or is_restricted
|
||||
return not any_restricted
|
||||
|
||||
def extract_subtitles(self, *args, **kwargs):
|
||||
if (self._downloader.params.get('writesubtitles', False) or
|
||||
self._downloader.params.get('listsubtitles')):
|
||||
return self._get_subtitles(*args, **kwargs)
|
||||
return {}
|
||||
|
||||
def _get_subtitles(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
def extract_automatic_captions(self, *args, **kwargs):
|
||||
if (self._downloader.params.get('writeautomaticsub', False) or
|
||||
self._downloader.params.get('listsubtitles')):
|
||||
return self._get_automatic_captions(*args, **kwargs)
|
||||
return {}
|
||||
|
||||
def _get_automatic_captions(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
def _subtitles_timecode(self, seconds):
|
||||
return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@ -24,6 +24,23 @@ class CommonMistakesIE(InfoExtractor):
|
||||
'That doesn\'t make any sense. '
|
||||
'Simply remove the parameter in your command or configuration.'
|
||||
) % url
|
||||
if self._downloader.params.get('verbose'):
|
||||
if not self._downloader.params.get('verbose'):
|
||||
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
|
||||
class UnicodeBOMIE(InfoExtractor):
|
||||
IE_DESC = False
|
||||
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
real_url = self._match_id(url)
|
||||
self.report_warning(
|
||||
'Your URL starts with a Byte Order Mark (BOM). '
|
||||
'Removing the BOM and looking for "%s" ...' % real_url)
|
||||
return self.url_result(real_url)
|
||||
|
@ -9,7 +9,7 @@ import xml.etree.ElementTree
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
@ -23,13 +23,12 @@ from ..utils import (
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
inc,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||
class CrunchyrollIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'info_dict': {
|
||||
@ -102,13 +101,6 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||
|
||||
key = obfuscate_key(id)
|
||||
|
||||
class Counter:
|
||||
__value = iv
|
||||
|
||||
def next_value(self):
|
||||
temp = self.__value
|
||||
self.__value = inc(self.__value)
|
||||
return temp
|
||||
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
||||
return zlib.decompress(decrypted_data)
|
||||
|
||||
@ -187,6 +179,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
return output
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
subtitles = {}
|
||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||
sub_page = self._download_webpage(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
||||
video_id, note='Downloading subtitles for ' + sub_name)
|
||||
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
||||
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
||||
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
||||
if not id or not iv or not data:
|
||||
continue
|
||||
id = int(id)
|
||||
iv = base64.b64decode(iv)
|
||||
data = base64.b64decode(data)
|
||||
|
||||
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||
if not lang_code:
|
||||
continue
|
||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||
subtitles[lang_code] = [
|
||||
{
|
||||
'ext': 'srt',
|
||||
'data': self._convert_subtitles_to_srt(sub_root),
|
||||
},
|
||||
{
|
||||
'ext': 'ass',
|
||||
'data': self._convert_subtitles_to_ass(sub_root),
|
||||
},
|
||||
]
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
@ -249,34 +273,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'format_id': video_format,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||
sub_page = self._download_webpage(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
|
||||
video_id, note='Downloading subtitles for ' + sub_name)
|
||||
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
|
||||
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
|
||||
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
|
||||
if not id or not iv or not data:
|
||||
continue
|
||||
id = int(id)
|
||||
iv = base64.b64decode(iv)
|
||||
data = base64.b64decode(data)
|
||||
|
||||
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
|
||||
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
|
||||
if not lang_code:
|
||||
continue
|
||||
sub_root = xml.etree.ElementTree.fromstring(subtitle)
|
||||
if sub_format == 'ass':
|
||||
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
|
||||
else:
|
||||
subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -6,7 +6,6 @@ import json
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
@ -31,7 +30,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
return request
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
"""Information Extractor for Dailymotion"""
|
||||
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||
@ -47,13 +46,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
|
||||
'md5': '392c4b85a60a90dc4792da41ce3144eb',
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
'md5': '2137c41a8e78554bb09225b8eb322406',
|
||||
'info_dict': {
|
||||
'id': 'x33vw9',
|
||||
'id': 'x2iuewm',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Amphora Alex and Van .',
|
||||
'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
|
||||
'uploader': 'IGN',
|
||||
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||
}
|
||||
},
|
||||
# Vevo video
|
||||
@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, webpage)
|
||||
return
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
||||
@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
sub_list = self._download_webpage(
|
||||
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
||||
@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
return {}
|
||||
info = json.loads(sub_list)
|
||||
if (info['total'] > 0):
|
||||
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
|
||||
sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
|
||||
return sub_lang_list
|
||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||
return {}
|
||||
@ -194,6 +190,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||
'info_dict': {
|
||||
'title': 'SPORT',
|
||||
'id': 'xv4bw_nqtv_sport',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}]
|
||||
|
@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor):
|
||||
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade'
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):
|
||||
r"flashvars.pvg_id=\"(\d+)\";",
|
||||
webpage, 'ID')
|
||||
|
||||
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
||||
+ video_id)
|
||||
json_url = (
|
||||
'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
|
||||
video_id)
|
||||
info = self._download_json(json_url, title, 'Downloading JSON config')
|
||||
video_url = info['renditions'][0]['url']
|
||||
|
||||
|
@ -1,13 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DotsubIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||
'md5': '0914d4d69605090f623b7ac329fea66e',
|
||||
@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor):
|
||||
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||
'ext': 'flv',
|
||||
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
|
||||
'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
|
||||
'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
||||
'duration': 3169,
|
||||
'uploader': '4v4l0n42',
|
||||
'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
|
||||
'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
||||
'timestamp': 1292248482.625,
|
||||
'upload_date': '20101213',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
|
||||
info = self._download_json(info_url, video_id)
|
||||
date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in miliseconds
|
||||
video_id = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
|
||||
video_url = info.get('mediaURI')
|
||||
|
||||
if not video_url:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': info['mediaURI'],
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': info['title'],
|
||||
'thumbnail': info['screenshotURI'],
|
||||
'description': info['description'],
|
||||
'uploader': info['user'],
|
||||
'view_count': info['numberOfViews'],
|
||||
'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('screenshotURI'),
|
||||
'duration': int_or_none(info.get('duration'), 1000),
|
||||
'uploader': info.get('user'),
|
||||
'timestamp': float_or_none(info.get('dateCreated'), 1000),
|
||||
'view_count': int_or_none(info.get('numberOfViews')),
|
||||
}
|
||||
|
77
youtube_dl/extractor/douyutv.py
Normal file
77
youtube_dl/extractor/douyutv.py
Normal file
@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
'info_dict': {
|
||||
'id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:9e525642c25a0a24302869937cf69d17',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config = self._download_json(
|
||||
'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)
|
||||
|
||||
data = config['data']
|
||||
|
||||
error_code = config.get('error', 0)
|
||||
show_status = data.get('show_status')
|
||||
if error_code is not 0:
|
||||
raise ExtractorError(
|
||||
'Server reported error %i' % error_code, expected=True)
|
||||
|
||||
# 1 = live, 2 = offline
|
||||
if show_status == '2':
|
||||
raise ExtractorError(
|
||||
'Live stream is offline', expected=True)
|
||||
|
||||
base_url = data['rtmp_url']
|
||||
live_path = data['rtmp_live']
|
||||
|
||||
title = self._live_title(data['room_name'])
|
||||
description = data.get('show_details')
|
||||
thumbnail = data.get('room_src')
|
||||
|
||||
uploader = data.get('nickname')
|
||||
uploader_id = data.get('owner_uid')
|
||||
|
||||
multi_formats = data.get('rtmp_multi_bitrate')
|
||||
if not isinstance(multi_formats, dict):
|
||||
multi_formats = {}
|
||||
multi_formats['live'] = live_path
|
||||
|
||||
formats = [{
|
||||
'url': '%s/%s' % (base_url, format_path),
|
||||
'format_id': format_id,
|
||||
'preference': 1 if format_id == 'live' else 0,
|
||||
} for format_id, format_path in multi_formats.items()]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor):
|
||||
'id': '1740434',
|
||||
'display_id': 'hot-perky-blonde-naked-golf',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hot Perky Blonde Naked Golf',
|
||||
'title': 'hot perky blonde naked golf',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor):
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
|
||||
[r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
|
||||
webpage, 'title')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'poster="([^"]+)"',
|
||||
|
@ -1,11 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import ExtractorError
|
||||
from .common import InfoExtractor, ExtractorError
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class DRTVIE(SubtitlesInfoExtractor):
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_TEST = {
|
||||
@ -76,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor):
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
lang = subs['Language']
|
||||
subtitles[LANGS.get(lang, lang)] = subs['Uri']
|
||||
subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
|
||||
|
||||
if not formats and restricted_to_denmark:
|
||||
raise ExtractorError(
|
||||
@ -84,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@ -96,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor):
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(video_id, subtitles),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
98
youtube_dl/extractor/eagleplatform.py
Normal file
98
youtube_dl/extractor/eagleplatform.py
Normal file
@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class EaglePlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
eagleplatform:(?P<custom_host>[^/]+):|
|
||||
https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
# http://lenta.ru/news/2015/03/06/navalny/
|
||||
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
|
||||
'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
'title': 'Навальный вышел на свободу',
|
||||
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 87,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
# http://muz-tv.ru/play/7129/
|
||||
# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
|
||||
'url': 'eagleplatform:media.clipyou.ru:12820',
|
||||
'md5': '6c2ebeab03b739597ce8d86339d5a905',
|
||||
'info_dict': {
|
||||
'id': '12820',
|
||||
'ext': 'mp4',
|
||||
'title': "'O Sole Mio",
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 216,
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _handle_error(self, response):
|
||||
status = int_or_none(response.get('status', 200))
|
||||
if status != 200:
|
||||
raise ExtractorError(' '.join(response['errors']), expected=True)
|
||||
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
|
||||
self._handle_error(response)
|
||||
return response
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
|
||||
|
||||
player_data = self._download_json(
|
||||
'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
|
||||
|
||||
media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
|
||||
|
||||
title = media['title']
|
||||
description = media.get('description')
|
||||
thumbnail = media.get('snapshot')
|
||||
duration = int_or_none(media.get('duration'))
|
||||
view_count = int_or_none(media.get('views'))
|
||||
|
||||
age_restriction = media.get('age_restriction')
|
||||
age_limit = None
|
||||
if age_restriction:
|
||||
age_limit = 0 if age_restriction == 'allow_all' else 18
|
||||
|
||||
m3u8_data = self._download_json(
|
||||
media['sources']['secure_m3u8']['auto'],
|
||||
video_id, 'Downloading m3u8 JSON')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_data['data'][0], video_id,
|
||||
'mp4', entry_protocol='m3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@ -103,20 +102,23 @@ class EightTracksIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
json_like = self._search_regex(
|
||||
r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
|
||||
data = json.loads(json_like)
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
|
||||
playlist_id)
|
||||
|
||||
session = str(random.randint(0, 1000000000))
|
||||
mix_id = data['id']
|
||||
track_count = data['tracks_count']
|
||||
duration = data['duration']
|
||||
avg_song_duration = float(duration) / track_count
|
||||
# duration is sometimes negative, use predefined avg duration
|
||||
if avg_song_duration <= 0:
|
||||
avg_song_duration = 300
|
||||
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||
next_url = first_url
|
||||
entries = []
|
||||
|
16
youtube_dl/extractor/embedly.py
Normal file
16
youtube_dl/extractor/embedly.py
Normal file
@ -0,0 +1,16 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class EmbedlyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
|
@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?) - EPORNER', webpage, 'title')
|
||||
|
||||
redirect_code = self._html_search_regex(
|
||||
r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
|
||||
webpage, 'redirect_code')
|
||||
redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
|
||||
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
|
||||
player_code = self._download_webpage(
|
||||
redirect_url, display_id, note='Downloading player config')
|
||||
|
||||
@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
@ -1,18 +1,20 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class EscapistIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
|
||||
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
|
||||
_TEST = {
|
||||
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
||||
@ -20,64 +22,107 @@ class EscapistIE(InfoExtractor):
|
||||
'id': '6618',
|
||||
'ext': 'mp4',
|
||||
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||
'uploader': 'the-escapist-presents',
|
||||
'uploader_id': 'the-escapist-presents',
|
||||
'uploader': 'The Escapist Presents',
|
||||
'title': "Breaking Down Baldur's Gate",
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 264,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
showName = mobj.group('showname')
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage_req = compat_urllib_request.Request(url)
|
||||
webpage_req.add_header('User-Agent', self._USER_AGENT)
|
||||
webpage = self._download_webpage(webpage_req, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
uploader_id = self._html_search_regex(
|
||||
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
|
||||
webpage, 'uploader ID', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r"<h1\s+class='headline'>(.*?)</a>",
|
||||
webpage, 'uploader', fatal=False)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
|
||||
videoDesc = self._html_search_regex(
|
||||
r'<meta name="description" content="([^"]*)"',
|
||||
webpage, 'description', fatal=False)
|
||||
raw_title = self._html_search_meta('title', webpage, fatal=True)
|
||||
title = raw_title.partition(' : ')[2]
|
||||
|
||||
playerUrl = self._og_search_video_url(webpage, name='player URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<meta name="title" content="([^"]*)"',
|
||||
webpage, 'title').split(' : ')[-1]
|
||||
|
||||
configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
|
||||
configUrl = compat_urllib_parse.unquote(configUrl)
|
||||
config_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||
r'''(?x)
|
||||
(?:
|
||||
<param\s+name="flashvars".*?\s+value="config=|
|
||||
flashvars="config=
|
||||
)
|
||||
(https?://[^"&]+)
|
||||
''',
|
||||
webpage, 'config URL'))
|
||||
|
||||
formats = []
|
||||
ad_formats = []
|
||||
|
||||
def _add_format(name, cfgurl, quality):
|
||||
def _add_format(name, cfg_url, quality):
|
||||
cfg_req = compat_urllib_request.Request(cfg_url)
|
||||
cfg_req.add_header('User-Agent', self._USER_AGENT)
|
||||
config = self._download_json(
|
||||
cfgurl, video_id,
|
||||
cfg_req, video_id,
|
||||
'Downloading ' + name + ' configuration',
|
||||
'Unable to download ' + name + ' configuration',
|
||||
transform_source=lambda s: s.replace("'", '"'))
|
||||
transform_source=js_to_json)
|
||||
|
||||
playlist = config['playlist']
|
||||
formats.append({
|
||||
'url': playlist[1]['url'],
|
||||
'format_id': name,
|
||||
'quality': quality,
|
||||
})
|
||||
for p in playlist:
|
||||
if p.get('eventCategory') == 'Video':
|
||||
ar = formats
|
||||
elif p.get('eventCategory') == 'Video Postroll':
|
||||
ar = ad_formats
|
||||
else:
|
||||
continue
|
||||
|
||||
_add_format('normal', configUrl, quality=0)
|
||||
hq_url = (configUrl +
|
||||
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
|
||||
ar.append({
|
||||
'url': p['url'],
|
||||
'format_id': name,
|
||||
'quality': quality,
|
||||
'http_headers': {
|
||||
'User-Agent': self._USER_AGENT,
|
||||
},
|
||||
})
|
||||
|
||||
_add_format('normal', config_url, quality=0)
|
||||
hq_url = (config_url +
|
||||
('&hq=1' if '?' in config_url else config_url + '?hq=1'))
|
||||
try:
|
||||
_add_format('hq', hq_url, quality=1)
|
||||
except ExtractorError:
|
||||
pass # That's fine, we'll just use normal quality
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
if '/escapist/sales-marketing/' in formats[-1]['url']:
|
||||
raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
|
||||
|
||||
res = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'uploader': showName,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': videoDesc,
|
||||
'player_url': playerUrl,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
if self._downloader.params.get('include_ads') and ad_formats:
|
||||
self._sort_formats(ad_formats)
|
||||
ad_res = {
|
||||
'id': '%s-ad' % video_id,
|
||||
'title': '%s (Postroll)' % title,
|
||||
'formats': ad_formats,
|
||||
}
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [res, ad_res],
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
return res
|
||||
|
@ -4,11 +4,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
qualities,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
@ -17,7 +17,7 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
|
||||
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
|
||||
'info_dict': {
|
||||
'id': '652431',
|
||||
'ext': 'mp4',
|
||||
@ -49,19 +49,27 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||
r'video_url=(.+?)&', webpage, 'video_url'))
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
format = "-".join(format)
|
||||
flash_vars = compat_parse_qs(self._search_regex(
|
||||
r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
|
||||
|
||||
formats = []
|
||||
quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
|
||||
for k, vals in flash_vars.items():
|
||||
m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
|
||||
if m is not None:
|
||||
formats.append({
|
||||
'format_id': m.group('quality'),
|
||||
'quality': quality(m.group('quality')),
|
||||
'url': vals[0],
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'url': video_url,
|
||||
'format': format,
|
||||
'format_id': format,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
|
||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||
params = json.loads(params_raw)
|
||||
video_data = params['video_data'][0]
|
||||
video_url = video_data.get('hd_src')
|
||||
if not video_url:
|
||||
video_url = video_data['sd_src']
|
||||
if not video_url:
|
||||
raise ExtractorError('Cannot find video URL')
|
||||
|
||||
formats = []
|
||||
for quality in ['sd', 'hd']:
|
||||
src = video_data.get('%s_src' % quality)
|
||||
if src is not None:
|
||||
formats.append({
|
||||
'format_id': quality,
|
||||
'url': src,
|
||||
})
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
|
||||
@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(video_data.get('video_duration')),
|
||||
'thumbnail': video_data.get('thumbnail_src'),
|
||||
}
|
||||
|
@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id)
|
||||
title = self._html_search_meta('twitter:title', page, 'title')
|
||||
|
||||
title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
|
||||
description = self._html_search_meta('twitter:description', page, 'title')
|
||||
|
||||
data = self._download_xml(
|
||||
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
|
||||
'height': int(details.find('./height').text.strip()),
|
||||
} for details in item.findall('./source/file_details') if details.find('./file').text
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -1,52 +1,71 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class FirstTVIE(InfoExtractor):
|
||||
IE_NAME = 'firsttv'
|
||||
IE_DESC = 'Видеоархив - Первый канал'
|
||||
_VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
|
||||
IE_NAME = '1tv'
|
||||
IE_DESC = 'Первый канал'
|
||||
_VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.1tv.ru/videoarchive/73390',
|
||||
'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
|
||||
'md5': '777f525feeec4806130f4f764bc18a4f',
|
||||
'info_dict': {
|
||||
'id': '73390',
|
||||
'ext': 'mp4',
|
||||
'title': 'Олимпийские канатные дороги',
|
||||
'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
|
||||
'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'duration': 149,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
|
||||
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
|
||||
'info_dict': {
|
||||
'id': '35930',
|
||||
'ext': 'mp4',
|
||||
'title': 'Наедине со всеми. Людмила Сенчина',
|
||||
'description': 'md5:89553aed1d641416001fe8d450f06cb9',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'duration': 2694,
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
|
||||
r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
|
||||
webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
|
||||
[r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||
r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||
webpage, 'description', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
|
||||
duration = self._og_search_property(
|
||||
'video:duration', webpage,
|
||||
'video duration', fatal=False)
|
||||
|
||||
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'like count', fatal=False)
|
||||
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'dislike count', fatal=False)
|
||||
like_count = self._html_search_regex(
|
||||
r'title="Понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'like count', default=None)
|
||||
dislike_count = self._html_search_regex(
|
||||
r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'dislike count', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
|
||||
IE_NAME = '5min'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
||||
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
|
||||
5min:)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
41
youtube_dl/extractor/footyroom.py
Normal file
41
youtube_dl/extractor/footyroom.py
Normal file
@ -0,0 +1,41 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FootyRoomIE(InfoExtractor):
|
||||
_VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
|
||||
'info_dict': {
|
||||
'id': 'schalke-04-0-2-real-madrid-2015-02',
|
||||
'title': 'Schalke 04 0 – 2 Real Madrid',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
|
||||
playlist_id)
|
||||
|
||||
playlist_title = self._og_search_title(webpage)
|
||||
|
||||
entries = []
|
||||
for video in playlist:
|
||||
payload = video.get('payload')
|
||||
if not payload:
|
||||
continue
|
||||
playwire_url = self._search_regex(
|
||||
r'data-config="([^"]+)"', payload,
|
||||
'playwire url', default=None)
|
||||
if playwire_url:
|
||||
entries.append(self.url_result(playwire_url, 'Playwire'))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
@ -1,77 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FranceCultureIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
|
||||
'info_dict': {
|
||||
'id': '4795174',
|
||||
'ext': 'mp3',
|
||||
'title': 'Rendez-vous au pays des geeks',
|
||||
'alt_title': 'Carnet nomade | 13-14',
|
||||
'vcodec': 'none',
|
||||
'uploader': 'Colette Fellous',
|
||||
'upload_date': '20140301',
|
||||
'duration': 3601,
|
||||
'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
|
||||
'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
|
||||
'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats',
|
||||
'timestamp': 1393700400,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
baseurl = mobj.group('baseurl')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
params_code = self._search_regex(
|
||||
r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
|
||||
webpage, 'parameter code')
|
||||
params = compat_parse_qs(params_code)
|
||||
video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
|
||||
|
||||
video_path = self._search_regex(
|
||||
r'<a id="player".*?href="([^"]+)"', webpage, 'video path')
|
||||
video_url = compat_urlparse.urljoin(url, video_path)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<a id="player".*?data-date="([0-9]+)"',
|
||||
webpage, 'upload date', fatal=False))
|
||||
thumbnail = self._search_regex(
|
||||
r'<a id="player".*?>\s+<img src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
|
||||
r'<span class="title-diffusion">(.*?)</span>', webpage, 'title')
|
||||
alt_title = self._html_search_regex(
|
||||
r'<span class="title">(.*?)</span>',
|
||||
webpage, 'alt_title', fatal=False)
|
||||
description = self._html_search_regex(
|
||||
r'<span class="description">(.*?)</span>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
thumbnail_part = self._html_search_regex(
|
||||
r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
|
||||
'thumbnail', fatal=False)
|
||||
if thumbnail_part is None:
|
||||
thumbnail = None
|
||||
else:
|
||||
thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
|
||||
|
||||
info = json.loads(params['infoData'][0])[0]
|
||||
duration = info.get('media_length')
|
||||
upload_date_candidate = info.get('media_section5')
|
||||
upload_date = (
|
||||
upload_date_candidate
|
||||
if (upload_date_candidate is not None and
|
||||
re.match(r'[0-9]{8}$', upload_date_candidate))
|
||||
else None)
|
||||
webpage, 'uploader', default=None)
|
||||
vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
|
||||
'duration': duration,
|
||||
'vcodec': vcodec,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'timestamp': timestamp,
|
||||
'title': title,
|
||||
'alt_title': alt_title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
||||
|
@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
bitrates.sort()
|
||||
|
||||
formats = []
|
||||
|
||||
for bitrate in bitrates:
|
||||
for link in links:
|
||||
formats.append({
|
||||
@ -59,6 +58,13 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
'vbr': bitrate,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
|
||||
subtitles[src_lang] = [{
|
||||
'ext': src.split('/')[-1],
|
||||
'url': 'http://www.funnyordie.com%s' % src,
|
||||
}]
|
||||
|
||||
post_json = self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||
post = json.loads(post_json)
|
||||
@ -69,4 +75,5 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
'description': post.get('description'),
|
||||
'thumbnail': post.get('picture'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -1,41 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
class GamekingsIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
# MD5 is flaky, seems to change regularly
|
||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||
'info_dict': {
|
||||
'id': '20130811',
|
||||
'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
|
||||
}
|
||||
}
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
# vimeo video
|
||||
'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
|
||||
'md5': '12bf04dfd238e70058046937657ea68d',
|
||||
'info_dict': {
|
||||
'id': 'the-legend-of-zelda-majoras-mask',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Legend of Zelda: Majora’s Mask',
|
||||
'description': 'md5:9917825fe0e9f4057601fe1e38860de3',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_url = self._og_search_video_url(webpage)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video = re.search(r'[0-9]+', video_url)
|
||||
video_id = video.group(0)
|
||||
playlist_id = self._search_regex(
|
||||
r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
|
||||
|
||||
# Todo: add medium format
|
||||
video_url = video_url.replace(video_id, 'large/' + video_id)
|
||||
playlist = self._download_xml(
|
||||
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
||||
video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'jwplayer': 'http://rss.jwpcdn.com/'
|
||||
}
|
||||
|
||||
item = playlist.find('./channel/item')
|
||||
|
||||
thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail')
|
||||
video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@ -31,7 +33,7 @@ class GameStarIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
og_title = self._og_search_title(webpage)
|
||||
title = og_title.replace(' - Video bei GameStar.de', '').strip()
|
||||
title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
|
||||
|
||||
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
|
||||
|
||||
|
38
youtube_dl/extractor/gazeta.py
Normal file
38
youtube_dl/extractor/gazeta.py
Normal file
@ -0,0 +1,38 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class GazetaIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
|
||||
'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
|
||||
'info_dict': {
|
||||
'id': '205566',
|
||||
'ext': 'mp4',
|
||||
'title': '«70–80 процентов гражданских в Донецке на грани голода»',
|
||||
'description': 'md5:38617526050bd17b234728e7f9620a71',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
display_id = mobj.group('id')
|
||||
embed_url = '%s?p=embed' % mobj.group('url')
|
||||
embed_page = self._download_webpage(
|
||||
embed_url, display_id, 'Downloading embed page')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
|
||||
|
||||
return self.url_result(
|
||||
'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
|
@ -7,10 +7,12 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
|
||||
_NETRC_MACHINE = 'gdcvault'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
|
||||
@ -65,10 +67,12 @@ class GDCVaultIE(InfoExtractor):
|
||||
|
||||
def _parse_flv(self, xml_description):
|
||||
video_formats = []
|
||||
akami_url = xml_description.find('./metadata/akamaiHost').text
|
||||
akamai_url = xml_description.find('./metadata/akamaiHost').text
|
||||
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
||||
video_formats.append({
|
||||
'url': 'rtmp://' + akami_url + '/' + slide_video_path,
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(slide_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'slide deck video',
|
||||
'quality': -2,
|
||||
'preference': -2,
|
||||
@ -76,7 +80,9 @@ class GDCVaultIE(InfoExtractor):
|
||||
})
|
||||
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
||||
video_formats.append({
|
||||
'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
'format_note': 'speaker video',
|
||||
'quality': -1,
|
||||
'preference': -1,
|
||||
|
@ -26,6 +26,7 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_basename,
|
||||
xpath_text,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
from .ooyala import OoyalaIE
|
||||
@ -140,6 +141,19 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
# multiple ooyala embeds on SBN network websites
|
||||
{
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
'info_dict': {
|
||||
'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
@ -460,6 +474,7 @@ class GenericIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
'info_dict': {
|
||||
'id': '1986',
|
||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
@ -511,6 +526,109 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20150126',
|
||||
},
|
||||
'add_ie': ['Viddler'],
|
||||
},
|
||||
# Libsyn embed
|
||||
{
|
||||
'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
|
||||
'info_dict': {
|
||||
'id': '3377616',
|
||||
'ext': 'mp3',
|
||||
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
},
|
||||
},
|
||||
# jwplayer YouTube
|
||||
{
|
||||
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
|
||||
'info_dict': {
|
||||
'id': 'Mrj4DVp2zeA',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150212',
|
||||
'uploader': 'The National Archives UK',
|
||||
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
|
||||
'uploader_id': 'NationalArchives08',
|
||||
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||
},
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
'playlist_mincount': 5,
|
||||
'info_dict': {
|
||||
'id': 'aanslagen-kopenhagen',
|
||||
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
|
||||
}
|
||||
},
|
||||
# Zapiks embed
|
||||
{
|
||||
'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
|
||||
'info_dict': {
|
||||
'id': '118046',
|
||||
'ext': 'mp4',
|
||||
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||
}
|
||||
},
|
||||
# Kaltura embed
|
||||
{
|
||||
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
|
||||
'info_dict': {
|
||||
'id': '1_eergr3h1',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150226',
|
||||
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
|
||||
'timestamp': int,
|
||||
'title': 'John Carlson Postgame 2/25/15',
|
||||
},
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
'info_dict': {
|
||||
'id': '227304',
|
||||
'ext': 'mp4',
|
||||
'title': 'Навальный вышел на свободу',
|
||||
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 87,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# ClipYou (Eagle.Platform) embed (custom URL)
|
||||
{
|
||||
'url': 'http://muz-tv.ru/play/7129/',
|
||||
'info_dict': {
|
||||
'id': '12820',
|
||||
'ext': 'mp4',
|
||||
'title': "'O Sole Mio",
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 216,
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
# Pladform embed
|
||||
{
|
||||
'url': 'http://muz-tv.ru/kinozal/view/7400/',
|
||||
'info_dict': {
|
||||
'id': '100183293',
|
||||
'ext': 'mp4',
|
||||
'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
|
||||
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 694,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# RSS feed with enclosure
|
||||
{
|
||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||
'info_dict': {
|
||||
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
'ext': 'm4v',
|
||||
'upload_date': '20150228',
|
||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@ -523,11 +641,24 @@ class GenericIE(InfoExtractor):
|
||||
playlist_desc_el = doc.find('./channel/description')
|
||||
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
||||
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': e.find('link').text,
|
||||
'title': e.find('title').text,
|
||||
} for e in doc.findall('./channel/item')]
|
||||
entries = []
|
||||
for it in doc.findall('./channel/item'):
|
||||
next_url = xpath_text(it, 'link', fatal=False)
|
||||
if not next_url:
|
||||
enclosure_nodes = it.findall('./enclosure')
|
||||
for e in enclosure_nodes:
|
||||
next_url = e.attrib.get('url')
|
||||
if next_url:
|
||||
break
|
||||
|
||||
if not next_url:
|
||||
continue
|
||||
|
||||
entries.append({
|
||||
'_type': 'url',
|
||||
'url': next_url,
|
||||
'title': it.find('title').text,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
@ -756,6 +887,13 @@ class GenericIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
|
||||
webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='RtlNl')
|
||||
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||
@ -763,7 +901,6 @@ class GenericIE(InfoExtractor):
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl)
|
||||
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
@ -880,12 +1017,34 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for NYTimes player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Libsyn player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
|
||||
if mobj is not None:
|
||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||
|
||||
# Look for multiple Ooyala embeds on SBN network websites
|
||||
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
|
||||
if mobj is not None:
|
||||
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
|
||||
if embeds:
|
||||
return _playlist_from_matches(
|
||||
embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
|
||||
|
||||
# Look for Aparat videos
|
||||
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
||||
if mobj is not None:
|
||||
@ -1012,7 +1171,12 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded sbs.com.au player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
|
||||
r'''(?x)
|
||||
(?:
|
||||
<meta\s+property="og:video"\s+content=|
|
||||
<iframe[^>]+?src=
|
||||
)
|
||||
(["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'SBS')
|
||||
@ -1042,7 +1206,39 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Livestream')
|
||||
|
||||
# Look for Zapiks embed
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
mobj = re.search(
|
||||
r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
||||
|
||||
# Look for Eagle.Platform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'EaglePlatform')
|
||||
|
||||
# Look for ClipYou (uses Eagle.Platform) embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
|
||||
|
||||
# Look for Pladform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Pladform')
|
||||
|
||||
def check_video(vurl):
    """Decide whether ``vurl`` should be kept as a candidate video URL.

    A URL accepted by ``YoutubeIE.suitable`` is always kept. Otherwise the
    URL's path must contain a dot, and its extension (as reported by
    ``determine_ext``) must not be one of the listed non-video extensions.
    """
    if YoutubeIE.suitable(vurl):
        return True

    url_path = compat_urlparse.urlparse(vurl).path
    extension = determine_ext(url_path)

    # A path without any dot is rejected outright.
    if '.' not in url_path:
        return False

    # Flash, image and subtitle files are rejected.
    rejected_extensions = ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
    return extension not in rejected_extensions
|
||||
@ -1060,7 +1256,8 @@ class GenericIE(InfoExtractor):
|
||||
JWPlayerOptions|
|
||||
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
|
||||
)
|
||||
.*?file\s*:\s*["\'](.*?)["\']''', webpage))
|
||||
.*?
|
||||
['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
|
||||
if not found:
|
||||
# Broaden the search a little bit
|
||||
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
|
||||
@ -1095,10 +1292,16 @@ class GenericIE(InfoExtractor):
|
||||
# HTML5 video
|
||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||
if not found:
|
||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||
found = re.search(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
|
||||
webpage)
|
||||
if not found:
|
||||
# Look also in Refresh HTTP header
|
||||
refresh_header = head_response.headers.get('Refresh')
|
||||
if refresh_header:
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = found.group(1)
|
||||
self.report_following_redirect(new_url)
|
||||
@ -1134,7 +1337,9 @@ class GenericIE(InfoExtractor):
|
||||
return entries[0]
|
||||
else:
|
||||
for num, e in enumerate(entries, start=1):
|
||||
e['title'] = '%s (%d)' % (e['title'], num)
|
||||
# 'url' results don't have a title
|
||||
if e.get('title') is not None:
|
||||
e['title'] = '%s (%d)' % (e['title'], num)
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
|
@ -20,7 +20,7 @@ class GloboIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
|
||||
|
||||
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
|
||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id="(\d+)"',
|
||||
|
@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor):
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<span class="duration">\s*-?\s*(.*?)</span>',
|
||||
webpage, 'duration', fatal=False))
|
||||
family_friendly = self._html_search_meta(
|
||||
'isFamilyFriendly', webpage, default='false')
|
||||
|
||||
flashvars = compat_parse_qs(self._html_search_regex(
|
||||
r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
|
||||
@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor):
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'age_limit': 0 if family_friendly == 'true' else 18,
|
||||
'age_limit': self._family_friendly_search(webpage),
|
||||
}
|
||||
|
@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor):
|
||||
|
||||
if webpage is not None:
|
||||
o = GroovesharkHtmlParser.extract_object_tags(webpage)
|
||||
return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
|
||||
return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
|
||||
|
||||
return (webpage, None)
|
||||
return webpage, None
|
||||
|
||||
def _real_initialize(self):
|
||||
self.ts = int(time.time() * 1000) # timestamp in millis
|
||||
@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor):
|
||||
swf_referer = None
|
||||
if self.do_playerpage_request:
|
||||
(_, player_objs) = self._get_playerpage(url)
|
||||
if player_objs is not None:
|
||||
if player_objs:
|
||||
swf_referer = self._build_swf_referer(url, player_objs[0])
|
||||
self.to_screen('SWF Referer: %s' % swf_referer)
|
||||
|
||||
|
31
youtube_dl/extractor/history.py
Normal file
31
youtube_dl/extractor/history.py
Normal file
@ -0,0 +1,31 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class HistoryIE(InfoExtractor):
    """Extractor for video pages on history.com.

    The page is downloaded and searched for the element whose ``data-href``
    ends with the video id taken from the URL. That element's
    ``data-release-url`` is then returned as a URL result, wrapped by
    ``smuggle_url`` together with a ``sig`` dict.
    """

    _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'

    _TESTS = [{
        'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
        'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
        'info_dict': {
            'id': 'bLx5Dv5Aka1G',
            'ext': 'mp4',
            'title': "Bet You Didn't Know: Valentine's Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
        },
        # NOTE(review): presumably the release URL is handled by the
        # ThePlatform extractor -- confirm against that extractor.
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Return a URL result pointing at the page's release URL.

        :param url: a history.com page URL matching ``_VALID_URL``.
        :returns: the value of ``self.url_result(...)`` for the smuggled
            release URL.
        """
        # Imported locally because this module's import block does not
        # provide ``re``.
        import re

        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The id comes straight from the URL, so escape it before embedding
        # it in the pattern; otherwise characters such as '.', '+' or '('
        # in the slug would be interpreted as regex syntax.
        video_url = self._search_regex(
            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % re.escape(video_id),
            webpage, 'video url')

        return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user