Compare commits
473 Commits
2016.04.13
...
2016.06.02
Author | SHA1 | Date | |
---|---|---|---|
5e3856a2c5 | |||
6e6b9f600f | |||
6a1df4fb5f | |||
dde1ce7c06 | |||
811586ebcf | |||
0ff3749bfe | |||
28bab13348 | |||
877032314f | |||
8ec2b2c41c | |||
197a5da1d0 | |||
abbb2938fa | |||
f657b1a5f2 | |||
86a52881c6 | |||
8267423652 | |||
917a3196f8 | |||
56bd028a0f | |||
681b923b5c | |||
9ed6d8c6c5 | |||
f3fb420b82 | |||
165e3561e9 | |||
27f17c0eab | |||
44c8892369 | |||
f574103d7c | |||
6d138e98e3 | |||
2a329110b9 | |||
2bee7b25f3 | |||
92cf872a48 | |||
6461f2b7ec | |||
807cf7b07f | |||
de7d76af52 | |||
11c70deba7 | |||
f36532404d | |||
77b8b4e696 | |||
2615fa7584 | |||
fac2af3c51 | |||
6f8cb24219 | |||
448bb5f333 | |||
293c255688 | |||
ac88d2316e | |||
5950cb1d6d | |||
761052db92 | |||
240b60453e | |||
85b0fe7d64 | |||
0a5685b26f | |||
6f748df43f | |||
b410cb83d4 | |||
da9d82840a | |||
4ee0b8afdb | |||
1de32771e1 | |||
688c634b7d | |||
0d6ee97508 | |||
6b43132ce9 | |||
a4690b3244 | |||
444417edb5 | |||
277c7465f5 | |||
25bcd3550e | |||
a4760d204f | |||
e8593f346a | |||
05b651e3a5 | |||
42a7439717 | |||
b1e9ebd080 | |||
0c50eeb987 | |||
4b464a6a78 | |||
5db9df622f | |||
5181759c0d | |||
e54373204a | |||
102810ef04 | |||
78d3b3e213 | |||
7a46542f97 | |||
eb7941e3e6 | |||
db3b8b2103 | |||
c5f5155100 | |||
4a12077855 | |||
a4a7c44bd3 | |||
70346165fe | |||
c776b99691 | |||
e9297256d4 | |||
e5871c672b | |||
9b06b0fb92 | |||
4f3a25c2b4 | |||
21a19aa94d | |||
c6b9cf05e1 | |||
4d8819d249 | |||
898f4b49cc | |||
0150a00f33 | |||
c8831015f4 | |||
92d221ad48 | |||
0db9a05f88 | |||
e03b35b8f9 | |||
d2fee3c99e | |||
598869afb1 | |||
7e642e4fd6 | |||
c8cc3745fb | |||
4c718d3c50 | |||
115c65793a | |||
661d46b28f | |||
5ce3d5bd1b | |||
612b5f403e | |||
9f54e692d2 | |||
7b2fcbfd4e | |||
16da9bbc29 | |||
c8602b2f9b | |||
b219f5e51b | |||
1846e9ade0 | |||
6756602be6 | |||
6c114b1210 | |||
7ded6545ed | |||
aa5957ac49 | |||
64413f7563 | |||
45f160a43c | |||
36ca2c55db | |||
f0c96af9cb | |||
31a70191e7 | |||
ad96b4c8f5 | |||
043dc9d36f | |||
52f7c75cff | |||
f6e588afc0 | |||
a001296703 | |||
2cbd8c6781 | |||
8585dc4cdc | |||
dd81769c62 | |||
46bc9b7d7c | |||
b78531a36a | |||
11e6a0b641 | |||
15cda1ef77 | |||
055f0d3d06 | |||
cdd94c2eae | |||
36755d9d69 | |||
f7199423e5 | |||
a0a81918f1 | |||
5572d598a5 | |||
cec9727c7f | |||
79298173c5 | |||
69c9cc2716 | |||
ed56f26039 | |||
6f41b2bcf1 | |||
cda6d47aad | |||
5d39176f6d | |||
5c86bfe70f | |||
364cf465dd | |||
ca950f49e9 | |||
89ac4a19e6 | |||
640eea0a0c | |||
bd1e484448 | |||
a834622b89 | |||
707bb426b1 | |||
66e7ace17a | |||
791ff52f75 | |||
98d560f205 | |||
afcc317800 | |||
b5abf86148 | |||
134c6ea856 | |||
0730be9022 | |||
96c2e3e909 | |||
f196508f7b | |||
cc1028aa6d | |||
ad55e10165 | |||
18cf6381f6 | |||
cdf32ff15d | |||
99d79b8692 | |||
b9e7bc55da | |||
d8d540cf0d | |||
0df79d552a | |||
0db3a66162 | |||
7581bfc958 | |||
f388f616c1 | |||
a3fa6024d6 | |||
1b405bb47d | |||
7e8ddca1bb | |||
778a1ccca7 | |||
4540515cb3 | |||
e0741fd449 | |||
e73b9c65e2 | |||
702ccf2dc0 | |||
28b4f73620 | |||
c2876afafe | |||
6ddb4888d2 | |||
fa5cb8d021 | |||
e21f17fc86 | |||
edaa23f822 | |||
d5ae6bb501 | |||
51fb4995a5 | |||
9e9cd7248d | |||
72f3289ac4 | |||
71aff18809 | |||
dab0daeeb0 | |||
4350b74545 | |||
2937590e8b | |||
fad7bbec3a | |||
e62d9c5caa | |||
20cfdcc910 | |||
1292638754 | |||
fe40f9eef2 | |||
6104cc2985 | |||
c15c47d19b | |||
965fefdcd8 | |||
3951e7eb93 | |||
f1f6f5aa5e | |||
eb785b856f | |||
c52f4efaee | |||
f23a92a0ce | |||
3b01a9fbb6 | |||
9c072d38c6 | |||
3e169233da | |||
f5436c5d9e | |||
5c24873a9e | |||
00c21c225d | |||
d013b26719 | |||
e2eca6f65e | |||
a0904c5d80 | |||
cb1fa58813 | |||
3fd6332c05 | |||
401d147893 | |||
e2ee97dcd5 | |||
f745403b5b | |||
3e80e6f40d | |||
25cb7a0eeb | |||
abc97b5eda | |||
04e88ca2ca | |||
6f59aa934b | |||
109db8ea64 | |||
915620fd68 | |||
ac12e888f9 | |||
b1c6a5bac8 | |||
7d08f6073d | |||
758a059241 | |||
4f8c56eb4e | |||
9da526aae7 | |||
75b81df3af | |||
aabdc83d6e | |||
2a48e6f01a | |||
203a3c0e6a | |||
d36724cca4 | |||
15fc0658f7 | |||
e960c3c223 | |||
bc7e77a04b | |||
964f49336f | |||
57d8e32a3e | |||
4174552391 | |||
80bc4106af | |||
7759be38da | |||
a0a309b973 | |||
c587cbb793 | |||
6c52a86f54 | |||
8a92e51c60 | |||
f0e14fdd43 | |||
df5f4e8888 | |||
7960b0563b | |||
5c9ced9504 | |||
31c4448f6e | |||
79a2e94e79 | |||
686cc89634 | |||
9508738f9a | |||
78a3ff33ab | |||
881dbc86c4 | |||
8e7d004888 | |||
9618c44824 | |||
516ea41a7d | |||
e2bd301ce7 | |||
0c9d288ba0 | |||
e0da32df6e | |||
174aba3223 | |||
0d66bd0eab | |||
4bd143a3a0 | |||
6f27bf1c74 | |||
68bb2fef95 | |||
854cc54bc1 | |||
651ad35ce0 | |||
6a0f9a24d0 | |||
9cf79e8f4b | |||
2844b09336 | |||
1a2b377cc2 | |||
4c1b2e5c0e | |||
9e1b96ae40 | |||
fc35cd9e0c | |||
339fe7228a | |||
ea7e7fecbd | |||
d00b93d58c | |||
93f7a31bf3 | |||
33a1ec950c | |||
4e0c0c1508 | |||
89c0dc9a5f | |||
f628d800fb | |||
11fa3d7f99 | |||
d41ee7b774 | |||
e0e9bbb0e9 | |||
7691184a31 | |||
35cd2f4c25 | |||
350d7963db | |||
cbc032c8b7 | |||
69c4cde4ba | |||
ca278a182b | |||
373e1230e4 | |||
cd63d091ce | |||
0571ffda7d | |||
5556047465 | |||
65a3bfb379 | |||
cef3f3011f | |||
e9c6cdf4a1 | |||
00a17a9e12 | |||
8312b1a3d1 | |||
6ff4469528 | |||
68835d687a | |||
9d186afac8 | |||
151d98130b | |||
b24d6336a7 | |||
065216d94f | |||
67167920db | |||
14638e2915 | |||
1910077ed7 | |||
5819edef03 | |||
f5535ed0e3 | |||
31ff3c074e | |||
72670c39de | |||
683d892bf9 | |||
497971cd4a | |||
e757fb3d05 | |||
0ba9e3ca22 | |||
4b53762914 | |||
eebe6b382e | |||
0cbcbdd89d | |||
7f776fa4b5 | |||
eb5ad31ce1 | |||
a5941305b6 | |||
f8dddaf456 | |||
618c71dc64 | |||
52af8f222b | |||
3cc8649c9d | |||
dcf094d626 | |||
5b5d7cc11e | |||
2ac2cbc0a3 | |||
a7e03861e8 | |||
046ea04a7d | |||
7464360379 | |||
175c2e9ec3 | |||
f1f879098a | |||
c9fd530670 | |||
749b0046a8 | |||
e3de3d6f2f | |||
ad58942d57 | |||
4645432d7a | |||
6bdc2d5358 | |||
2beff95da5 | |||
abc1723edd | |||
b248e6485b | |||
d6712378e7 | |||
fb72ec58ae | |||
c83a352227 | |||
e9063b5de9 | |||
594b0c4c69 | |||
eb9ee19422 | |||
a1394b820d | |||
aa9dc24f5a | |||
51762e1a31 | |||
8b38f2ac40 | |||
a82398bd72 | |||
c14dc00df3 | |||
03dd60ca41 | |||
0738187f9b | |||
a956cb6306 | |||
a8062eabcd | |||
2a7dee8cc5 | |||
d9ed362116 | |||
4f54958097 | |||
2a7c38831c | |||
949b6497cc | |||
2c21152ca7 | |||
fda9a1ca9e | |||
864d5e7231 | |||
5448b781f6 | |||
e239413fbc | |||
fd0ff8bad8 | |||
397ec446f3 | |||
29a7e8f6f8 | |||
eb01e97e10 | |||
cb7d4d0efd | |||
c80037918b | |||
237a41108a | |||
e962ae15d3 | |||
7c36ea7d54 | |||
9260cf1d97 | |||
bdbb8530c7 | |||
09a9fadb84 | |||
bf09af3acb | |||
88296ac326 | |||
870d525848 | |||
6577112890 | |||
1988647dda | |||
a292cba256 | |||
982e518a96 | |||
748e730099 | |||
b6c0d4f431 | |||
acaff49575 | |||
1da19488f9 | |||
442c4d361f | |||
ec59d657e7 | |||
99ef96f84c | |||
4dccea8ad0 | |||
2c0d9c6217 | |||
12a5134596 | |||
16e633a5d7 | |||
494ab6db73 | |||
107701fcfc | |||
f77970765a | |||
81215d5652 | |||
241a318f27 | |||
4fdf082375 | |||
1b6182d8f7 | |||
7bab22a402 | |||
0f97fb4d00 | |||
b1cf58f48f | |||
3014b0ae83 | |||
b9f2fdd37f | |||
bbb3f730bb | |||
d868f43c58 | |||
21525bb8ca | |||
d8f103159f | |||
663ee5f0a9 | |||
b6b950bf58 | |||
11e60fcad8 | |||
c23533a100 | |||
0dafea02e6 | |||
5d6360c3b7 | |||
5e5c30c3fd | |||
9154c87fc4 | |||
ef0e4e7bc0 | |||
67d46a3f90 | |||
bec47a0748 | |||
36b7d9dbfa | |||
8c65e4a527 | |||
6ad2ef8b7c | |||
00b426d66d | |||
0de968b584 | |||
0841d5013c | |||
a71fca8577 | |||
ee94e7e66d | |||
759e37c9e6 | |||
ae65567102 | |||
c394b4f4cb | |||
260c7036ba | |||
f74197a074 | |||
f3a58d46bf | |||
b6612c9b11 | |||
7e176effb2 | |||
4a252cc2d2 | |||
f0ec61b525 | |||
66d40ae3a5 | |||
e6da9240d4 | |||
dd91dfcd67 | |||
c773082692 | |||
9c250931f5 | |||
56f1750049 | |||
f2159c9815 | |||
b0cf2e7c1b | |||
74b47d00c3 | |||
8cb57bab8e | |||
e1bf277e19 | |||
ce599d5a7e | |||
9e28538726 | |||
404284132c | |||
5565be9dd9 | |||
b3a9474ad1 | |||
86475d59b1 | |||
73d93f948e | |||
f5d8743e0a | |||
d1c4e4ba15 | |||
f141fefab7 | |||
8334637f4a | |||
b8f67449ec | |||
6d67169509 | |||
dcaf00fb3e | |||
f896e1ccef | |||
c96eca426b |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.13**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.02**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2016.04.13
|
[debug] youtube-dl version 2016.06.02
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
8
.gitignore
vendored
8
.gitignore
vendored
@ -28,10 +28,16 @@ updates_key.pem
|
|||||||
*.mp4
|
*.mp4
|
||||||
*.m4a
|
*.m4a
|
||||||
*.m4v
|
*.m4v
|
||||||
|
*.mp3
|
||||||
*.part
|
*.part
|
||||||
*.swp
|
*.swp
|
||||||
test/testdata
|
test/testdata
|
||||||
|
test/local_parameters.json
|
||||||
.tox
|
.tox
|
||||||
youtube-dl.zsh
|
youtube-dl.zsh
|
||||||
|
|
||||||
|
# IntelliJ related files
|
||||||
.idea
|
.idea
|
||||||
.idea/*
|
*.iml
|
||||||
|
|
||||||
|
tmp/
|
||||||
|
@ -7,11 +7,13 @@ python:
|
|||||||
- "3.4"
|
- "3.4"
|
||||||
- "3.5"
|
- "3.5"
|
||||||
sudo: false
|
sudo: false
|
||||||
|
install:
|
||||||
|
- bash ./devscripts/install_srelay.sh
|
||||||
|
- export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6
|
||||||
script: nosetests test --verbose
|
script: nosetests test --verbose
|
||||||
notifications:
|
notifications:
|
||||||
email:
|
email:
|
||||||
- filippo.valsorda@gmail.com
|
- filippo.valsorda@gmail.com
|
||||||
- phihag@phihag.de
|
|
||||||
- yasoob.khld@gmail.com
|
- yasoob.khld@gmail.com
|
||||||
# irc:
|
# irc:
|
||||||
# channels:
|
# channels:
|
||||||
|
5
AUTHORS
5
AUTHORS
@ -168,3 +168,8 @@ José Joaquín Atria
|
|||||||
Viťas Strádal
|
Viťas Strádal
|
||||||
Kagami Hiiragi
|
Kagami Hiiragi
|
||||||
Philip Huppert
|
Philip Huppert
|
||||||
|
blahgeek
|
||||||
|
Kevin Deldycke
|
||||||
|
inondle
|
||||||
|
Tomáš Čech
|
||||||
|
Déstin Reed
|
||||||
|
6
Makefile
6
Makefile
@ -1,7 +1,7 @@
|
|||||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||||
find . -name "*.pyc" -delete
|
find . -name "*.pyc" -delete
|
||||||
find . -name "*.class" -delete
|
find . -name "*.class" -delete
|
||||||
|
|
||||||
@ -37,7 +37,7 @@ test:
|
|||||||
ot: offlinetest
|
ot: offlinetest
|
||||||
|
|
||||||
offlinetest: codetest
|
offlinetest: codetest
|
||||||
$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
|
$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py
|
||||||
|
|
||||||
tar: youtube-dl.tar.gz
|
tar: youtube-dl.tar.gz
|
||||||
|
|
||||||
@ -69,7 +69,7 @@ README.txt: README.md
|
|||||||
pandoc -f markdown -t plain README.md -o README.txt
|
pandoc -f markdown -t plain README.md -o README.txt
|
||||||
|
|
||||||
youtube-dl.1: README.md
|
youtube-dl.1: README.md
|
||||||
$(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md
|
$(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
|
||||||
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
|
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
|
||||||
rm -f youtube-dl.1.temp.md
|
rm -f youtube-dl.1.temp.md
|
||||||
|
|
||||||
|
63
README.md
63
README.md
@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget:
|
|||||||
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||||
|
|
||||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
|
||||||
|
|
||||||
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
|
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
|
||||||
|
|
||||||
@ -73,8 +73,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
repairs broken URLs, but emits an error if
|
repairs broken URLs, but emits an error if
|
||||||
this is not possible instead of searching.
|
this is not possible instead of searching.
|
||||||
--ignore-config Do not read configuration files. When given
|
--ignore-config Do not read configuration files. When given
|
||||||
in the global configuration file /etc
|
in the global configuration file
|
||||||
/youtube-dl.conf: Do not read the user
|
/etc/youtube-dl.conf: Do not read the user
|
||||||
configuration in ~/.config/youtube-
|
configuration in ~/.config/youtube-
|
||||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||||
on Windows)
|
on Windows)
|
||||||
@ -85,9 +85,11 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--no-color Do not emit color codes in output
|
--no-color Do not emit color codes in output
|
||||||
|
|
||||||
## Network Options:
|
## Network Options:
|
||||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||||
an empty string (--proxy "") for direct
|
To enable experimental SOCKS proxy, specify
|
||||||
connection
|
a proper scheme. For example
|
||||||
|
socks5://127.0.0.1:1080/. Pass in an empty
|
||||||
|
string (--proxy "") for direct connection
|
||||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||||
--source-address IP Client-side IP address to bind to
|
--source-address IP Client-side IP address to bind to
|
||||||
(experimental)
|
(experimental)
|
||||||
@ -160,7 +162,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
(experimental)
|
(experimental)
|
||||||
|
|
||||||
## Download Options:
|
## Download Options:
|
||||||
-r, --rate-limit LIMIT Maximum download rate in bytes per second
|
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||||
(e.g. 50K or 4.2M)
|
(e.g. 50K or 4.2M)
|
||||||
-R, --retries RETRIES Number of retries (default is 10), or
|
-R, --retries RETRIES Number of retries (default is 10), or
|
||||||
"infinite".
|
"infinite".
|
||||||
@ -176,7 +178,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||||
expected filesize (experimental)
|
expected filesize (experimental)
|
||||||
--hls-prefer-native Use the native HLS downloader instead of
|
--hls-prefer-native Use the native HLS downloader instead of
|
||||||
ffmpeg (experimental)
|
ffmpeg
|
||||||
|
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
||||||
|
downloader
|
||||||
--hls-use-mpegts Use the mpegts container for HLS videos,
|
--hls-use-mpegts Use the mpegts container for HLS videos,
|
||||||
allowing to play the video while
|
allowing to play the video while
|
||||||
downloading (some players may not be able
|
downloading (some players may not be able
|
||||||
@ -252,11 +256,12 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
jar in
|
jar in
|
||||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||||
can store some downloaded information
|
can store some downloaded information
|
||||||
permanently. By default $XDG_CACHE_HOME
|
permanently. By default
|
||||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
$XDG_CACHE_HOME/youtube-dl or
|
||||||
moment, only YouTube player files (for
|
~/.cache/youtube-dl . At the moment, only
|
||||||
videos with obfuscated signatures) are
|
YouTube player files (for videos with
|
||||||
cached, but that may change.
|
obfuscated signatures) are cached, but that
|
||||||
|
may change.
|
||||||
--no-cache-dir Disable filesystem caching
|
--no-cache-dir Disable filesystem caching
|
||||||
--rm-cache-dir Delete all filesystem cache files
|
--rm-cache-dir Delete all filesystem cache files
|
||||||
|
|
||||||
@ -413,7 +418,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
|
|
||||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
|
You can configure youtube-dl by placing any supported command line option in a configuration file. On Linux and OS X, the system-wide configuration file is located at `/etc/youtube-dl.conf` and the user-wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user-wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
|
||||||
|
|
||||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||||
```
|
```
|
||||||
@ -429,7 +434,7 @@ You can use `--ignore-config` if you want to disable the configuration file for
|
|||||||
|
|
||||||
### Authentication with `.netrc` file
|
### Authentication with `.netrc` file
|
||||||
|
|
||||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||||
```
|
```
|
||||||
touch $HOME/.netrc
|
touch $HOME/.netrc
|
||||||
chmod a-rwx,u+rw $HOME/.netrc
|
chmod a-rwx,u+rw $HOME/.netrc
|
||||||
@ -463,7 +468,7 @@ The basic usage is not to set any template arguments when downloading a single f
|
|||||||
- `display_id`: An alternative identifier for the video
|
- `display_id`: An alternative identifier for the video
|
||||||
- `uploader`: Full name of the video uploader
|
- `uploader`: Full name of the video uploader
|
||||||
- `license`: License name the video is licensed under
|
- `license`: License name the video is licensed under
|
||||||
- `creator`: The main artist who created the video
|
- `creator`: The creator of the video
|
||||||
- `release_date`: The date (YYYYMMDD) when the video was released
|
- `release_date`: The date (YYYYMMDD) when the video was released
|
||||||
- `timestamp`: UNIX timestamp of the moment the video became available
|
- `timestamp`: UNIX timestamp of the moment the video became available
|
||||||
- `upload_date`: Video upload date (YYYYMMDD)
|
- `upload_date`: Video upload date (YYYYMMDD)
|
||||||
@ -515,6 +520,18 @@ Available for the video that is an episode of some series or programme:
|
|||||||
- `episode_number`: Number of the video episode within a season
|
- `episode_number`: Number of the video episode within a season
|
||||||
- `episode_id`: Id of the video episode
|
- `episode_id`: Id of the video episode
|
||||||
|
|
||||||
|
Available for the media that is a track or a part of a music album:
|
||||||
|
- `track`: Title of the track
|
||||||
|
- `track_number`: Number of the track within an album or a disc
|
||||||
|
- `track_id`: Id of the track
|
||||||
|
- `artist`: Artist(s) of the track
|
||||||
|
- `genre`: Genre(s) of the track
|
||||||
|
- `album`: Title of the album the track belongs to
|
||||||
|
- `album_type`: Type of the album
|
||||||
|
- `album_artist`: List of all artists that appeared on the album
|
||||||
|
- `disc_number`: Number of the disc or other physical medium the track belongs to
|
||||||
|
- `release_year`: Year (YYYY) when the album was released
|
||||||
|
|
||||||
Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
||||||
|
|
||||||
For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||||
@ -677,12 +694,20 @@ hash -r
|
|||||||
|
|
||||||
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
|
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
|
||||||
|
|
||||||
|
### youtube-dl is extremely slow to start on Windows
|
||||||
|
|
||||||
|
Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
|
||||||
|
|
||||||
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
||||||
|
|
||||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||||
|
|
||||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
||||||
|
|
||||||
|
### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
|
||||||
|
|
||||||
|
Make sure you are not using `-o` together with any of the options `-t`, `--title`, `--id`, `-A` or `--auto-number`, whether set on the command line or in a configuration file. Remove the latter options if present.
|
||||||
|
|
||||||
### Do I always have to pass `-citw`?
|
### Do I always have to pass `-citw`?
|
||||||
|
|
||||||
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
|
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
|
||||||
@ -703,7 +728,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
|
|||||||
|
|
||||||
### I have downloaded a video but how can I play it?
|
### I have downloaded a video but how can I play it?
|
||||||
|
|
||||||
Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
|
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
|
||||||
|
|
||||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
||||||
|
|
||||||
@ -760,9 +785,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
|
|||||||
|
|
||||||
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
||||||
|
|
||||||
### The exe throws a *Runtime error from Visual C++*
|
### The exe throws an error due to missing `MSVCR100.dll`
|
||||||
|
|
||||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
To run the exe you first need to install the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
|
||||||
|
|
||||||
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
||||||
|
|
||||||
|
@ -1,17 +1,42 @@
|
|||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
||||||
from socketserver import ThreadingMixIn
|
|
||||||
import argparse
|
import argparse
|
||||||
import ctypes
|
import ctypes
|
||||||
import functools
|
import functools
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
import threading
|
import threading
|
||||||
import traceback
|
import traceback
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_http_server,
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
|
||||||
class BuildHTTPServer(ThreadingMixIn, HTTPServer):
|
# These are not used outside of buildserver.py thus not in compat.py
|
||||||
|
|
||||||
|
try:
|
||||||
|
import winreg as compat_winreg
|
||||||
|
except ImportError: # Python 2
|
||||||
|
import _winreg as compat_winreg
|
||||||
|
|
||||||
|
try:
|
||||||
|
import socketserver as compat_socketserver
|
||||||
|
except ImportError: # Python 2
|
||||||
|
import SocketServer as compat_socketserver
|
||||||
|
|
||||||
|
try:
|
||||||
|
compat_input = raw_input
|
||||||
|
except NameError: # Python 3
|
||||||
|
compat_input = input
|
||||||
|
|
||||||
|
|
||||||
|
class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
|
||||||
allow_reuse_address = True
|
allow_reuse_address = True
|
||||||
|
|
||||||
|
|
||||||
@ -191,7 +216,7 @@ def main(args=None):
|
|||||||
action='store_const', dest='action', const='service',
|
action='store_const', dest='action', const='service',
|
||||||
help='Run as a Windows service')
|
help='Run as a Windows service')
|
||||||
parser.add_argument('-b', '--bind', metavar='<host:port>',
|
parser.add_argument('-b', '--bind', metavar='<host:port>',
|
||||||
action='store', default='localhost:8142',
|
action='store', default='0.0.0.0:8142',
|
||||||
help='Bind to host:port (default %default)')
|
help='Bind to host:port (default %default)')
|
||||||
options = parser.parse_args(args=args)
|
options = parser.parse_args(args=args)
|
||||||
|
|
||||||
@ -216,7 +241,7 @@ def main(args=None):
|
|||||||
srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
|
srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
|
||||||
thr = threading.Thread(target=srv.serve_forever)
|
thr = threading.Thread(target=srv.serve_forever)
|
||||||
thr.start()
|
thr.start()
|
||||||
input('Press ENTER to shut down')
|
compat_input('Press ENTER to shut down')
|
||||||
srv.shutdown()
|
srv.shutdown()
|
||||||
thr.join()
|
thr.join()
|
||||||
|
|
||||||
@ -231,8 +256,6 @@ def rmtree(path):
|
|||||||
os.remove(fname)
|
os.remove(fname)
|
||||||
os.rmdir(path)
|
os.rmdir(path)
|
||||||
|
|
||||||
#==============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
class BuildError(Exception):
|
class BuildError(Exception):
|
||||||
def __init__(self, output, code=500):
|
def __init__(self, output, code=500):
|
||||||
@ -249,15 +272,25 @@ class HTTPError(BuildError):
|
|||||||
|
|
||||||
class PythonBuilder(object):
|
class PythonBuilder(object):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
pythonVersion = kwargs.pop('python', '2.7')
|
python_version = kwargs.pop('python', '3.4')
|
||||||
|
python_path = None
|
||||||
|
for node in ('Wow6432Node\\', ''):
|
||||||
try:
|
try:
|
||||||
key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
|
key = compat_winreg.OpenKey(
|
||||||
|
compat_winreg.HKEY_LOCAL_MACHINE,
|
||||||
|
r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
|
||||||
try:
|
try:
|
||||||
self.pythonPath, _ = _winreg.QueryValueEx(key, '')
|
python_path, _ = compat_winreg.QueryValueEx(key, '')
|
||||||
finally:
|
finally:
|
||||||
_winreg.CloseKey(key)
|
compat_winreg.CloseKey(key)
|
||||||
|
break
|
||||||
except Exception:
|
except Exception:
|
||||||
raise BuildError('No such Python version: %s' % pythonVersion)
|
pass
|
||||||
|
|
||||||
|
if not python_path:
|
||||||
|
raise BuildError('No such Python version: %s' % python_version)
|
||||||
|
|
||||||
|
self.pythonPath = python_path
|
||||||
|
|
||||||
super(PythonBuilder, self).__init__(**kwargs)
|
super(PythonBuilder, self).__init__(**kwargs)
|
||||||
|
|
||||||
@ -305,8 +338,10 @@ class YoutubeDLBuilder(object):
|
|||||||
|
|
||||||
def build(self):
|
def build(self):
|
||||||
try:
|
try:
|
||||||
subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
|
||||||
cwd=self.buildPath)
|
proc.wait()
|
||||||
|
#subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
||||||
|
# cwd=self.buildPath)
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
raise BuildError(e.output)
|
raise BuildError(e.output)
|
||||||
|
|
||||||
@ -369,12 +404,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
|
actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
|
||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
path = urlparse.urlparse(self.path)
|
path = compat_urlparse.urlparse(self.path)
|
||||||
paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
|
paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
|
||||||
action, _, path = path.path.strip('/').partition('/')
|
action, _, path = path.path.strip('/').partition('/')
|
||||||
if path:
|
if path:
|
||||||
path = path.split('/')
|
path = path.split('/')
|
||||||
@ -388,7 +423,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
|||||||
builder.close()
|
builder.close()
|
||||||
except BuildError as e:
|
except BuildError as e:
|
||||||
self.send_response(e.code)
|
self.send_response(e.code)
|
||||||
msg = unicode(e).encode('UTF-8')
|
msg = compat_str(e).encode('UTF-8')
|
||||||
self.send_header('Content-Type', 'text/plain; charset=UTF-8')
|
self.send_header('Content-Type', 'text/plain; charset=UTF-8')
|
||||||
self.send_header('Content-Length', len(msg))
|
self.send_header('Content-Length', len(msg))
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
@ -400,7 +435,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
|||||||
else:
|
else:
|
||||||
self.send_response(500, 'Malformed URL')
|
self.send_response(500, 'Malformed URL')
|
||||||
|
|
||||||
#==============================================================================
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
8
devscripts/install_srelay.sh
Executable file
8
devscripts/install_srelay.sh
Executable file
@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
mkdir -p tmp && cd tmp
|
||||||
|
wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz
|
||||||
|
tar zxvf srelay-0.4.8b6.tar.gz
|
||||||
|
cd srelay-0.4.8b6
|
||||||
|
./configure
|
||||||
|
make
|
@ -1,13 +1,46 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
import optparse
|
||||||
import os.path
|
import os.path
|
||||||
import sys
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||||
|
|
||||||
|
PREFIX = '''%YOUTUBE-DL(1)
|
||||||
|
|
||||||
|
# NAME
|
||||||
|
|
||||||
|
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||||
|
|
||||||
|
# SYNOPSIS
|
||||||
|
|
||||||
|
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
||||||
|
options, args = parser.parse_args()
|
||||||
|
if len(args) != 1:
|
||||||
|
parser.error('Expected an output filename')
|
||||||
|
|
||||||
|
outfile, = args
|
||||||
|
|
||||||
|
with io.open(README_FILE, encoding='utf-8') as f:
|
||||||
|
readme = f.read()
|
||||||
|
|
||||||
|
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||||
|
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||||
|
readme = PREFIX + readme
|
||||||
|
|
||||||
|
readme = filter_options(readme)
|
||||||
|
|
||||||
|
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||||
|
outf.write(readme)
|
||||||
|
|
||||||
|
|
||||||
def filter_options(readme):
|
def filter_options(readme):
|
||||||
ret = ''
|
ret = ''
|
||||||
@ -37,27 +70,5 @@ def filter_options(readme):
|
|||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
with io.open(README_FILE, encoding='utf-8') as f:
|
if __name__ == '__main__':
|
||||||
readme = f.read()
|
main()
|
||||||
|
|
||||||
PREFIX = '''%YOUTUBE-DL(1)
|
|
||||||
|
|
||||||
# NAME
|
|
||||||
|
|
||||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
|
||||||
|
|
||||||
# SYNOPSIS
|
|
||||||
|
|
||||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
|
||||||
|
|
||||||
'''
|
|
||||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
|
||||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
|
||||||
readme = PREFIX + readme
|
|
||||||
|
|
||||||
readme = filter_options(readme)
|
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
|
||||||
print(readme.encode('utf-8'))
|
|
||||||
else:
|
|
||||||
print(readme)
|
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
# * the git config user.signingkey is properly set
|
# * the git config user.signingkey is properly set
|
||||||
|
|
||||||
# You will need
|
# You will need
|
||||||
# pip install coverage nose rsa
|
# pip install coverage nose rsa wheel
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
# release notes
|
# release notes
|
||||||
@ -15,10 +15,28 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
skip_tests=true
|
skip_tests=true
|
||||||
if [ "$1" = '--run-tests' ]; then
|
buildserver='localhost:8142'
|
||||||
|
|
||||||
|
while true
|
||||||
|
do
|
||||||
|
case "$1" in
|
||||||
|
--run-tests)
|
||||||
skip_tests=false
|
skip_tests=false
|
||||||
shift
|
shift
|
||||||
fi
|
;;
|
||||||
|
--buildserver)
|
||||||
|
buildserver="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--*)
|
||||||
|
echo "ERROR: unknown option $1"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
break
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
||||||
version="$1"
|
version="$1"
|
||||||
@ -33,6 +51,9 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th
|
|||||||
useless_files=$(find youtube_dl -type f -not -name '*.py')
|
useless_files=$(find youtube_dl -type f -not -name '*.py')
|
||||||
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
|
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
|
||||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
||||||
|
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
|
||||||
|
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
||||||
|
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
||||||
|
|
||||||
/bin/echo -e "\n### First of all, testing..."
|
/bin/echo -e "\n### First of all, testing..."
|
||||||
make clean
|
make clean
|
||||||
@ -64,7 +85,7 @@ git push origin "$version"
|
|||||||
REV=$(git rev-parse HEAD)
|
REV=$(git rev-parse HEAD)
|
||||||
make youtube-dl youtube-dl.tar.gz
|
make youtube-dl youtube-dl.tar.gz
|
||||||
read -p "VM running? (y/n) " -n 1
|
read -p "VM running? (y/n) " -n 1
|
||||||
wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||||
mkdir -p "build/$version"
|
mkdir -p "build/$version"
|
||||||
mv youtube-dl youtube-dl.exe "build/$version"
|
mv youtube-dl youtube-dl.exe "build/$version"
|
||||||
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
|
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
- **22tracks:genre**
|
- **22tracks:genre**
|
||||||
- **22tracks:track**
|
- **22tracks:track**
|
||||||
- **24video**
|
- **24video**
|
||||||
|
- **3qsdn**: 3Q SDN
|
||||||
- **3sat**
|
- **3sat**
|
||||||
- **4tube**
|
- **4tube**
|
||||||
- **56.com**
|
- **56.com**
|
||||||
@ -15,6 +16,8 @@
|
|||||||
- **9gag**
|
- **9gag**
|
||||||
- **abc.net.au**
|
- **abc.net.au**
|
||||||
- **Abc7News**
|
- **Abc7News**
|
||||||
|
- **abcnews**
|
||||||
|
- **abcnews:video**
|
||||||
- **AcademicEarth:Course**
|
- **AcademicEarth:Course**
|
||||||
- **acast**
|
- **acast**
|
||||||
- **acast:channel**
|
- **acast:channel**
|
||||||
@ -40,8 +43,8 @@
|
|||||||
- **appletrailers:section**
|
- **appletrailers:section**
|
||||||
- **archive.org**: archive.org videos
|
- **archive.org**: archive.org videos
|
||||||
- **ARD**
|
- **ARD**
|
||||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
|
||||||
- **ARD:mediathek**
|
- **ARD:mediathek**
|
||||||
|
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||||
- **arte.tv**
|
- **arte.tv**
|
||||||
- **arte.tv:+7**
|
- **arte.tv:+7**
|
||||||
- **arte.tv:cinema**
|
- **arte.tv:cinema**
|
||||||
@ -50,7 +53,9 @@
|
|||||||
- **arte.tv:ddc**
|
- **arte.tv:ddc**
|
||||||
- **arte.tv:embed**
|
- **arte.tv:embed**
|
||||||
- **arte.tv:future**
|
- **arte.tv:future**
|
||||||
|
- **arte.tv:info**
|
||||||
- **arte.tv:magazine**
|
- **arte.tv:magazine**
|
||||||
|
- **arte.tv:playlist**
|
||||||
- **AtresPlayer**
|
- **AtresPlayer**
|
||||||
- **ATTTechChannel**
|
- **ATTTechChannel**
|
||||||
- **AudiMedia**
|
- **AudiMedia**
|
||||||
@ -76,6 +81,7 @@
|
|||||||
- **Bild**: Bild.de
|
- **Bild**: Bild.de
|
||||||
- **BiliBili**
|
- **BiliBili**
|
||||||
- **BioBioChileTV**
|
- **BioBioChileTV**
|
||||||
|
- **BIQLE**
|
||||||
- **BleacherReport**
|
- **BleacherReport**
|
||||||
- **BleacherReportCMS**
|
- **BleacherReportCMS**
|
||||||
- **blinkx**
|
- **blinkx**
|
||||||
@ -101,6 +107,7 @@
|
|||||||
- **CBCPlayer**
|
- **CBCPlayer**
|
||||||
- **CBS**
|
- **CBS**
|
||||||
- **CBSInteractive**
|
- **CBSInteractive**
|
||||||
|
- **CBSLocal**
|
||||||
- **CBSNews**: CBS News
|
- **CBSNews**: CBS News
|
||||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||||
- **CBSSports**
|
- **CBSSports**
|
||||||
@ -112,7 +119,6 @@
|
|||||||
- **chirbit**
|
- **chirbit**
|
||||||
- **chirbit:profile**
|
- **chirbit:profile**
|
||||||
- **Cinchcast**
|
- **Cinchcast**
|
||||||
- **Cinemassacre**
|
|
||||||
- **Clipfish**
|
- **Clipfish**
|
||||||
- **cliphunter**
|
- **cliphunter**
|
||||||
- **ClipRs**
|
- **ClipRs**
|
||||||
@ -126,12 +132,12 @@
|
|||||||
- **CNN**
|
- **CNN**
|
||||||
- **CNNArticle**
|
- **CNNArticle**
|
||||||
- **CNNBlogs**
|
- **CNNBlogs**
|
||||||
- **CollegeHumor**
|
|
||||||
- **CollegeRama**
|
- **CollegeRama**
|
||||||
- **ComCarCoff**
|
- **ComCarCoff**
|
||||||
- **ComedyCentral**
|
- **ComedyCentral**
|
||||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
|
- **Coub**
|
||||||
- **Cracked**
|
- **Cracked**
|
||||||
- **Crackle**
|
- **Crackle**
|
||||||
- **Criterion**
|
- **Criterion**
|
||||||
@ -144,6 +150,7 @@
|
|||||||
- **culturebox.francetvinfo.fr**
|
- **culturebox.francetvinfo.fr**
|
||||||
- **CultureUnplugged**
|
- **CultureUnplugged**
|
||||||
- **CWTV**
|
- **CWTV**
|
||||||
|
- **DailyMail**
|
||||||
- **dailymotion**
|
- **dailymotion**
|
||||||
- **dailymotion:playlist**
|
- **dailymotion:playlist**
|
||||||
- **dailymotion:user**
|
- **dailymotion:user**
|
||||||
@ -162,6 +169,7 @@
|
|||||||
- **defense.gouv.fr**
|
- **defense.gouv.fr**
|
||||||
- **democracynow**
|
- **democracynow**
|
||||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||||
|
- **DigitallySpeaking**
|
||||||
- **Digiteka**
|
- **Digiteka**
|
||||||
- **Discovery**
|
- **Discovery**
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
@ -173,7 +181,6 @@
|
|||||||
- **Dropbox**
|
- **Dropbox**
|
||||||
- **DrTuber**
|
- **DrTuber**
|
||||||
- **DRTV**
|
- **DRTV**
|
||||||
- **Dump**
|
|
||||||
- **Dumpert**
|
- **Dumpert**
|
||||||
- **dvtv**: http://video.aktualne.cz/
|
- **dvtv**: http://video.aktualne.cz/
|
||||||
- **dw**
|
- **dw**
|
||||||
@ -200,6 +207,7 @@
|
|||||||
- **exfm**: ex.fm
|
- **exfm**: ex.fm
|
||||||
- **ExpoTV**
|
- **ExpoTV**
|
||||||
- **ExtremeTube**
|
- **ExtremeTube**
|
||||||
|
- **EyedoTV**
|
||||||
- **facebook**
|
- **facebook**
|
||||||
- **faz.net**
|
- **faz.net**
|
||||||
- **fc2**
|
- **fc2**
|
||||||
@ -211,6 +219,7 @@
|
|||||||
- **Flickr**
|
- **Flickr**
|
||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
- **FootyRoom**
|
- **FootyRoom**
|
||||||
|
- **Formula1**
|
||||||
- **FOX**
|
- **FOX**
|
||||||
- **Foxgay**
|
- **Foxgay**
|
||||||
- **FoxNews**: Fox News and Fox Business Video
|
- **FoxNews**: Fox News and Fox Business Video
|
||||||
@ -314,20 +323,23 @@
|
|||||||
- **la7.tv**
|
- **la7.tv**
|
||||||
- **Laola1Tv**
|
- **Laola1Tv**
|
||||||
- **Le**: 乐视网
|
- **Le**: 乐视网
|
||||||
|
- **Learnr**
|
||||||
- **Lecture2Go**
|
- **Lecture2Go**
|
||||||
- **Lemonde**
|
- **Lemonde**
|
||||||
- **LePlaylist**
|
- **LePlaylist**
|
||||||
- **LetvCloud**: 乐视云
|
- **LetvCloud**: 乐视云
|
||||||
- **Libsyn**
|
- **Libsyn**
|
||||||
|
- **life**: Life.ru
|
||||||
- **life:embed**
|
- **life:embed**
|
||||||
- **lifenews**: LIFE | NEWS
|
|
||||||
- **limelight**
|
- **limelight**
|
||||||
- **limelight:channel**
|
- **limelight:channel**
|
||||||
- **limelight:channel_list**
|
- **limelight:channel_list**
|
||||||
|
- **LiTV**
|
||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
- **livestream**
|
- **livestream**
|
||||||
- **livestream:original**
|
- **livestream:original**
|
||||||
- **LnkGo**
|
- **LnkGo**
|
||||||
|
- **LocalNews8**
|
||||||
- **LoveHomePorn**
|
- **LoveHomePorn**
|
||||||
- **lrt.lt**
|
- **lrt.lt**
|
||||||
- **lynda**: lynda.com videos
|
- **lynda**: lynda.com videos
|
||||||
@ -337,26 +349,28 @@
|
|||||||
- **mailru**: Видео@Mail.Ru
|
- **mailru**: Видео@Mail.Ru
|
||||||
- **MakersChannel**
|
- **MakersChannel**
|
||||||
- **MakerTV**
|
- **MakerTV**
|
||||||
- **Malemotion**
|
|
||||||
- **MatchTV**
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
- **metacafe**
|
- **metacafe**
|
||||||
- **Metacritic**
|
- **Metacritic**
|
||||||
- **Mgoon**
|
- **Mgoon**
|
||||||
|
- **MGTV**: 芒果TV
|
||||||
- **Minhateca**
|
- **Minhateca**
|
||||||
- **MinistryGrid**
|
- **MinistryGrid**
|
||||||
- **Minoto**
|
- **Minoto**
|
||||||
- **miomio.tv**
|
- **miomio.tv**
|
||||||
- **MiTele**: mitele.es
|
- **MiTele**: mitele.es
|
||||||
- **mixcloud**
|
- **mixcloud**
|
||||||
|
- **mixcloud:playlist**
|
||||||
|
- **mixcloud:stream**
|
||||||
|
- **mixcloud:user**
|
||||||
- **MLB**
|
- **MLB**
|
||||||
- **Mnet**
|
- **Mnet**
|
||||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||||
- **Mofosex**
|
- **Mofosex**
|
||||||
- **Mojvideo**
|
- **Mojvideo**
|
||||||
- **Moniker**: allmyvideos.net and vidspot.net
|
- **Moniker**: allmyvideos.net and vidspot.net
|
||||||
- **mooshare**: Mooshare.biz
|
|
||||||
- **Morningstar**: morningstar.com
|
- **Morningstar**: morningstar.com
|
||||||
- **Motherless**
|
- **Motherless**
|
||||||
- **Motorsport**: motorsport.com
|
- **Motorsport**: motorsport.com
|
||||||
@ -371,8 +385,10 @@
|
|||||||
- **mtvservices:embedded**
|
- **mtvservices:embedded**
|
||||||
- **MuenchenTV**: münchen.tv
|
- **MuenchenTV**: münchen.tv
|
||||||
- **MusicPlayOn**
|
- **MusicPlayOn**
|
||||||
- **muzu.tv**
|
- **mva**: Microsoft Virtual Academy videos
|
||||||
|
- **mva:course**: Microsoft Virtual Academy courses
|
||||||
- **Mwave**
|
- **Mwave**
|
||||||
|
- **MwaveMeetGreet**
|
||||||
- **MySpace**
|
- **MySpace**
|
||||||
- **MySpace:album**
|
- **MySpace:album**
|
||||||
- **MySpass**
|
- **MySpass**
|
||||||
@ -393,7 +409,6 @@
|
|||||||
- **ndr:embed:base**
|
- **ndr:embed:base**
|
||||||
- **NDTV**
|
- **NDTV**
|
||||||
- **NerdCubedFeed**
|
- **NerdCubedFeed**
|
||||||
- **Nerdist**
|
|
||||||
- **netease:album**: 网易云音乐 - 专辑
|
- **netease:album**: 网易云音乐 - 专辑
|
||||||
- **netease:djradio**: 网易云音乐 - 电台
|
- **netease:djradio**: 网易云音乐 - 电台
|
||||||
- **netease:mv**: 网易云音乐 - MV
|
- **netease:mv**: 网易云音乐 - MV
|
||||||
@ -411,7 +426,8 @@
|
|||||||
- **nfl.com**
|
- **nfl.com**
|
||||||
- **nhl.com**
|
- **nhl.com**
|
||||||
- **nhl.com:news**: NHL news
|
- **nhl.com:news**: NHL news
|
||||||
- **nhl.com:videocenter**: NHL videocenter category
|
- **nhl.com:videocenter**
|
||||||
|
- **nhl.com:videocenter:category**: NHL videocenter category
|
||||||
- **nick.com**
|
- **nick.com**
|
||||||
- **niconico**: ニコニコ動画
|
- **niconico**: ニコニコ動画
|
||||||
- **NiconicoPlaylist**
|
- **NiconicoPlaylist**
|
||||||
@ -459,13 +475,14 @@
|
|||||||
- **Patreon**
|
- **Patreon**
|
||||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||||
- **pcmag**
|
- **pcmag**
|
||||||
- **Periscope**: Periscope
|
- **People**
|
||||||
|
- **periscope**: Periscope
|
||||||
|
- **periscope:user**: Periscope user videos
|
||||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||||
- **phoenix.de**
|
- **phoenix.de**
|
||||||
- **Photobucket**
|
- **Photobucket**
|
||||||
- **Pinkbike**
|
- **Pinkbike**
|
||||||
- **Pladform**
|
- **Pladform**
|
||||||
- **PlanetaPlay**
|
|
||||||
- **play.fm**
|
- **play.fm**
|
||||||
- **played.to**
|
- **played.to**
|
||||||
- **PlaysTV**
|
- **PlaysTV**
|
||||||
@ -495,10 +512,11 @@
|
|||||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||||
- **QuickVid**
|
|
||||||
- **R7**
|
- **R7**
|
||||||
- **radio.de**
|
- **radio.de**
|
||||||
- **radiobremen**
|
- **radiobremen**
|
||||||
|
- **radiocanada**
|
||||||
|
- **RadioCanadaAudioVideo**
|
||||||
- **radiofrance**
|
- **radiofrance**
|
||||||
- **RadioJavan**
|
- **RadioJavan**
|
||||||
- **Rai**
|
- **Rai**
|
||||||
@ -508,6 +526,7 @@
|
|||||||
- **RedTube**
|
- **RedTube**
|
||||||
- **RegioTV**
|
- **RegioTV**
|
||||||
- **Restudy**
|
- **Restudy**
|
||||||
|
- **Reuters**
|
||||||
- **ReverbNation**
|
- **ReverbNation**
|
||||||
- **Revision3**
|
- **Revision3**
|
||||||
- **RICE**
|
- **RICE**
|
||||||
@ -549,9 +568,9 @@
|
|||||||
- **ScreenJunkies**
|
- **ScreenJunkies**
|
||||||
- **ScreenwaveMedia**
|
- **ScreenwaveMedia**
|
||||||
- **SenateISVP**
|
- **SenateISVP**
|
||||||
|
- **SendtoNews**
|
||||||
- **ServingSys**
|
- **ServingSys**
|
||||||
- **Sexu**
|
- **Sexu**
|
||||||
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
|
||||||
- **Shahid**
|
- **Shahid**
|
||||||
- **Shared**: shared.sx and vivo.sx
|
- **Shared**: shared.sx and vivo.sx
|
||||||
- **ShareSix**
|
- **ShareSix**
|
||||||
@ -564,8 +583,6 @@
|
|||||||
- **smotri:broadcast**: Smotri.com broadcasts
|
- **smotri:broadcast**: Smotri.com broadcasts
|
||||||
- **smotri:community**: Smotri.com community videos
|
- **smotri:community**: Smotri.com community videos
|
||||||
- **smotri:user**: Smotri.com user videos
|
- **smotri:user**: Smotri.com user videos
|
||||||
- **SnagFilms**
|
|
||||||
- **SnagFilmsEmbed**
|
|
||||||
- **Snotr**
|
- **Snotr**
|
||||||
- **Sohu**
|
- **Sohu**
|
||||||
- **soundcloud**
|
- **soundcloud**
|
||||||
@ -607,8 +624,10 @@
|
|||||||
- **Syfy**
|
- **Syfy**
|
||||||
- **SztvHu**
|
- **SztvHu**
|
||||||
- **Tagesschau**
|
- **Tagesschau**
|
||||||
|
- **tagesschau:player**
|
||||||
- **Tapely**
|
- **Tapely**
|
||||||
- **Tass**
|
- **Tass**
|
||||||
|
- **TDSLifeway**
|
||||||
- **teachertube**: teachertube.com videos
|
- **teachertube**: teachertube.com videos
|
||||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||||
- **TeachingChannel**
|
- **TeachingChannel**
|
||||||
@ -625,7 +644,6 @@
|
|||||||
- **TeleTask**
|
- **TeleTask**
|
||||||
- **TF1**
|
- **TF1**
|
||||||
- **TheIntercept**
|
- **TheIntercept**
|
||||||
- **TheOnion**
|
|
||||||
- **ThePlatform**
|
- **ThePlatform**
|
||||||
- **ThePlatformFeed**
|
- **ThePlatformFeed**
|
||||||
- **TheScene**
|
- **TheScene**
|
||||||
@ -670,11 +688,10 @@
|
|||||||
- **TVCArticle**
|
- **TVCArticle**
|
||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvland.com**
|
- **tvland.com**
|
||||||
- **tvp.pl**
|
- **tvp**: Telewizja Polska
|
||||||
- **tvp.pl:Series**
|
- **tvp:series**
|
||||||
- **TVPlay**: TV3Play and related services
|
- **TVPlay**: TV3Play and related services
|
||||||
- **Tweakers**
|
- **Tweakers**
|
||||||
- **twitch:bookmarks**
|
|
||||||
- **twitch:chapter**
|
- **twitch:chapter**
|
||||||
- **twitch:past_broadcasts**
|
- **twitch:past_broadcasts**
|
||||||
- **twitch:profile**
|
- **twitch:profile**
|
||||||
@ -684,7 +701,6 @@
|
|||||||
- **twitter**
|
- **twitter**
|
||||||
- **twitter:amplify**
|
- **twitter:amplify**
|
||||||
- **twitter:card**
|
- **twitter:card**
|
||||||
- **Ubu**
|
|
||||||
- **udemy**
|
- **udemy**
|
||||||
- **udemy:course**
|
- **udemy:course**
|
||||||
- **UDNEmbed**: 聯合影音
|
- **UDNEmbed**: 聯合影音
|
||||||
@ -693,7 +709,8 @@
|
|||||||
- **USAToday**
|
- **USAToday**
|
||||||
- **ustream**
|
- **ustream**
|
||||||
- **ustream:channel**
|
- **ustream:channel**
|
||||||
- **Ustudio**
|
- **ustudio**
|
||||||
|
- **ustudio:embed**
|
||||||
- **Varzesh3**
|
- **Varzesh3**
|
||||||
- **Vbox7**
|
- **Vbox7**
|
||||||
- **VeeHD**
|
- **VeeHD**
|
||||||
@ -701,6 +718,7 @@
|
|||||||
- **Vessel**
|
- **Vessel**
|
||||||
- **Vesti**: Вести.Ru
|
- **Vesti**: Вести.Ru
|
||||||
- **Vevo**
|
- **Vevo**
|
||||||
|
- **VevoPlaylist**
|
||||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||||
- **vh1.com**
|
- **vh1.com**
|
||||||
- **Vice**
|
- **Vice**
|
||||||
@ -723,6 +741,8 @@
|
|||||||
- **Vidzi**
|
- **Vidzi**
|
||||||
- **vier**
|
- **vier**
|
||||||
- **vier:videos**
|
- **vier:videos**
|
||||||
|
- **ViewLift**
|
||||||
|
- **ViewLiftEmbed**
|
||||||
- **Viewster**
|
- **Viewster**
|
||||||
- **Viidea**
|
- **Viidea**
|
||||||
- **viki**
|
- **viki**
|
||||||
@ -752,9 +772,10 @@
|
|||||||
- **VuClip**
|
- **VuClip**
|
||||||
- **vulture.com**
|
- **vulture.com**
|
||||||
- **Walla**
|
- **Walla**
|
||||||
- **WashingtonPost**
|
- **washingtonpost**
|
||||||
|
- **washingtonpost:article**
|
||||||
- **wat.tv**
|
- **wat.tv**
|
||||||
- **WayOfTheMaster**
|
- **WatchIndianPorn**: Watch Indian Porn
|
||||||
- **WDR**
|
- **WDR**
|
||||||
- **wdr:mobile**
|
- **wdr:mobile**
|
||||||
- **WDRMaus**: Sendung mit der Maus
|
- **WDRMaus**: Sendung mit der Maus
|
||||||
@ -771,9 +792,13 @@
|
|||||||
- **WSJ**: Wall Street Journal
|
- **WSJ**: Wall Street Journal
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me
|
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To
|
||||||
- **XHamster**
|
- **XHamster**
|
||||||
- **XHamsterEmbed**
|
- **XHamsterEmbed**
|
||||||
|
- **xiami:album**: 虾米音乐 - 专辑
|
||||||
|
- **xiami:artist**: 虾米音乐 - 歌手
|
||||||
|
- **xiami:collection**: 虾米音乐 - 精选集
|
||||||
|
- **xiami:song**: 虾米音乐
|
||||||
- **XMinus**
|
- **XMinus**
|
||||||
- **XNXX**
|
- **XNXX**
|
||||||
- **Xstream**
|
- **Xstream**
|
||||||
|
@ -24,8 +24,13 @@ from youtube_dl.utils import (
|
|||||||
def get_params(override=None):
|
def get_params(override=None):
|
||||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||||
"parameters.json")
|
"parameters.json")
|
||||||
|
LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||||
|
"local_parameters.json")
|
||||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||||
parameters = json.load(pf)
|
parameters = json.load(pf)
|
||||||
|
if os.path.exists(LOCAL_PARAMETERS_FILE):
|
||||||
|
with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||||
|
parameters.update(json.load(pf))
|
||||||
if override:
|
if override:
|
||||||
parameters.update(override)
|
parameters.update(override)
|
||||||
return parameters
|
return parameters
|
||||||
|
@ -10,13 +10,14 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.utils import get_filesystem_encoding
|
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
|
compat_setenv,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_struct_unpack,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
@ -26,19 +27,22 @@ from youtube_dl.compat import (
|
|||||||
class TestCompat(unittest.TestCase):
|
class TestCompat(unittest.TestCase):
|
||||||
def test_compat_getenv(self):
|
def test_compat_getenv(self):
|
||||||
test_str = 'тест'
|
test_str = 'тест'
|
||||||
os.environ['YOUTUBE-DL-TEST'] = (
|
compat_setenv('YOUTUBE-DL-TEST', test_str)
|
||||||
test_str if sys.version_info >= (3, 0)
|
|
||||||
else test_str.encode(get_filesystem_encoding()))
|
|
||||||
self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
|
self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
|
||||||
|
|
||||||
|
def test_compat_setenv(self):
|
||||||
|
test_var = 'YOUTUBE-DL-TEST'
|
||||||
|
test_str = 'тест'
|
||||||
|
compat_setenv(test_var, test_str)
|
||||||
|
compat_getenv(test_var)
|
||||||
|
self.assertEqual(compat_getenv(test_var), test_str)
|
||||||
|
|
||||||
def test_compat_expanduser(self):
|
def test_compat_expanduser(self):
|
||||||
old_home = os.environ.get('HOME')
|
old_home = os.environ.get('HOME')
|
||||||
test_str = 'C:\Documents and Settings\тест\Application Data'
|
test_str = 'C:\Documents and Settings\тест\Application Data'
|
||||||
os.environ['HOME'] = (
|
compat_setenv('HOME', test_str)
|
||||||
test_str if sys.version_info >= (3, 0)
|
|
||||||
else test_str.encode(get_filesystem_encoding()))
|
|
||||||
self.assertEqual(compat_expanduser('~'), test_str)
|
self.assertEqual(compat_expanduser('~'), test_str)
|
||||||
os.environ['HOME'] = old_home
|
compat_setenv('HOME', old_home or '')
|
||||||
|
|
||||||
def test_all_present(self):
|
def test_all_present(self):
|
||||||
import youtube_dl.compat
|
import youtube_dl.compat
|
||||||
@ -99,5 +103,15 @@ class TestCompat(unittest.TestCase):
|
|||||||
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
|
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
|
||||||
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
|
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
|
||||||
|
|
||||||
|
def test_compat_etree_fromstring_doctype(self):
|
||||||
|
xml = '''<?xml version="1.0"?>
|
||||||
|
<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
|
||||||
|
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
|
||||||
|
compat_etree_fromstring(xml)
|
||||||
|
|
||||||
|
def test_struct_unpack(self):
|
||||||
|
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
118
test/test_socks.py
Normal file
118
test/test_socks.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
import random
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from test.helper import (
|
||||||
|
FakeYDL,
|
||||||
|
get_params,
|
||||||
|
)
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestMultipleSocks(unittest.TestCase):
|
||||||
|
@staticmethod
|
||||||
|
def _check_params(attrs):
|
||||||
|
params = get_params()
|
||||||
|
for attr in attrs:
|
||||||
|
if attr not in params:
|
||||||
|
print('Missing %s. Skipping.' % attr)
|
||||||
|
return
|
||||||
|
return params
|
||||||
|
|
||||||
|
def test_proxy_http(self):
|
||||||
|
params = self._check_params(['primary_proxy', 'primary_server_ip'])
|
||||||
|
if params is None:
|
||||||
|
return
|
||||||
|
ydl = FakeYDL({
|
||||||
|
'proxy': params['primary_proxy']
|
||||||
|
})
|
||||||
|
self.assertEqual(
|
||||||
|
ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'),
|
||||||
|
params['primary_server_ip'])
|
||||||
|
|
||||||
|
def test_proxy_https(self):
|
||||||
|
params = self._check_params(['primary_proxy', 'primary_server_ip'])
|
||||||
|
if params is None:
|
||||||
|
return
|
||||||
|
ydl = FakeYDL({
|
||||||
|
'proxy': params['primary_proxy']
|
||||||
|
})
|
||||||
|
self.assertEqual(
|
||||||
|
ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'),
|
||||||
|
params['primary_server_ip'])
|
||||||
|
|
||||||
|
def test_secondary_proxy_http(self):
|
||||||
|
params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
|
||||||
|
if params is None:
|
||||||
|
return
|
||||||
|
ydl = FakeYDL()
|
||||||
|
req = compat_urllib_request.Request('http://yt-dl.org/ip')
|
||||||
|
req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
|
||||||
|
self.assertEqual(
|
||||||
|
ydl.urlopen(req).read().decode('utf-8'),
|
||||||
|
params['secondary_server_ip'])
|
||||||
|
|
||||||
|
def test_secondary_proxy_https(self):
|
||||||
|
params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
|
||||||
|
if params is None:
|
||||||
|
return
|
||||||
|
ydl = FakeYDL()
|
||||||
|
req = compat_urllib_request.Request('https://yt-dl.org/ip')
|
||||||
|
req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
|
||||||
|
self.assertEqual(
|
||||||
|
ydl.urlopen(req).read().decode('utf-8'),
|
||||||
|
params['secondary_server_ip'])
|
||||||
|
|
||||||
|
|
||||||
|
class TestSocks(unittest.TestCase):
|
||||||
|
_SKIP_SOCKS_TEST = True
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
if self._SKIP_SOCKS_TEST:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.port = random.randint(20000, 30000)
|
||||||
|
self.server_process = subprocess.Popen([
|
||||||
|
'srelay', '-f', '-i', '127.0.0.1:%d' % self.port],
|
||||||
|
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
if self._SKIP_SOCKS_TEST:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.server_process.terminate()
|
||||||
|
self.server_process.communicate()
|
||||||
|
|
||||||
|
def _get_ip(self, protocol):
|
||||||
|
if self._SKIP_SOCKS_TEST:
|
||||||
|
return '127.0.0.1'
|
||||||
|
|
||||||
|
ydl = FakeYDL({
|
||||||
|
'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
|
||||||
|
})
|
||||||
|
return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8')
|
||||||
|
|
||||||
|
def test_socks4(self):
|
||||||
|
self.assertTrue(isinstance(self._get_ip('socks4'), compat_str))
|
||||||
|
|
||||||
|
def test_socks4a(self):
|
||||||
|
self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str))
|
||||||
|
|
||||||
|
def test_socks5(self):
|
||||||
|
self.assertTrue(isinstance(self._get_ip('socks5'), compat_str))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -50,12 +50,13 @@ from youtube_dl.utils import (
|
|||||||
sanitize_path,
|
sanitize_path,
|
||||||
prepend_extension,
|
prepend_extension,
|
||||||
replace_extension,
|
replace_extension,
|
||||||
|
remove_start,
|
||||||
|
remove_end,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
struct_unpack,
|
|
||||||
timeconvert,
|
timeconvert,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -139,8 +140,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
|
self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
|
||||||
self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
|
self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
|
||||||
|
|
||||||
tests = 'a\xe4b\u4e2d\u56fd\u7684c'
|
tests = 'aäb\u4e2d\u56fd\u7684c'
|
||||||
self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
|
self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
|
||||||
self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
|
self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
|
||||||
|
|
||||||
forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
|
forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
|
||||||
@ -155,6 +156,10 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertTrue(sanitize_filename('-', restricted=True) != '')
|
self.assertTrue(sanitize_filename('-', restricted=True) != '')
|
||||||
self.assertTrue(sanitize_filename(':', restricted=True) != '')
|
self.assertTrue(sanitize_filename(':', restricted=True) != '')
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_filename(
|
||||||
|
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True),
|
||||||
|
'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy')
|
||||||
|
|
||||||
def test_sanitize_ids(self):
|
def test_sanitize_ids(self):
|
||||||
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
|
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
|
||||||
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
|
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
|
||||||
@ -212,6 +217,16 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||||
|
|
||||||
|
def test_remove_start(self):
|
||||||
|
self.assertEqual(remove_start(None, 'A - '), None)
|
||||||
|
self.assertEqual(remove_start('A - B', 'A - '), 'B')
|
||||||
|
self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
|
||||||
|
|
||||||
|
def test_remove_end(self):
|
||||||
|
self.assertEqual(remove_end(None, ' - B'), None)
|
||||||
|
self.assertEqual(remove_end('A - B', ' - B'), 'A')
|
||||||
|
self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
|
||||||
|
|
||||||
def test_remove_quotes(self):
|
def test_remove_quotes(self):
|
||||||
self.assertEqual(remove_quotes(None), None)
|
self.assertEqual(remove_quotes(None), None)
|
||||||
self.assertEqual(remove_quotes('"'), '"')
|
self.assertEqual(remove_quotes('"'), '"')
|
||||||
@ -413,6 +428,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||||
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||||
self.assertEqual(parse_duration('87 Min.'), 5220)
|
self.assertEqual(parse_duration('87 Min.'), 5220)
|
||||||
|
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
|
||||||
|
|
||||||
def test_fix_xml_ampersands(self):
|
def test_fix_xml_ampersands(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@ -452,9 +468,6 @@ class TestUtil(unittest.TestCase):
|
|||||||
testPL(5, 2, (2, 99), [2, 3, 4])
|
testPL(5, 2, (2, 99), [2, 3, 4])
|
||||||
testPL(5, 2, (20, 99), [])
|
testPL(5, 2, (20, 99), [])
|
||||||
|
|
||||||
def test_struct_unpack(self):
|
|
||||||
self.assertEqual(struct_unpack('!B', b'\x00'), (0,))
|
|
||||||
|
|
||||||
def test_read_batch_urls(self):
|
def test_read_batch_urls(self):
|
||||||
f = io.StringIO('''\xef\xbb\xbf foo
|
f = io.StringIO('''\xef\xbb\xbf foo
|
||||||
bar\r
|
bar\r
|
||||||
@ -616,6 +629,15 @@ class TestUtil(unittest.TestCase):
|
|||||||
json_code = js_to_json(inp)
|
json_code = js_to_json(inp)
|
||||||
self.assertEqual(json.loads(json_code), json.loads(inp))
|
self.assertEqual(json.loads(json_code), json.loads(inp))
|
||||||
|
|
||||||
|
inp = '''{
|
||||||
|
0:{src:'skipped', type: 'application/dash+xml'},
|
||||||
|
1:{src:'skipped', type: 'application/vnd.apple.mpegURL'},
|
||||||
|
}'''
|
||||||
|
self.assertEqual(js_to_json(inp), '''{
|
||||||
|
"0":{"src":"skipped", "type": "application/dash+xml"},
|
||||||
|
"1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
|
||||||
|
}''')
|
||||||
|
|
||||||
def test_js_to_json_edgecases(self):
|
def test_js_to_json_edgecases(self):
|
||||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||||
@ -639,6 +661,27 @@ class TestUtil(unittest.TestCase):
|
|||||||
on = js_to_json('{"abc": "def",}')
|
on = js_to_json('{"abc": "def",}')
|
||||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||||
|
|
||||||
|
on = js_to_json('{ 0: /* " \n */ ",]" , }')
|
||||||
|
self.assertEqual(json.loads(on), {'0': ',]'})
|
||||||
|
|
||||||
|
on = js_to_json(r'["<p>x<\/p>"]')
|
||||||
|
self.assertEqual(json.loads(on), ['<p>x</p>'])
|
||||||
|
|
||||||
|
on = js_to_json(r'["\xaa"]')
|
||||||
|
self.assertEqual(json.loads(on), ['\u00aa'])
|
||||||
|
|
||||||
|
on = js_to_json("['a\\\nb']")
|
||||||
|
self.assertEqual(json.loads(on), ['ab'])
|
||||||
|
|
||||||
|
on = js_to_json('{0xff:0xff}')
|
||||||
|
self.assertEqual(json.loads(on), {'255': 255})
|
||||||
|
|
||||||
|
on = js_to_json('{077:077}')
|
||||||
|
self.assertEqual(json.loads(on), {'63': 63})
|
||||||
|
|
||||||
|
on = js_to_json('{42:42}')
|
||||||
|
self.assertEqual(json.loads(on), {'42': 42})
|
||||||
|
|
||||||
def test_extract_attributes(self):
|
def test_extract_attributes(self):
|
||||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||||
|
@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
|
result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
|
||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertTrue(len(entries) >= 20)
|
self.assertTrue(len(entries) >= 50)
|
||||||
original_video = entries[0]
|
original_video = entries[0]
|
||||||
self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
|
self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
|
||||||
|
|
||||||
|
1
tox.ini
1
tox.ini
@ -9,5 +9,6 @@ passenv = HOME
|
|||||||
defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
|
defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
|
||||||
--exclude test_subtitles.py --exclude test_write_annotations.py
|
--exclude test_subtitles.py --exclude test_write_annotations.py
|
||||||
--exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
|
--exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
|
||||||
|
--exclude test_socks.py
|
||||||
commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
|
commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
|
||||||
# test.test_download:TestDownload.test_NowVideo
|
# test.test_download:TestDownload.test_NowVideo
|
||||||
|
@ -64,6 +64,7 @@ from .utils import (
|
|||||||
PostProcessingError,
|
PostProcessingError,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
prepend_extension,
|
prepend_extension,
|
||||||
|
register_socks_protocols,
|
||||||
render_table,
|
render_table,
|
||||||
replace_extension,
|
replace_extension,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
@ -260,7 +261,9 @@ class YoutubeDL(object):
|
|||||||
The following options determine which downloader is picked:
|
The following options determine which downloader is picked:
|
||||||
external_downloader: Executable of the external downloader to call.
|
external_downloader: Executable of the external downloader to call.
|
||||||
None or unset for standard (built-in) downloader.
|
None or unset for standard (built-in) downloader.
|
||||||
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
|
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
|
||||||
|
if True, otherwise use ffmpeg/avconv if False, otherwise
|
||||||
|
use downloader suggested by extractor if None.
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the downloader (see youtube_dl/downloader/common.py):
|
the downloader (see youtube_dl/downloader/common.py):
|
||||||
@ -323,7 +326,7 @@ class YoutubeDL(object):
|
|||||||
['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
|
['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
|
||||||
self._output_channel = os.fdopen(master, 'rb')
|
self._output_channel = os.fdopen(master, 'rb')
|
||||||
except OSError as ose:
|
except OSError as ose:
|
||||||
if ose.errno == 2:
|
if ose.errno == errno.ENOENT:
|
||||||
self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
|
self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
@ -359,6 +362,8 @@ class YoutubeDL(object):
|
|||||||
for ph in self.params.get('progress_hooks', []):
|
for ph in self.params.get('progress_hooks', []):
|
||||||
self.add_progress_hook(ph)
|
self.add_progress_hook(ph)
|
||||||
|
|
||||||
|
register_socks_protocols()
|
||||||
|
|
||||||
def warn_if_short_id(self, argv):
|
def warn_if_short_id(self, argv):
|
||||||
# short YouTube ID starting with dash?
|
# short YouTube ID starting with dash?
|
||||||
idxs = [
|
idxs = [
|
||||||
@ -578,7 +583,7 @@ class YoutubeDL(object):
|
|||||||
is_id=(k == 'id'))
|
is_id=(k == 'id'))
|
||||||
template_dict = dict((k, sanitize(k, v))
|
template_dict = dict((k, sanitize(k, v))
|
||||||
for k, v in template_dict.items()
|
for k, v in template_dict.items()
|
||||||
if v is not None)
|
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||||
|
|
||||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||||
@ -715,6 +720,7 @@ class YoutubeDL(object):
|
|||||||
result_type = ie_result.get('_type', 'video')
|
result_type = ie_result.get('_type', 'video')
|
||||||
|
|
||||||
if result_type in ('url', 'url_transparent'):
|
if result_type in ('url', 'url_transparent'):
|
||||||
|
ie_result['url'] = sanitize_url(ie_result['url'])
|
||||||
extract_flat = self.params.get('extract_flat', False)
|
extract_flat = self.params.get('extract_flat', False)
|
||||||
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
|
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
|
||||||
extract_flat is True):
|
extract_flat is True):
|
||||||
@ -1637,7 +1643,7 @@ class YoutubeDL(object):
|
|||||||
# Just a single file
|
# Just a single file
|
||||||
success = dl(filename, info_dict)
|
success = dl(filename, info_dict)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
self.report_error('unable to download video data: %s' % str(err))
|
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
|
||||||
return
|
return
|
||||||
except (OSError, IOError) as err:
|
except (OSError, IOError) as err:
|
||||||
raise UnavailableVideoError(err)
|
raise UnavailableVideoError(err)
|
||||||
@ -2016,6 +2022,7 @@ class YoutubeDL(object):
|
|||||||
if opts_cookiefile is None:
|
if opts_cookiefile is None:
|
||||||
self.cookiejar = compat_cookiejar.CookieJar()
|
self.cookiejar = compat_cookiejar.CookieJar()
|
||||||
else:
|
else:
|
||||||
|
opts_cookiefile = compat_expanduser(opts_cookiefile)
|
||||||
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||||
opts_cookiefile)
|
opts_cookiefile)
|
||||||
if os.access(opts_cookiefile, os.R_OK):
|
if os.access(opts_cookiefile, os.R_OK):
|
||||||
|
@ -67,9 +67,9 @@ def _real_main(argv=None):
|
|||||||
# Custom HTTP headers
|
# Custom HTTP headers
|
||||||
if opts.headers is not None:
|
if opts.headers is not None:
|
||||||
for h in opts.headers:
|
for h in opts.headers:
|
||||||
if h.find(':', 1) < 0:
|
if ':' not in h:
|
||||||
parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
|
parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
|
||||||
key, value = h.split(':', 2)
|
key, value = h.split(':', 1)
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
|
write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
|
||||||
std_headers[key] = value
|
std_headers[key] = value
|
||||||
@ -86,7 +86,9 @@ def _real_main(argv=None):
|
|||||||
if opts.batchfile == '-':
|
if opts.batchfile == '-':
|
||||||
batchfd = sys.stdin
|
batchfd = sys.stdin
|
||||||
else:
|
else:
|
||||||
batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
|
batchfd = io.open(
|
||||||
|
compat_expanduser(opts.batchfile),
|
||||||
|
'r', encoding='utf-8', errors='ignore')
|
||||||
batch_urls = read_batch_urls(batchfd)
|
batch_urls = read_batch_urls(batchfd)
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||||
@ -404,7 +406,7 @@ def _real_main(argv=None):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if opts.load_info_filename is not None:
|
if opts.load_info_filename is not None:
|
||||||
retcode = ydl.download_with_info_file(opts.load_info_filename)
|
retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
|
||||||
else:
|
else:
|
||||||
retcode = ydl.download(all_urls)
|
retcode = ydl.download(all_urls)
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
|
@ -11,6 +11,7 @@ import re
|
|||||||
import shlex
|
import shlex
|
||||||
import shutil
|
import shutil
|
||||||
import socket
|
import socket
|
||||||
|
import struct
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import itertools
|
import itertools
|
||||||
@ -244,13 +245,20 @@ try:
|
|||||||
except ImportError: # Python 2.6
|
except ImportError: # Python 2.6
|
||||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||||
|
|
||||||
|
|
||||||
|
etree = xml.etree.ElementTree
|
||||||
|
|
||||||
|
|
||||||
|
class _TreeBuilder(etree.TreeBuilder):
|
||||||
|
def doctype(self, name, pubid, system):
|
||||||
|
pass
|
||||||
|
|
||||||
if sys.version_info[0] >= 3:
|
if sys.version_info[0] >= 3:
|
||||||
compat_etree_fromstring = xml.etree.ElementTree.fromstring
|
def compat_etree_fromstring(text):
|
||||||
|
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||||
else:
|
else:
|
||||||
# python 2.x tries to encode unicode strings with ascii (see the
|
# python 2.x tries to encode unicode strings with ascii (see the
|
||||||
# XMLParser._fixtext method)
|
# XMLParser._fixtext method)
|
||||||
etree = xml.etree.ElementTree
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_etree_iter = etree.Element.iter
|
_etree_iter = etree.Element.iter
|
||||||
except AttributeError: # Python <=2.6
|
except AttributeError: # Python <=2.6
|
||||||
@ -264,7 +272,7 @@ else:
|
|||||||
# 2.7 source
|
# 2.7 source
|
||||||
def _XML(text, parser=None):
|
def _XML(text, parser=None):
|
||||||
if not parser:
|
if not parser:
|
||||||
parser = etree.XMLParser(target=etree.TreeBuilder())
|
parser = etree.XMLParser(target=_TreeBuilder())
|
||||||
parser.feed(text)
|
parser.feed(text)
|
||||||
return parser.close()
|
return parser.close()
|
||||||
|
|
||||||
@ -276,7 +284,7 @@ else:
|
|||||||
return el
|
return el
|
||||||
|
|
||||||
def compat_etree_fromstring(text):
|
def compat_etree_fromstring(text):
|
||||||
doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
|
doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
|
||||||
for el in _etree_iter(doc):
|
for el in _etree_iter(doc):
|
||||||
if el.text is not None and isinstance(el.text, bytes):
|
if el.text is not None and isinstance(el.text, bytes):
|
||||||
el.text = el.text.decode('utf-8')
|
el.text = el.text.decode('utf-8')
|
||||||
@ -340,9 +348,9 @@ except ImportError: # Python 2
|
|||||||
return parsed_result
|
return parsed_result
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from shlex import quote as shlex_quote
|
from shlex import quote as compat_shlex_quote
|
||||||
except ImportError: # Python < 3.3
|
except ImportError: # Python < 3.3
|
||||||
def shlex_quote(s):
|
def compat_shlex_quote(s):
|
||||||
if re.match(r'^[-_\w./]+$', s):
|
if re.match(r'^[-_\w./]+$', s):
|
||||||
return s
|
return s
|
||||||
else:
|
else:
|
||||||
@ -373,6 +381,9 @@ compat_os_name = os._name if os.name == 'java' else os.name
|
|||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
compat_getenv = os.getenv
|
compat_getenv = os.getenv
|
||||||
compat_expanduser = os.path.expanduser
|
compat_expanduser = os.path.expanduser
|
||||||
|
|
||||||
|
def compat_setenv(key, value, env=os.environ):
|
||||||
|
env[key] = value
|
||||||
else:
|
else:
|
||||||
# Environment variables should be decoded with filesystem encoding.
|
# Environment variables should be decoded with filesystem encoding.
|
||||||
# Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
|
# Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
|
||||||
@ -384,6 +395,12 @@ else:
|
|||||||
env = env.decode(get_filesystem_encoding())
|
env = env.decode(get_filesystem_encoding())
|
||||||
return env
|
return env
|
||||||
|
|
||||||
|
def compat_setenv(key, value, env=os.environ):
|
||||||
|
def encode(v):
|
||||||
|
from .utils import get_filesystem_encoding
|
||||||
|
return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
|
||||||
|
env[encode(key)] = encode(value)
|
||||||
|
|
||||||
# HACK: The default implementations of os.path.expanduser from cpython do not decode
|
# HACK: The default implementations of os.path.expanduser from cpython do not decode
|
||||||
# environment variables with filesystem encoding. We will work around this by
|
# environment variables with filesystem encoding. We will work around this by
|
||||||
# providing adjusted implementations.
|
# providing adjusted implementations.
|
||||||
@ -456,18 +473,6 @@ else:
|
|||||||
print(s)
|
print(s)
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
subprocess_check_output = subprocess.check_output
|
|
||||||
except AttributeError:
|
|
||||||
def subprocess_check_output(*args, **kwargs):
|
|
||||||
assert 'input' not in kwargs
|
|
||||||
p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
|
|
||||||
output, _ = p.communicate()
|
|
||||||
ret = p.poll()
|
|
||||||
if ret:
|
|
||||||
raise subprocess.CalledProcessError(ret, p.args, output=output)
|
|
||||||
return output
|
|
||||||
|
|
||||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||||
def compat_getpass(prompt, *args, **kwargs):
|
def compat_getpass(prompt, *args, **kwargs):
|
||||||
if isinstance(prompt, compat_str):
|
if isinstance(prompt, compat_str):
|
||||||
@ -583,6 +588,26 @@ if sys.version_info >= (3, 0):
|
|||||||
else:
|
else:
|
||||||
from tokenize import generate_tokens as compat_tokenize_tokenize
|
from tokenize import generate_tokens as compat_tokenize_tokenize
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
struct.pack('!I', 0)
|
||||||
|
except TypeError:
|
||||||
|
# In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
|
||||||
|
# See https://bugs.python.org/issue19099
|
||||||
|
def compat_struct_pack(spec, *args):
|
||||||
|
if isinstance(spec, compat_str):
|
||||||
|
spec = spec.encode('ascii')
|
||||||
|
return struct.pack(spec, *args)
|
||||||
|
|
||||||
|
def compat_struct_unpack(spec, *args):
|
||||||
|
if isinstance(spec, compat_str):
|
||||||
|
spec = spec.encode('ascii')
|
||||||
|
return struct.unpack(spec, *args)
|
||||||
|
else:
|
||||||
|
compat_struct_pack = struct.pack
|
||||||
|
compat_struct_unpack = struct.unpack
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'compat_HTMLParser',
|
'compat_HTMLParser',
|
||||||
'compat_HTTPError',
|
'compat_HTTPError',
|
||||||
@ -604,9 +629,13 @@ __all__ = [
|
|||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
'compat_parse_qs',
|
'compat_parse_qs',
|
||||||
'compat_print',
|
'compat_print',
|
||||||
|
'compat_setenv',
|
||||||
|
'compat_shlex_quote',
|
||||||
'compat_shlex_split',
|
'compat_shlex_split',
|
||||||
'compat_socket_create_connection',
|
'compat_socket_create_connection',
|
||||||
'compat_str',
|
'compat_str',
|
||||||
|
'compat_struct_pack',
|
||||||
|
'compat_struct_unpack',
|
||||||
'compat_subprocess_get_DEVNULL',
|
'compat_subprocess_get_DEVNULL',
|
||||||
'compat_tokenize_tokenize',
|
'compat_tokenize_tokenize',
|
||||||
'compat_urllib_error',
|
'compat_urllib_error',
|
||||||
@ -623,7 +652,5 @@ __all__ = [
|
|||||||
'compat_urlretrieve',
|
'compat_urlretrieve',
|
||||||
'compat_xml_parse_error',
|
'compat_xml_parse_error',
|
||||||
'compat_xpath',
|
'compat_xpath',
|
||||||
'shlex_quote',
|
|
||||||
'subprocess_check_output',
|
|
||||||
'workaround_optparse_bug9161',
|
'workaround_optparse_bug9161',
|
||||||
]
|
]
|
||||||
|
@ -41,9 +41,12 @@ def get_suitable_downloader(info_dict, params={}):
|
|||||||
if ed.can_download(info_dict):
|
if ed.can_download(info_dict):
|
||||||
return ed
|
return ed
|
||||||
|
|
||||||
if protocol == 'm3u8' and params.get('hls_prefer_native'):
|
if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
|
||||||
return HlsFD
|
return HlsFD
|
||||||
|
|
||||||
|
if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
|
||||||
|
return FFmpegFD
|
||||||
|
|
||||||
return PROTOCOL_MAP.get(protocol, HttpFD)
|
return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||||
|
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ import sys
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
|
from ..compat import compat_setenv
|
||||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
|
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
cli_option,
|
cli_option,
|
||||||
@ -198,6 +199,18 @@ class FFmpegFD(ExternalFD):
|
|||||||
'-headers',
|
'-headers',
|
||||||
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
||||||
|
|
||||||
|
env = None
|
||||||
|
proxy = self.params.get('proxy')
|
||||||
|
if proxy:
|
||||||
|
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||||
|
proxy = 'http://%s' % proxy
|
||||||
|
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||||
|
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||||
|
# We could switch to the following code if we are able to detect version properly
|
||||||
|
# args += ['-http_proxy', proxy]
|
||||||
|
env = os.environ.copy()
|
||||||
|
compat_setenv('HTTP_PROXY', proxy, env=env)
|
||||||
|
|
||||||
protocol = info_dict.get('protocol')
|
protocol = info_dict.get('protocol')
|
||||||
|
|
||||||
if protocol == 'rtmp':
|
if protocol == 'rtmp':
|
||||||
@ -224,8 +237,8 @@ class FFmpegFD(ExternalFD):
|
|||||||
args += ['-rtmp_live', 'live']
|
args += ['-rtmp_live', 'live']
|
||||||
|
|
||||||
args += ['-i', url, '-c', 'copy']
|
args += ['-i', url, '-c', 'copy']
|
||||||
if protocol == 'm3u8':
|
if protocol in ('m3u8', 'm3u8_native'):
|
||||||
if self.params.get('hls_use_mpegts', False):
|
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
||||||
args += ['-f', 'mpegts']
|
args += ['-f', 'mpegts']
|
||||||
else:
|
else:
|
||||||
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||||
@ -239,7 +252,7 @@ class FFmpegFD(ExternalFD):
|
|||||||
|
|
||||||
self._debug_cmd(args)
|
self._debug_cmd(args)
|
||||||
|
|
||||||
proc = subprocess.Popen(args, stdin=subprocess.PIPE)
|
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
|
||||||
try:
|
try:
|
||||||
retval = proc.wait()
|
retval = proc.wait()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
@ -12,37 +12,49 @@ from ..compat import (
|
|||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_struct_pack,
|
||||||
|
compat_struct_unpack,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
struct_pack,
|
|
||||||
struct_unpack,
|
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DataTruncatedError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class FlvReader(io.BytesIO):
|
class FlvReader(io.BytesIO):
|
||||||
"""
|
"""
|
||||||
Reader for Flv files
|
Reader for Flv files
|
||||||
The file format is documented in https://www.adobe.com/devnet/f4v.html
|
The file format is documented in https://www.adobe.com/devnet/f4v.html
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def read_bytes(self, n):
|
||||||
|
data = self.read(n)
|
||||||
|
if len(data) < n:
|
||||||
|
raise DataTruncatedError(
|
||||||
|
'FlvReader error: need %d bytes while only %d bytes got' % (
|
||||||
|
n, len(data)))
|
||||||
|
return data
|
||||||
|
|
||||||
# Utility functions for reading numbers and strings
|
# Utility functions for reading numbers and strings
|
||||||
def read_unsigned_long_long(self):
|
def read_unsigned_long_long(self):
|
||||||
return struct_unpack('!Q', self.read(8))[0]
|
return compat_struct_unpack('!Q', self.read_bytes(8))[0]
|
||||||
|
|
||||||
def read_unsigned_int(self):
|
def read_unsigned_int(self):
|
||||||
return struct_unpack('!I', self.read(4))[0]
|
return compat_struct_unpack('!I', self.read_bytes(4))[0]
|
||||||
|
|
||||||
def read_unsigned_char(self):
|
def read_unsigned_char(self):
|
||||||
return struct_unpack('!B', self.read(1))[0]
|
return compat_struct_unpack('!B', self.read_bytes(1))[0]
|
||||||
|
|
||||||
def read_string(self):
|
def read_string(self):
|
||||||
res = b''
|
res = b''
|
||||||
while True:
|
while True:
|
||||||
char = self.read(1)
|
char = self.read_bytes(1)
|
||||||
if char == b'\x00':
|
if char == b'\x00':
|
||||||
break
|
break
|
||||||
res += char
|
res += char
|
||||||
@ -53,18 +65,18 @@ class FlvReader(io.BytesIO):
|
|||||||
Read a box and return the info as a tuple: (box_size, box_type, box_data)
|
Read a box and return the info as a tuple: (box_size, box_type, box_data)
|
||||||
"""
|
"""
|
||||||
real_size = size = self.read_unsigned_int()
|
real_size = size = self.read_unsigned_int()
|
||||||
box_type = self.read(4)
|
box_type = self.read_bytes(4)
|
||||||
header_end = 8
|
header_end = 8
|
||||||
if size == 1:
|
if size == 1:
|
||||||
real_size = self.read_unsigned_long_long()
|
real_size = self.read_unsigned_long_long()
|
||||||
header_end = 16
|
header_end = 16
|
||||||
return real_size, box_type, self.read(real_size - header_end)
|
return real_size, box_type, self.read_bytes(real_size - header_end)
|
||||||
|
|
||||||
def read_asrt(self):
|
def read_asrt(self):
|
||||||
# version
|
# version
|
||||||
self.read_unsigned_char()
|
self.read_unsigned_char()
|
||||||
# flags
|
# flags
|
||||||
self.read(3)
|
self.read_bytes(3)
|
||||||
quality_entry_count = self.read_unsigned_char()
|
quality_entry_count = self.read_unsigned_char()
|
||||||
# QualityEntryCount
|
# QualityEntryCount
|
||||||
for i in range(quality_entry_count):
|
for i in range(quality_entry_count):
|
||||||
@ -85,7 +97,7 @@ class FlvReader(io.BytesIO):
|
|||||||
# version
|
# version
|
||||||
self.read_unsigned_char()
|
self.read_unsigned_char()
|
||||||
# flags
|
# flags
|
||||||
self.read(3)
|
self.read_bytes(3)
|
||||||
# time scale
|
# time scale
|
||||||
self.read_unsigned_int()
|
self.read_unsigned_int()
|
||||||
|
|
||||||
@ -119,7 +131,7 @@ class FlvReader(io.BytesIO):
|
|||||||
# version
|
# version
|
||||||
self.read_unsigned_char()
|
self.read_unsigned_char()
|
||||||
# flags
|
# flags
|
||||||
self.read(3)
|
self.read_bytes(3)
|
||||||
|
|
||||||
self.read_unsigned_int() # BootstrapinfoVersion
|
self.read_unsigned_int() # BootstrapinfoVersion
|
||||||
# Profile,Live,Update,Reserved
|
# Profile,Live,Update,Reserved
|
||||||
@ -194,11 +206,11 @@ def build_fragments_list(boot_info):
|
|||||||
|
|
||||||
|
|
||||||
def write_unsigned_int(stream, val):
|
def write_unsigned_int(stream, val):
|
||||||
stream.write(struct_pack('!I', val))
|
stream.write(compat_struct_pack('!I', val))
|
||||||
|
|
||||||
|
|
||||||
def write_unsigned_int_24(stream, val):
|
def write_unsigned_int_24(stream, val):
|
||||||
stream.write(struct_pack('!I', val)[1:])
|
stream.write(compat_struct_pack('!I', val)[1:])
|
||||||
|
|
||||||
|
|
||||||
def write_flv_header(stream):
|
def write_flv_header(stream):
|
||||||
@ -307,7 +319,7 @@ class F4mFD(FragmentFD):
|
|||||||
doc = compat_etree_fromstring(manifest)
|
doc = compat_etree_fromstring(manifest)
|
||||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||||
for f in self._get_unencrypted_media(doc)]
|
for f in self._get_unencrypted_media(doc)]
|
||||||
if requested_bitrate is None:
|
if requested_bitrate is None or len(formats) == 1:
|
||||||
# get the best format
|
# get the best format
|
||||||
formats = sorted(formats, key=lambda f: f[0])
|
formats = sorted(formats, key=lambda f: f[0])
|
||||||
rate, media = formats[-1]
|
rate, media = formats[-1]
|
||||||
@ -374,7 +386,17 @@ class F4mFD(FragmentFD):
|
|||||||
down.close()
|
down.close()
|
||||||
reader = FlvReader(down_data)
|
reader = FlvReader(down_data)
|
||||||
while True:
|
while True:
|
||||||
|
try:
|
||||||
_, box_type, box_data = reader.read_box_info()
|
_, box_type, box_data = reader.read_box_info()
|
||||||
|
except DataTruncatedError:
|
||||||
|
if test:
|
||||||
|
# In tests, segments may be truncated, and thus
|
||||||
|
# FlvReader may not be able to parse the whole
|
||||||
|
# chunk. If so, write the segment as is
|
||||||
|
# See https://github.com/rg3/youtube-dl/issues/9214
|
||||||
|
dest_stream.write(down_data)
|
||||||
|
break
|
||||||
|
raise
|
||||||
if box_type == b'mdat':
|
if box_type == b'mdat':
|
||||||
dest_stream.write(box_data)
|
dest_stream.write(box_data)
|
||||||
break
|
break
|
||||||
|
@ -4,6 +4,7 @@ import os.path
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
|
from .external import FFmpegFD
|
||||||
|
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -17,12 +18,39 @@ class HlsFD(FragmentFD):
|
|||||||
|
|
||||||
FD_NAME = 'hlsnative'
|
FD_NAME = 'hlsnative'
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def can_download(manifest):
|
||||||
|
UNSUPPORTED_FEATURES = (
|
||||||
|
r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1]
|
||||||
|
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||||
|
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
||||||
|
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
|
||||||
|
# r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
|
||||||
|
r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||||
|
# event media playlists [4]
|
||||||
|
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
||||||
|
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||||
|
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||||
|
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||||
|
)
|
||||||
|
return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
man_url = info_dict['url']
|
man_url = info_dict['url']
|
||||||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||||
manifest = self.ydl.urlopen(man_url).read()
|
manifest = self.ydl.urlopen(man_url).read()
|
||||||
|
|
||||||
s = manifest.decode('utf-8', 'ignore')
|
s = manifest.decode('utf-8', 'ignore')
|
||||||
|
|
||||||
|
if not self.can_download(s):
|
||||||
|
self.report_warning(
|
||||||
|
'hlsnative has detected features it does not support, '
|
||||||
|
'extraction will be delegated to ffmpeg')
|
||||||
|
fd = FFmpegFD(self.ydl, self.params)
|
||||||
|
for ph in self._progress_hooks:
|
||||||
|
fd.add_progress_hook(ph)
|
||||||
|
return fd.real_download(filename, info_dict)
|
||||||
|
|
||||||
fragment_urls = []
|
fragment_urls = []
|
||||||
for line in s.splitlines():
|
for line in s.splitlines():
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
@ -27,6 +27,8 @@ class RtspFD(FileDownloader):
|
|||||||
self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
|
self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
self._debug_cmd(args)
|
||||||
|
|
||||||
retval = subprocess.call(args)
|
retval = subprocess.call(args)
|
||||||
if retval == 0:
|
if retval == 0:
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
|
135
youtube_dl/extractor/abcnews.py
Normal file
135
youtube_dl/extractor/abcnews.py
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import calendar
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .amp import AMPIE
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class AbcNewsVideoIE(AMPIE):
|
||||||
|
IE_NAME = 'abcnews:video'
|
||||||
|
_VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '20411932',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': 'week-exclusive-irans-foreign-minister-zarif',
|
||||||
|
'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
|
||||||
|
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
|
||||||
|
'duration': 180,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
info_dict = self._extract_feed_info(
|
||||||
|
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
|
||||||
|
info_dict.update({
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
})
|
||||||
|
return info_dict
|
||||||
|
|
||||||
|
|
||||||
|
class AbcNewsIE(InfoExtractor):
|
||||||
|
IE_NAME = 'abcnews'
|
||||||
|
_VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10498713',
|
||||||
|
'ext': 'flv',
|
||||||
|
'display_id': 'dramatic-video-rare-death-job-america',
|
||||||
|
'title': 'Occupational Hazards',
|
||||||
|
'description': 'Nightline investigates the dangers that lurk at various jobs.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'upload_date': '20100428',
|
||||||
|
'timestamp': 1272412800,
|
||||||
|
},
|
||||||
|
'add_ie': ['AbcNewsVideo'],
|
||||||
|
}, {
|
||||||
|
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '39125818',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
||||||
|
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||||
|
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
|
||||||
|
'upload_date': '20160515',
|
||||||
|
'timestamp': 1463329500,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
# The embedded YouTube video is blocked due to copyright issues
|
||||||
|
'playlist_items': '1',
|
||||||
|
},
|
||||||
|
'add_ie': ['AbcNewsVideo'],
|
||||||
|
}, {
|
||||||
|
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
||||||
|
full_video_url = compat_urlparse.urljoin(url, video_url)
|
||||||
|
|
||||||
|
youtube_url = self._html_search_regex(
|
||||||
|
r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
|
||||||
|
webpage, 'YouTube URL', default=None)
|
||||||
|
|
||||||
|
timestamp = None
|
||||||
|
date_str = self._html_search_regex(
|
||||||
|
r'<span[^>]+class="timestamp">([^<]+)</span>',
|
||||||
|
webpage, 'timestamp', fatal=False)
|
||||||
|
if date_str:
|
||||||
|
tz_offset = 0
|
||||||
|
if date_str.endswith(' ET'): # Eastern Time
|
||||||
|
tz_offset = -5
|
||||||
|
date_str = date_str[:-3]
|
||||||
|
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
|
||||||
|
for date_format in date_formats:
|
||||||
|
try:
|
||||||
|
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
if timestamp is not None:
|
||||||
|
timestamp -= tz_offset * 3600
|
||||||
|
|
||||||
|
entry = {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||||
|
'url': full_video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
}
|
||||||
|
|
||||||
|
if youtube_url:
|
||||||
|
entries = [entry, self.url_result(youtube_url, 'Youtube')]
|
||||||
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
return entry
|
@ -52,7 +52,7 @@ class AMPIE(InfoExtractor):
|
|||||||
for media_data in media_content:
|
for media_data in media_content:
|
||||||
media = media_data['@attributes']
|
media = media_data['@attributes']
|
||||||
media_type = media['type']
|
media_type = media['type']
|
||||||
if media_type == 'video/f4m':
|
if media_type in ('video/f4m', 'application/f4m+xml'):
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||||
video_id, f4m_id='hds', fatal=False))
|
video_id, f4m_id='hds', fatal=False))
|
||||||
@ -61,7 +61,7 @@ class AMPIE(InfoExtractor):
|
|||||||
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': media_data['media-category']['@attributes']['label'],
|
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||||
'url': media['url'],
|
'url': media['url'],
|
||||||
'tbr': int_or_none(media.get('bitrate')),
|
'tbr': int_or_none(media.get('bitrate')),
|
||||||
'filesize': int_or_none(media.get('fileSize')),
|
'filesize': int_or_none(media.get('fileSize')),
|
||||||
|
224
youtube_dl/extractor/anvato.py
Normal file
224
youtube_dl/extractor/anvato.py
Normal file
@ -0,0 +1,224 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..aes import aes_encrypt
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
bytes_to_intlist,
|
||||||
|
determine_ext,
|
||||||
|
intlist_to_bytes,
|
||||||
|
int_or_none,
|
||||||
|
strip_jsonp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def md5_text(s):
|
||||||
|
if not isinstance(s, compat_str):
|
||||||
|
s = compat_str(s)
|
||||||
|
return hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
class AnvatoIE(InfoExtractor):
|
||||||
|
# Copied from anvplayer.min.js
|
||||||
|
_ANVACK_TABLE = {
|
||||||
|
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||||
|
'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
|
||||||
|
'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
|
||||||
|
'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
|
||||||
|
'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
|
||||||
|
'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
|
||||||
|
'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
|
||||||
|
'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
|
||||||
|
'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
|
||||||
|
'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
|
||||||
|
'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
|
||||||
|
'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
|
||||||
|
'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
|
||||||
|
'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
|
||||||
|
'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
|
||||||
|
'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
|
||||||
|
'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
|
||||||
|
'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
|
||||||
|
'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
|
||||||
|
'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
|
||||||
|
'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
|
||||||
|
'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
|
||||||
|
'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
|
||||||
|
'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
|
||||||
|
'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
|
||||||
|
'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
|
||||||
|
'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
|
||||||
|
'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
|
||||||
|
'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
|
||||||
|
'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
|
||||||
|
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
|
||||||
|
'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
|
||||||
|
'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
|
||||||
|
'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
|
||||||
|
'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
|
||||||
|
'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
|
||||||
|
'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
|
||||||
|
'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
|
||||||
|
'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
|
||||||
|
'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
|
||||||
|
'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
|
||||||
|
'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
|
||||||
|
'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
|
||||||
|
'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
|
||||||
|
'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
|
||||||
|
'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
|
||||||
|
'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
|
||||||
|
'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
|
||||||
|
'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
|
||||||
|
'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
|
||||||
|
'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
|
||||||
|
'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
|
||||||
|
'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
|
||||||
|
'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
|
||||||
|
'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
|
||||||
|
'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
|
||||||
|
'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
|
||||||
|
'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
|
||||||
|
'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
|
||||||
|
'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
|
||||||
|
'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
|
||||||
|
'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
|
||||||
|
'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
|
||||||
|
'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
|
||||||
|
'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
|
||||||
|
'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
|
||||||
|
'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
|
||||||
|
'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
|
||||||
|
'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
|
||||||
|
'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
|
||||||
|
'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
|
||||||
|
'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
|
||||||
|
'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
|
||||||
|
'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
|
||||||
|
'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
|
||||||
|
'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
|
||||||
|
'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
|
||||||
|
'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
|
||||||
|
'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
|
||||||
|
'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
|
||||||
|
'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
|
||||||
|
'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
|
||||||
|
'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
|
||||||
|
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
|
||||||
|
}
|
||||||
|
|
||||||
|
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
    """Set up the extractor and start with an empty server-time cache.

    All positional and keyword arguments are handed to the parent
    initialiser unchanged.
    """
    super(AnvatoIE, self).__init__(*args, **kwargs)
    # Filled in by _server_time() the first time it is called.
    self.__server_time = None
|
||||||
|
|
||||||
|
def _server_time(self, access_key, video_id):
|
||||||
|
if self.__server_time is not None:
|
||||||
|
return self.__server_time
|
||||||
|
|
||||||
|
self.__server_time = int(self._download_json(
|
||||||
|
self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
|
||||||
|
note='Fetching server time')['server_time'])
|
||||||
|
|
||||||
|
return self.__server_time
|
||||||
|
|
||||||
|
def _api_prefix(self, access_key):
|
||||||
|
return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
|
||||||
|
|
||||||
|
def _get_video_json(self, access_key, video_id):
    """Download and return the metadata JSON for *video_id*.

    The request URL carries an ``X-Anvato-Adst-Auth`` parameter computed
    from the server time and the unsigned URL, and the request body
    (``data=``) carries a per-request id plus an MD5 checksum built from
    the secret stored for *access_key* in ``_ANVACK_TABLE``.

    Raises KeyError when *access_key* has no entry in ``_ANVACK_TABLE``.
    """
    # See et() in anvplayer.min.js, which is an alias of getVideoJSON()
    unsigned_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
    server_time = self._server_time(access_key, video_id)

    # Plaintext layout: "<server time>~md5(<unsigned url>)~md5(<server time>)";
    # only its first 64 characters are fed to aes_encrypt.
    plaintext = '%d~%s~%s' % (server_time, md5_text(unsigned_url), md5_text(server_time))
    cipher_ints = aes_encrypt(
        bytes_to_intlist(plaintext[:64]), bytes_to_intlist(self._AUTH_KEY))
    auth_token = base64.b64encode(intlist_to_bytes(cipher_ints)).decode('ascii')
    signed_url = unsigned_url + '&X-Anvato-Adst-Auth=' + auth_token

    # Random 30-character request id
    anvrid = md5_text(time.time() * 1000 * random.random())[:30]
    checksum = md5_text('%s|%s|%d|%s' % (
        access_key, anvrid, server_time, self._ANVACK_TABLE[access_key]))
    payload = {
        'api': {
            'anvrid': anvrid,
            'anvstk': checksum,
            'anvts': server_time,
        },
    }
    body = json.dumps(payload).encode('utf-8')

    return self._download_json(
        signed_url, video_id, transform_source=strip_jsonp, data=body)
|
||||||
|
|
||||||
|
def _extract_anvato_videos(self, webpage, video_id):
    """Build an info dict for the Anvato player embedded in *webpage*.

    The player configuration is read from the ``data-anvp`` attribute of
    a ``<script>`` tag; its ``video`` and ``accessKey`` entries are then
    used to fetch the video metadata. *video_id* is only used while
    parsing that configuration; the returned ``id`` is the one named by
    the player.

    Returns a dict with ``id``, ``formats``, ``title``, ``description``,
    ``categories``, ``thumbnail`` and ``subtitles``.
    """
    player_json = self._html_search_regex(
        r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
        'Anvato player data')
    player_config = self._parse_json(player_json, video_id)

    video_id = player_config['video']
    access_key = player_config['accessKey']

    video_data = self._get_video_json(access_key, video_id)

    formats = []
    for source in video_data['published_urls']:
        media_url = source['embed_url']
        ext = determine_ext(media_url)

        # SMIL manifests describe their own set of formats
        if ext == 'smil':
            formats.extend(self._extract_smil_formats(media_url, video_id))
            continue

        bitrate = int_or_none(source.get('kbps'))
        id_parts = ['http', source.get('cdn_name')]
        entry = {
            'url': media_url,
            'format_id': ('-'.join(filter(None, id_parts))).lower(),
            # A bitrate of 0 is reported as unknown
            'tbr': None if bitrate == 0 else bitrate,
        }

        if ext == 'm3u8':
            # Not using _extract_m3u8_formats here as individual media
            # playlists are also included in published_urls.
            if bitrate is None:
                formats.append(self._m3u8_meta_format(media_url, ext='mp4', m3u8_id='hls'))
                continue
            entry['format_id'] = '-'.join(filter(None, ['hls', compat_str(bitrate)]))
            entry['ext'] = 'mp4'
        elif ext == 'mp3':
            entry['vcodec'] = 'none'
        else:
            entry['width'] = int_or_none(source.get('width'))
            entry['height'] = int_or_none(source.get('height'))
        formats.append(entry)

    self._sort_formats(formats)

    # Group caption tracks by language
    subtitles = {}
    for caption in video_data.get('captions', []):
        track = {
            'url': caption['url'],
            'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
        }
        subtitles.setdefault(caption['language'], []).append(track)

    result = {
        'id': video_id,
        'formats': formats,
        'title': video_data.get('def_title'),
        'description': video_data.get('def_description'),
        'categories': video_data.get('categories'),
        'thumbnail': video_data.get('thumbnail'),
        'subtitles': subtitles,
    }
    return result
|
@ -12,9 +12,10 @@ from ..utils import (
|
|||||||
|
|
||||||
class AolIE(InfoExtractor):
|
class AolIE(InfoExtractor):
|
||||||
IE_NAME = 'on.aol.com'
|
IE_NAME = 'on.aol.com'
|
||||||
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[^/?-]+)'
|
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# video with 5min ID
|
||||||
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
|
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
|
||||||
'md5': '18ef68f48740e86ae94b98da815eec42',
|
'md5': '18ef68f48740e86ae94b98da815eec42',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -31,6 +32,7 @@ class AolIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
# video with vidible ID
|
||||||
'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183',
|
'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5707d6b8e4b090497b04f706',
|
'id': '5707d6b8e4b090497b04f706',
|
||||||
@ -45,6 +47,18 @@ class AolIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://on.aol.com/video/519442220',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'aol-video:5707d6b8e4b090497b04f706',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -61,10 +61,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlus7IE(InfoExtractor):
|
class ArteTVBaseIE(InfoExtractor):
|
||||||
IE_NAME = 'arte.tv:+7'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&+])'
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_url_info(cls, url):
|
def _extract_url_info(cls, url):
|
||||||
mobj = re.match(cls._VALID_URL, url)
|
mobj = re.match(cls._VALID_URL, url)
|
||||||
@ -78,6 +75,122 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
return video_id, lang
|
return video_id, lang
|
||||||
|
|
||||||
|
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
    """Download the player JSON at *json_url* and turn it into an info dict.

    json_url -- URL of a document whose ``videoJsonPlayer`` entry
                describes the video.
    video_id -- id passed to the download and format-check helpers.
    lang     -- requested language; ``fr``/``de``/``en``/``es`` are
                mapped to the codes used inside ``versionCode``, any
                other value is used as the code itself.
    title    -- fallback title used when the JSON has no ``VTI``.

    Returns a dict with ``id``, ``title``, ``description``,
    ``upload_date``, ``thumbnail`` and ``formats``. Each format gets a
    ``language_preference`` that is higher the earlier its
    ``versionCode`` matches in the preference table below, and -1 when
    it matches nothing or has no ``versionCode`` at all.
    """
    info = self._download_json(json_url, video_id)
    player_info = info['videoJsonPlayer']

    upload_date_str = player_info.get('shootingDate')
    if not upload_date_str:
        upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]

    title = (player_info.get('VTI') or title or player_info['VID']).strip()
    # "or ''" / "or {}" also cover keys that are present but null
    subtitle = (player_info.get('VSU') or '').strip()
    if subtitle:
        title += ' - %s' % subtitle

    info_dict = {
        'id': player_info['VID'],
        'title': title,
        'description': player_info.get('VDE'),
        'upload_date': unified_strdate(upload_date_str),
        'thumbnail': player_info.get('programImage') or (player_info.get('VTU') or {}).get('IUR'),
    }
    qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])

    LANGS = {
        'fr': 'F',
        'de': 'A',
        'en': 'E[ANG]',
        'es': 'E[ESP]',
    }

    langcode = LANGS.get(lang, lang)
    escaped_lang = re.escape(langcode)

    # Language preference from most to least priority
    # Reference: section 5.6.3 of
    # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
    # The table only depends on the requested language, so it is built
    # once here rather than once per format.
    PREFERENCES = (
        # original version in requested language, without subtitles
        r'VO{0}$'.format(escaped_lang),
        # original version in requested language, with partial subtitles in requested language
        r'VO{0}-ST{0}$'.format(escaped_lang),
        # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
        r'VO{0}-STM{0}$'.format(escaped_lang),
        # non-original (dubbed) version in requested language, without subtitles
        r'V{0}$'.format(escaped_lang),
        # non-original (dubbed) version in requested language, with partial subtitles in requested language
        r'V{0}-ST{0}$'.format(escaped_lang),
        # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
        r'V{0}-STM{0}$'.format(escaped_lang),
        # original version in requested language, with partial subtitles in different language
        r'VO{0}-ST(?!{0}).+?$'.format(escaped_lang),
        # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
        r'VO{0}-STM(?!{0}).+?$'.format(escaped_lang),
        # original version in different language, with partial subtitles in requested language
        r'VO(?:(?!{0}).+?)?-ST{0}$'.format(escaped_lang),
        # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
        r'VO(?:(?!{0}).+?)?-STM{0}$'.format(escaped_lang),
        # original version in different language, without subtitles
        # NOTE(review): unlike its neighbours this pattern has no '.+?'
        # inside the group, so it appears to match a bare 'VO' only --
        # confirm whether that is intended.
        r'VO(?:(?!{0}))?$'.format(escaped_lang),
        # original version in different language, with partial subtitles in different language
        r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(escaped_lang),
        # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
        r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(escaped_lang),
    )

    formats = []
    for format_id, format_dict in player_info['VSR'].items():
        f = dict(format_dict)
        version_code = f.get('versionCode')

        # A format without a versionCode cannot be ranked; re.match()
        # would raise TypeError on None, so it is treated as unmatched.
        lang_pref = -1
        if version_code:
            for pref, pattern in enumerate(PREFERENCES):
                if re.match(pattern, version_code):
                    lang_pref = len(PREFERENCES) - pref
                    break

        fmt = {
            'format_id': format_id,
            'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
            'language_preference': lang_pref,
            'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
            'width': int_or_none(f.get('width')),
            'height': int_or_none(f.get('height')),
            'tbr': int_or_none(f.get('bitrate')),
            'quality': qfunc(f.get('quality')),
        }

        if f.get('mediaType') == 'rtmp':
            # For RTMP the JSON 'url' is the play path and 'streamer'
            # is the URL to connect to.
            fmt['url'] = f['streamer']
            fmt['play_path'] = 'mp4:' + f['url']
            fmt['ext'] = 'flv'
        else:
            fmt['url'] = f['url']

        formats.append(fmt)

    self._check_formats(formats, video_id)
    self._sort_formats(formats)

    info_dict['formats'] = formats
    return info_dict
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||||
|
IE_NAME = 'arte.tv:+7'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs accepted by ArteTVPlaylistIE are always rejected here; for all
    other URLs the decision is left to the parent implementation.
    """
    if ArteTVPlaylistIE.suitable(url):
        return False
    return super(ArteTVPlus7IE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, lang = self._extract_url_info(url)
|
video_id, lang = self._extract_url_info(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
@ -132,85 +245,11 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
webpage, 'embed url', group='url')
|
webpage, 'embed url', group='url')
|
||||||
return self.url_result(embed_url)
|
return self.url_result(embed_url)
|
||||||
|
|
||||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
|
||||||
info = self._download_json(json_url, video_id)
|
|
||||||
player_info = info['videoJsonPlayer']
|
|
||||||
|
|
||||||
upload_date_str = player_info.get('shootingDate')
|
|
||||||
if not upload_date_str:
|
|
||||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
|
||||||
|
|
||||||
title = (player_info.get('VTI') or title or player_info['VID']).strip()
|
|
||||||
subtitle = player_info.get('VSU', '').strip()
|
|
||||||
if subtitle:
|
|
||||||
title += ' - %s' % subtitle
|
|
||||||
|
|
||||||
info_dict = {
|
|
||||||
'id': player_info['VID'],
|
|
||||||
'title': title,
|
|
||||||
'description': player_info.get('VDE'),
|
|
||||||
'upload_date': unified_strdate(upload_date_str),
|
|
||||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
|
||||||
}
|
|
||||||
qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
|
|
||||||
|
|
||||||
LANGS = {
|
|
||||||
'fr': 'F',
|
|
||||||
'de': 'A',
|
|
||||||
'en': 'E[ANG]',
|
|
||||||
'es': 'E[ESP]',
|
|
||||||
}
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for format_id, format_dict in player_info['VSR'].items():
|
|
||||||
f = dict(format_dict)
|
|
||||||
versionCode = f.get('versionCode')
|
|
||||||
langcode = LANGS.get(lang, lang)
|
|
||||||
lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)]
|
|
||||||
lang_pref = None
|
|
||||||
if versionCode:
|
|
||||||
matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)]
|
|
||||||
lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs)
|
|
||||||
source_pref = 0
|
|
||||||
if versionCode is not None:
|
|
||||||
# The original version with subtitles has lower relevance
|
|
||||||
if re.match(r'VO-ST(F|A|E)', versionCode):
|
|
||||||
source_pref -= 10
|
|
||||||
# The version with sourds/mal subtitles has also lower relevance
|
|
||||||
elif re.match(r'VO?(F|A|E)-STM\1', versionCode):
|
|
||||||
source_pref -= 9
|
|
||||||
format = {
|
|
||||||
'format_id': format_id,
|
|
||||||
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
|
||||||
'language_preference': lang_pref,
|
|
||||||
'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
|
|
||||||
'width': int_or_none(f.get('width')),
|
|
||||||
'height': int_or_none(f.get('height')),
|
|
||||||
'tbr': int_or_none(f.get('bitrate')),
|
|
||||||
'quality': qfunc(f.get('quality')),
|
|
||||||
'source_preference': source_pref,
|
|
||||||
}
|
|
||||||
|
|
||||||
if f.get('mediaType') == 'rtmp':
|
|
||||||
format['url'] = f['streamer']
|
|
||||||
format['play_path'] = 'mp4:' + f['url']
|
|
||||||
format['ext'] = 'flv'
|
|
||||||
else:
|
|
||||||
format['url'] = f['url']
|
|
||||||
|
|
||||||
formats.append(format)
|
|
||||||
|
|
||||||
self._check_formats(formats, video_id)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info_dict['formats'] = formats
|
|
||||||
return info_dict
|
|
||||||
|
|
||||||
|
|
||||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||||
IE_NAME = 'arte.tv:creative'
|
IE_NAME = 'arte.tv:creative'
|
||||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:magazine?/)?(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
||||||
@ -229,6 +268,24 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
|
|||||||
'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
|
'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
|
||||||
'upload_date': '20140805',
|
'upload_date': '20140805',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVInfoIE(ArteTVPlus7IE):
|
||||||
|
IE_NAME = 'arte.tv:info'
|
||||||
|
_VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '067528-000-A',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Service civique, un cache misère ?',
|
||||||
|
'upload_date': '20160403',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
@ -254,6 +311,8 @@ class ArteTVDDCIE(ArteTVPlus7IE):
|
|||||||
IE_NAME = 'arte.tv:ddc'
|
IE_NAME = 'arte.tv:ddc'
|
||||||
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
|
_TESTS = []
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, lang = self._extract_url_info(url)
|
video_id, lang = self._extract_url_info(url)
|
||||||
if lang == 'folge':
|
if lang == 'folge':
|
||||||
@ -272,7 +331,7 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
|||||||
IE_NAME = 'arte.tv:concert'
|
IE_NAME = 'arte.tv:concert'
|
||||||
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
||||||
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
|
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -282,14 +341,14 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
|||||||
'upload_date': '20140128',
|
'upload_date': '20140128',
|
||||||
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
|
|
||||||
class ArteTVCinemaIE(ArteTVPlus7IE):
|
class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||||
IE_NAME = 'arte.tv:cinema'
|
IE_NAME = 'arte.tv:cinema'
|
||||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://cinema.arte.tv/de/node/38291',
|
'url': 'http://cinema.arte.tv/de/node/38291',
|
||||||
'md5': '6b275511a5107c60bacbeeda368c3aa1',
|
'md5': '6b275511a5107c60bacbeeda368c3aa1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -299,7 +358,7 @@ class ArteTVCinemaIE(ArteTVPlus7IE):
|
|||||||
'upload_date': '20160122',
|
'upload_date': '20160122',
|
||||||
'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
|
'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
|
|
||||||
class ArteTVMagazineIE(ArteTVPlus7IE):
|
class ArteTVMagazineIE(ArteTVPlus7IE):
|
||||||
@ -344,9 +403,41 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
|
|||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
_TESTS = []
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
lang = mobj.group('lang')
|
lang = mobj.group('lang')
|
||||||
json_url = mobj.group('json_url')
|
json_url = mobj.group('json_url')
|
||||||
return self._extract_from_json_url(json_url, video_id, lang)
|
return self._extract_from_json_url(json_url, video_id, lang)
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVPlaylistIE(ArteTVBaseIE):
    # Extractor for arte.tv guide URLs whose fragment names a collection
    # ("#collection/PL-<digits>").
    IE_NAME = 'arte.tv:playlist'
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'

    _TESTS = [{
        'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
        'info_dict': {
            'id': 'PL-013263',
            'title': 'Areva & Uramin',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result for the collection named in *url*.

        The collection description is downloaded from the player API and
        every video that provides a ``jsonUrl`` is extracted through
        ``_extract_from_json_url``; videos without one are skipped.
        """
        playlist_id, lang = self._extract_url_info(url)
        collection_url = (
            'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
            % (lang, playlist_id))
        collection = self._download_json(collection_url, playlist_id)

        title = collection.get('title')
        description = collection.get('shortDescription') or collection.get('teaserText')

        entries = []
        for video in collection['videos']:
            if not video.get('jsonUrl'):
                continue
            # Fall back to the playlist id when the video has no programId
            entry_id = video.get('programId') or playlist_id
            entries.append(
                self._extract_from_json_url(video['jsonUrl'], entry_id, lang))

        return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
@ -30,14 +30,14 @@ class AudiomackIE(InfoExtractor):
|
|||||||
# audiomack wrapper around soundcloud song
|
# audiomack wrapper around soundcloud song
|
||||||
{
|
{
|
||||||
'add_ie': ['Soundcloud'],
|
'add_ie': ['Soundcloud'],
|
||||||
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
|
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '172419696',
|
'id': '258901379',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
|
'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
||||||
'title': 'Young Thug ft Lil Wayne - Take Kare',
|
'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
||||||
'uploader': 'Young Thug World',
|
'uploader': 'ILOVEMAKONNEN',
|
||||||
'upload_date': '20141016',
|
'upload_date': '20160414',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||||
'md5': '2b68e5851514c20efdff2afc5603b8b4',
|
'md5': '73d0b3171568232574e45652f8720b5c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2650410135',
|
'id': '2650410135',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
|
|||||||
if m_trackinfo:
|
if m_trackinfo:
|
||||||
json_code = m_trackinfo.group(1)
|
json_code = m_trackinfo.group(1)
|
||||||
data = json.loads(json_code)[0]
|
data = json.loads(json_code)[0]
|
||||||
|
track_id = compat_str(data['id'])
|
||||||
|
|
||||||
|
if not data.get('file'):
|
||||||
|
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in data['file'].items():
|
for format_id, format_url in data['file'].items():
|
||||||
@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': compat_str(data['id']),
|
'id': track_id,
|
||||||
'title': data['title'],
|
'title': data['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': float_or_none(data.get('duration')),
|
'duration': float_or_none(data.get('duration')),
|
||||||
|
@ -671,6 +671,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34475836',
|
'id': '34475836',
|
||||||
'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
|
'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
|
||||||
|
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
||||||
},
|
},
|
||||||
'playlist_count': 3,
|
'playlist_count': 3,
|
||||||
}, {
|
}, {
|
||||||
|
@ -1,34 +1,42 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import calendar
|
||||||
|
import datetime
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_etree_fromstring,
|
||||||
|
compat_str,
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_xml_parse_error,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
|
||||||
unescapeHTML,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliIE(InfoExtractor):
|
class BiliBiliIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
|
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||||
'md5': '2c301e4dab317596e837c3e7633e7d86',
|
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1554319',
|
'id': '1554319',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': '【金坷垃】金泡沫',
|
'title': '【金坷垃】金泡沫',
|
||||||
'duration': 308313,
|
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||||
|
'duration': 308.067,
|
||||||
|
'timestamp': 1398012660,
|
||||||
'upload_date': '20140420',
|
'upload_date': '20140420',
|
||||||
'thumbnail': 're:^https?://.+\.jpg',
|
'thumbnail': 're:^https?://.+\.jpg',
|
||||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
|
||||||
'timestamp': 1397983878,
|
|
||||||
'uploader': '菊子桑',
|
'uploader': '菊子桑',
|
||||||
|
'uploader_id': '156160',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||||
@ -36,75 +44,110 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
'id': '1041170',
|
'id': '1041170',
|
||||||
'title': '【BD1080P】刀语【诸神&异域】',
|
'title': '【BD1080P】刀语【诸神&异域】',
|
||||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||||
'uploader': '枫叶逝去',
|
|
||||||
'timestamp': 1396501299,
|
|
||||||
},
|
},
|
||||||
'playlist_count': 9,
|
'playlist_count': 9,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||||
|
# the Android client
|
||||||
|
# TODO: find the sign algorithm used in the flash player
|
||||||
|
_APP_KEY = '86385cdc024c0f6c'
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract one video, or a multi-part video, from a BiliBili page.

    The player parameters embedded in the page give the ``cid``, which is
    used to request the stream description from the ``v_cdn_play``
    interface. Title, description, upload time, thumbnail and uploader
    are read from the page itself.

    Returns a single info dict when the stream description lists exactly
    one ``durl`` element, otherwise a ``multi_video`` dict whose entries
    are the individual parts.

    Raises ExtractorError when no ``durl`` element is found; the message
    reported by the service is included when one is available.
    """
    mobj = re.match(self._VALID_URL, url)
    video_id = mobj.group('id')

    webpage = self._download_webpage(url, video_id)

    params = compat_parse_qs(self._search_regex(
        [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
         r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
        webpage, 'player parameters'))
    cid = params['cid'][0]

    info_xml_str = self._download_webpage(
        'http://interface.bilibili.com/v_cdn_play',
        cid, query={'appkey': self._APP_KEY, 'cid': cid},
        note='Downloading video info page')

    err_msg = None
    durls = None
    info_xml = None
    try:
        info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
    except compat_xml_parse_error:
        # The response was not XML; try to read an error text from it
        # as JSON instead.
        info_json = self._parse_json(info_xml_str, video_id, fatal=False)
        err_msg = (info_json or {}).get('error_text')
    else:
        err_msg = xpath_text(info_xml, './message')

    if info_xml is not None:
        durls = info_xml.findall('./durl')
    if not durls:
        if err_msg:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
        raise ExtractorError('No videos found!')

    entries = []

    for durl in durls:
        size = xpath_text(durl, ['./filesize', './size'])
        formats = [{
            'url': durl.find('./url').text,
            'filesize': int_or_none(size),
        }]
        for backup_url in durl.findall('./backup_url/url'):
            formats.append({
                'url': backup_url.text,
                # backup URLs have lower priorities
                'preference': -2 if 'hd.mp4' in backup_url.text else -3,
            })

        self._sort_formats(formats)

        entries.append({
            'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
            'duration': int_or_none(xpath_text(durl, './length'), 1000),
            'formats': formats,
        })

    title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
    description = self._html_search_meta('description', webpage)
    datetime_str = self._html_search_regex(
        r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
    # The lookup above is non-fatal, so the timestamp must have a value
    # even when the page carries no <time> element.
    timestamp = None
    if datetime_str:
        timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())

    # TODO 'view_count' requires deobfuscating Javascript
    info = {
        'id': compat_str(cid),
        'title': title,
        'description': description,
        'timestamp': timestamp,
        'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
        'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
    }

    uploader_mobj = re.search(
        r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
        webpage)
    if uploader_mobj:
        info.update({
            'uploader': uploader_mobj.group('name'),
            'uploader_id': uploader_mobj.group('id'),
        })

    # Every part receives the page-level metadata; note that this also
    # replaces the per-part 'id' and 'duration' set above.
    for entry in entries:
        entry.update(info)

    if len(entries) == 1:
        return entries[0]

    return {
        '_type': 'multi_video',
        'id': video_id,
        'title': title,
        'description': description,
        'entries': entries,
    }
|
|
||||||
|
39
youtube_dl/extractor/biqle.py
Normal file
39
youtube_dl/extractor/biqle.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class BIQLEIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.biqle.ru/watch/847655_160197695',
|
||||||
|
'md5': 'ad5f746a874ccded7b8f211aeea96637',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '160197695',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Foo Fighters - The Pretender (Live at Wembley Stadium)',
|
||||||
|
'uploader': 'Andrey Rogozin',
|
||||||
|
'upload_date': '20110605',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://biqle.org/watch/-44781847_168547604',
|
||||||
|
'md5': '7f24e72af1db0edf7c1aaba513174f97',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '168547604',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ребенок в шоке от автоматической мойки',
|
||||||
|
'uploader': 'Dmitry Kotov',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
embed_url = self._proto_relative_url(self._search_regex(
|
||||||
|
r'<iframe.+?src="((?:http:)?//daxab\.com/[^"]+)".*?></iframe>', webpage, 'embed url'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': embed_url,
|
||||||
|
}
|
@ -17,6 +17,9 @@ class BloombergIE(InfoExtractor):
|
|||||||
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||||
'description': 'md5:a8ba0302912d03d246979735c17d2761',
|
'description': 'md5:a8ba0302912d03d246979735c17d2761',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'best[format_id^=hds]',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -307,9 +307,10 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||||
|
|
||||||
def _extract_video_info(self, video_info):
|
def _extract_video_info(self, video_info):
|
||||||
|
video_id = compat_str(video_info['id'])
|
||||||
publisher_id = video_info.get('publisherId')
|
publisher_id = video_info.get('publisherId')
|
||||||
info = {
|
info = {
|
||||||
'id': compat_str(video_info['id']),
|
'id': video_id,
|
||||||
'title': video_info['displayName'].strip(),
|
'title': video_info['displayName'].strip(),
|
||||||
'description': video_info.get('shortDescription'),
|
'description': video_info.get('shortDescription'),
|
||||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
||||||
@ -331,7 +332,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
url_comp = compat_urllib_parse_urlparse(url)
|
url_comp = compat_urllib_parse_urlparse(url)
|
||||||
if url_comp.path.endswith('.m3u8'):
|
if url_comp.path.endswith('.m3u8'):
|
||||||
formats.extend(
|
formats.extend(
|
||||||
self._extract_m3u8_formats(url, info['id'], 'mp4'))
|
self._extract_m3u8_formats(
|
||||||
|
url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
continue
|
continue
|
||||||
elif 'akamaihd.net' in url_comp.netloc:
|
elif 'akamaihd.net' in url_comp.netloc:
|
||||||
# This type of renditions are served through
|
# This type of renditions are served through
|
||||||
@ -340,7 +342,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
ext = 'flv'
|
ext = 'flv'
|
||||||
if ext is None:
|
if ext is None:
|
||||||
ext = determine_ext(url)
|
ext = determine_ext(url)
|
||||||
tbr = int_or_none(rend.get('encodingRate'), 1000),
|
tbr = int_or_none(rend.get('encodingRate'), 1000)
|
||||||
a_format = {
|
a_format = {
|
||||||
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
|
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
|
||||||
'url': url,
|
'url': url,
|
||||||
@ -365,7 +367,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
a_format.update({
|
a_format.update({
|
||||||
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
|
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'protocol': 'm3u8',
|
'protocol': 'm3u8_native',
|
||||||
})
|
})
|
||||||
|
|
||||||
formats.append(a_format)
|
formats.append(a_format)
|
||||||
@ -395,7 +397,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
return ad_info
|
return ad_info
|
||||||
|
|
||||||
if 'url' not in info and not info.get('formats'):
|
if 'url' not in info and not info.get('formats'):
|
||||||
raise ExtractorError('Unable to extract video url for %s' % info['id'])
|
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
@ -442,6 +444,10 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
# non numeric ref: prefixed video id
|
# non numeric ref: prefixed video id
|
||||||
'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
|
'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# unavailable video without message but with error_code
|
||||||
|
'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -512,8 +518,9 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
json_data = self._parse_json(e.cause.read().decode(), video_id)
|
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||||
raise ExtractorError(json_data[0]['message'], expected=True)
|
raise ExtractorError(
|
||||||
|
json_data.get('message') or json_data['error_code'], expected=True)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
title = json_data['name'].strip()
|
title = json_data['name'].strip()
|
||||||
@ -527,7 +534,7 @@ class BrightcoveNewIE(InfoExtractor):
|
|||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
src, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
elif source_type == 'application/dash+xml':
|
elif source_type == 'application/dash+xml':
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
|
@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
|
|||||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||||
|
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'studio-c-season-5-episode-5',
|
'id': 'studio-c-season-5-episode-5',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'add_ie': ['Ooyala'],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -4,64 +4,66 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import js_to_json
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
smuggle_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CBCIE(InfoExtractor):
|
class CBCIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# with mediaId
|
# with mediaId
|
||||||
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
|
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
|
||||||
|
'md5': '97e24d09672fc4cf56256d6faa6c25bc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2682904050',
|
'id': '2682904050',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Don Cherry – All-Stars',
|
'title': 'Don Cherry – All-Stars',
|
||||||
'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
|
'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
|
||||||
'timestamp': 1454475540,
|
'timestamp': 1454463000,
|
||||||
'upload_date': '20160203',
|
'upload_date': '20160203',
|
||||||
},
|
'uploader': 'CBCC-NEW',
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# with clipId
|
# with clipId
|
||||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||||
|
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2487345465',
|
'id': '2487345465',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
||||||
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
||||||
'upload_date': '19700101',
|
'upload_date': '19780210',
|
||||||
},
|
'uploader': 'CBCC-NEW',
|
||||||
'params': {
|
'timestamp': 255977160,
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# multiple iframes
|
# multiple iframes
|
||||||
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
|
'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2680832926',
|
'id': '2680832926',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
|
'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
|
||||||
'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
|
'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
|
||||||
'upload_date': '19700101',
|
'upload_date': '20160201',
|
||||||
|
'timestamp': 1454342820,
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
'md5': '415a0e3f586113894174dfb31aa5bb1a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2658915080',
|
'id': '2658915080',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Fly like an eagle!',
|
'title': 'Fly like an eagle!',
|
||||||
'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
|
'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
|
||||||
'upload_date': '19700101',
|
'upload_date': '20150315',
|
||||||
|
'timestamp': 1426443984,
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -90,24 +92,54 @@ class CBCIE(InfoExtractor):
|
|||||||
|
|
||||||
class CBCPlayerIE(InfoExtractor):
|
class CBCPlayerIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||||
|
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2683190193',
|
'id': '2683190193',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Gerry Runs a Sweat Shop',
|
'title': 'Gerry Runs a Sweat Shop',
|
||||||
'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
|
'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
|
||||||
'timestamp': 1455067800,
|
'timestamp': 1455071400,
|
||||||
'upload_date': '20160210',
|
'upload_date': '20160210',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
},
|
},
|
||||||
'params': {
|
}, {
|
||||||
# rtmp download
|
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||||
'skip_download': True,
|
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||||
|
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2657631896',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'CBC Montreal is organizing its first ever community hackathon!',
|
||||||
|
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
|
||||||
|
'timestamp': 1425704400,
|
||||||
|
'upload_date': '20150307',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
# available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
|
||||||
|
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||||
|
'md5': '17a61eb813539abea40618d6323a7f82',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2164402062',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Cancer survivor four times over',
|
||||||
|
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||||
|
'timestamp': 1320410746,
|
||||||
|
'upload_date': '20111104',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
return self.url_result(
|
return {
|
||||||
'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
|
'_type': 'url_transparent',
|
||||||
'ThePlatformFeed', video_id)
|
'ie_key': 'ThePlatform',
|
||||||
|
'url': smuggle_url(
|
||||||
|
'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
|
||||||
|
'force_smil_url': True
|
||||||
|
}),
|
||||||
|
'id': video_id,
|
||||||
|
}
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
ExtractorError,
|
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -22,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):
|
|||||||
|
|
||||||
|
|
||||||
class CBSIE(CBSBaseIE):
|
class CBSIE(CBSBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
|
_VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||||
@ -64,10 +65,11 @@ class CBSIE(CBSBaseIE):
|
|||||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true'
|
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
content_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
if not content_id:
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
content_id = self._search_regex(
|
content_id = self._search_regex(
|
||||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||||
@ -84,11 +86,11 @@ class CBSIE(CBSBaseIE):
|
|||||||
pid = xpath_text(item, 'pid')
|
pid = xpath_text(item, 'pid')
|
||||||
if not pid:
|
if not pid:
|
||||||
continue
|
continue
|
||||||
try:
|
tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid
|
||||||
|
if '.m3u8' in xpath_text(item, 'contentUrl', default=''):
|
||||||
|
tp_release_url += '&manifest=m3u'
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||||
self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid)
|
tp_release_url, content_id, 'Downloading %s SMIL data' % pid)
|
||||||
except ExtractorError:
|
|
||||||
continue
|
|
||||||
formats.extend(tp_formats)
|
formats.extend(tp_formats)
|
||||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
84
youtube_dl/extractor/cbslocal.py
Normal file
84
youtube_dl/extractor/cbslocal.py
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import calendar
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from .anvato import AnvatoIE
|
||||||
|
from .sendtonews import SendtoNewsIE
|
||||||
|
from ..compat import compat_urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class CBSLocalIE(AnvatoIE):
|
||||||
|
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
# Anvato backend
|
||||||
|
'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
|
||||||
|
'md5': 'f0ee3081e3843f575fccef901199b212',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3401037',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
|
||||||
|
'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
|
||||||
|
'thumbnail': 're:^https?://.*',
|
||||||
|
'timestamp': 1463440500,
|
||||||
|
'upload_date': '20160516',
|
||||||
|
'subtitles': {
|
||||||
|
'en': 'mincount:5',
|
||||||
|
},
|
||||||
|
'categories': [
|
||||||
|
'Stations\\Spoken Word\\KCBSTV',
|
||||||
|
'Syndication\\MSN',
|
||||||
|
'Syndication\\NDN',
|
||||||
|
'Syndication\\AOL',
|
||||||
|
'Syndication\\Yahoo',
|
||||||
|
'Syndication\\Tribune',
|
||||||
|
'Syndication\\Curb.tv',
|
||||||
|
'Content\\News'
|
||||||
|
],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# SendtoNews embed
|
||||||
|
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'GxfCe0Zo7D-175909-5588',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Recap: CLE 15, CIN 6',
|
||||||
|
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
||||||
|
'upload_date': '20160516',
|
||||||
|
'timestamp': 1463433840,
|
||||||
|
'duration': 49,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
sendtonews_url = SendtoNewsIE._extract_url(webpage)
|
||||||
|
if sendtonews_url:
|
||||||
|
info_dict = {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': compat_urlparse.urljoin(url, sendtonews_url),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||||
|
|
||||||
|
time_str = self._html_search_regex(
|
||||||
|
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||||
|
timestamp = None
|
||||||
|
if time_str:
|
||||||
|
timestamp = calendar.timegm(datetime.datetime.strptime(
|
||||||
|
time_str, '%b %d, %Y %I:%M %p').timetuple())
|
||||||
|
|
||||||
|
info_dict.update({
|
||||||
|
'display_id': display_id,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
})
|
||||||
|
|
||||||
|
return info_dict
|
@ -1,13 +1,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_iso8601,
|
||||||
qualities,
|
|
||||||
unified_strdate,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -19,14 +15,14 @@ class CCCIE(InfoExtractor):
|
|||||||
'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
|
'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
|
||||||
'md5': '3a1eda8f3a29515d27f5adb967d7e740',
|
'md5': '3a1eda8f3a29515d27f5adb967d7e740',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',
|
'id': '1839',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Introduction to Processor Design',
|
'title': 'Introduction to Processor Design',
|
||||||
'description': 'md5:80be298773966f66d56cb11260b879af',
|
'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'view_count': int,
|
|
||||||
'upload_date': '20131228',
|
'upload_date': '20131228',
|
||||||
'duration': 3660,
|
'timestamp': 1388188800,
|
||||||
|
'duration': 3710,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
|
'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
|
||||||
@ -34,79 +30,48 @@ class CCCIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
event_id = self._search_regex("data-id='(\d+)'", webpage, 'event id')
|
||||||
|
event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
|
||||||
|
|
||||||
if self._downloader.params.get('prefer_free_formats'):
|
|
||||||
preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
|
|
||||||
else:
|
|
||||||
preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'(?s)<h1>(.*?)</h1>', webpage, 'title')
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'(?s)<h3>About</h3>(.+?)<h3>',
|
|
||||||
webpage, 'description', fatal=False)
|
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
|
||||||
r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
|
|
||||||
webpage, 'upload date', fatal=False))
|
|
||||||
view_count = int_or_none(self._html_search_regex(
|
|
||||||
r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
|
|
||||||
webpage, 'view count', fatal=False))
|
|
||||||
duration = parse_duration(self._html_search_regex(
|
|
||||||
r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li',
|
|
||||||
webpage, 'duration', fatal=False, group='duration'))
|
|
||||||
|
|
||||||
matches = re.finditer(r'''(?xs)
|
|
||||||
<(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s*
|
|
||||||
<(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*
|
|
||||||
<a\s+download\s+href='(?P<http_url>[^']+)'>\s*
|
|
||||||
(?:
|
|
||||||
.*?
|
|
||||||
<a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'
|
|
||||||
)?''', webpage)
|
|
||||||
formats = []
|
formats = []
|
||||||
for m in matches:
|
for recording in event_data.get('recordings', []):
|
||||||
format = m.group('format')
|
recording_url = recording.get('recording_url')
|
||||||
format_id = self._search_regex(
|
if not recording_url:
|
||||||
r'.*/([a-z0-9_-]+)/[^/]*$',
|
continue
|
||||||
m.group('http_url'), 'format id', default=None)
|
language = recording.get('language')
|
||||||
if format_id:
|
folder = recording.get('folder')
|
||||||
format_id = m.group('lang') + '-' + format_id
|
format_id = None
|
||||||
vcodec = 'h264' if 'h264' in format_id else (
|
if language:
|
||||||
'none' if format_id in ('mp3', 'opus') else None
|
format_id = language
|
||||||
|
if folder:
|
||||||
|
if language:
|
||||||
|
format_id += '-' + folder
|
||||||
|
else:
|
||||||
|
format_id = folder
|
||||||
|
vcodec = 'h264' if 'h264' in folder else (
|
||||||
|
'none' if folder in ('mp3', 'opus') else None
|
||||||
)
|
)
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'format': format,
|
'url': recording_url,
|
||||||
'language': m.group('lang'),
|
'width': int_or_none(recording.get('width')),
|
||||||
'url': m.group('http_url'),
|
'height': int_or_none(recording.get('height')),
|
||||||
|
'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
|
||||||
|
'language': language,
|
||||||
'vcodec': vcodec,
|
'vcodec': vcodec,
|
||||||
'preference': preference(format_id),
|
|
||||||
})
|
|
||||||
|
|
||||||
if m.group('torrent_url'):
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'torrent-%s' % (format if format_id is None else format_id),
|
|
||||||
'format': '%s (torrent)' % format,
|
|
||||||
'proto': 'torrent',
|
|
||||||
'format_note': '(unsupported; will just download the .torrent file)',
|
|
||||||
'vcodec': vcodec,
|
|
||||||
'preference': -100 + preference(format_id),
|
|
||||||
'url': m.group('torrent_url'),
|
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(
|
|
||||||
r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': event_id,
|
||||||
'title': title,
|
'display_id': display_id,
|
||||||
'description': description,
|
'title': event_data['title'],
|
||||||
'thumbnail': thumbnail,
|
'description': event_data.get('description'),
|
||||||
'view_count': view_count,
|
'thumbnail': event_data.get('thumb_url'),
|
||||||
'upload_date': upload_date,
|
'timestamp': parse_iso8601(event_data.get('date')),
|
||||||
'duration': duration,
|
'duration': int_or_none(event_data.get('length')),
|
||||||
|
'tags': event_data.get('tags'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -33,19 +33,33 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '61924494876844374',
|
'id': '61924494877028507',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
'title': 'Hyde Park Civilizace: Bonus 01 - En',
|
||||||
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
|
'description': 'English Subtittles',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
'duration': 88.4,
|
'duration': 81.3,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# live stream
|
||||||
|
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 402,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Georestricted to Czech Republic',
|
||||||
}, {
|
}, {
|
||||||
# video with 18+ caution trailer
|
# video with 18+ caution trailer
|
||||||
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||||
@ -118,19 +132,21 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
||||||
req.add_header('Referer', url)
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
playlist_title = self._og_search_title(webpage)
|
playlist_title = self._og_search_title(webpage, default=None)
|
||||||
playlist_description = self._og_search_description(webpage)
|
playlist_description = self._og_search_description(webpage, default=None)
|
||||||
|
|
||||||
playlist = self._download_json(req, playlist_id)['playlist']
|
playlist = self._download_json(req, playlist_id)['playlist']
|
||||||
playlist_len = len(playlist)
|
playlist_len = len(playlist)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for item in playlist:
|
for item in playlist:
|
||||||
|
is_live = item.get('type') == 'LIVE'
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, stream_url in item['streamUrls'].items():
|
for format_id, stream_url in item['streamUrls'].items():
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
stream_url, playlist_id, 'mp4',
|
stream_url, playlist_id, 'mp4',
|
||||||
entry_protocol='m3u8_native', fatal=False))
|
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||||
|
fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
item_id = item.get('id') or item['assetId']
|
item_id = item.get('id') or item['assetId']
|
||||||
@ -145,14 +161,22 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||||||
if subs:
|
if subs:
|
||||||
subtitles = self.extract_subtitles(episode_id, subs)
|
subtitles = self.extract_subtitles(episode_id, subs)
|
||||||
|
|
||||||
|
if playlist_len == 1:
|
||||||
|
final_title = playlist_title or title
|
||||||
|
if is_live:
|
||||||
|
final_title = self._live_title(final_title)
|
||||||
|
else:
|
||||||
|
final_title = '%s (%s)' % (playlist_title, title)
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': item_id,
|
'id': item_id,
|
||||||
'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title),
|
'title': final_title,
|
||||||
'description': playlist_description if playlist_len == 1 else None,
|
'description': playlist_description if playlist_len == 1 else None,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'is_live': is_live,
|
||||||
})
|
})
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
@ -1,119 +0,0 @@
|
|||||||
# encoding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import ExtractorError
|
|
||||||
from .screenwavemedia import ScreenwaveMediaIE
|
|
||||||
|
|
||||||
|
|
||||||
class CinemassacreIE(InfoExtractor):
    """Extractor for dated article pages on cinemassacre.com.

    The page does not host the media itself; it embeds a player. This
    extractor locates the embedded player URL (either a match for
    ``ScreenwaveMediaIE.EMBED_PATTERN`` or a YouTube ``<iframe>``) and returns
    a ``url_transparent`` result pointing at it, together with the title,
    description, thumbnail and upload date taken from the article page / URL.
    """

    # Raw string: the pattern contains regex escapes such as ``\.`` which are
    # not valid string escapes and trigger warnings in a plain literal.
    _VALID_URL = r'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
    _TESTS = [
        {
            'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
            'md5': 'fde81fbafaee331785f58cd6c0d46190',
            'info_dict': {
                'id': 'Cinemassacre-19911',
                'ext': 'mp4',
                'upload_date': '20121110',
                'title': '“Angry Video Game Nerd: The Movie” – Trailer',
                'description': 'md5:fb87405fcb42a331742a0dce2708560b',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
            'md5': 'd72f10cd39eac4215048f62ab477a511',
            'info_dict': {
                'id': 'Cinemassacre-521be8ef82b16',
                'ext': 'mp4',
                'upload_date': '20131002',
                'title': 'The Mummy’s Hand (1940)',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # Youtube embedded video
            'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
            'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
            'info_dict': {
                'id': 'OEVzPCY2T-g',
                'ext': 'webm',
                'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
                'upload_date': '20061207',
                'uploader': 'Cinemassacre',
                'uploader_id': 'JamesNintendoNerd',
                'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
            }
        },
        {
            # Youtube embedded video
            'url': 'http://cinemassacre.com/2006/09/01/mckids/',
            'md5': '7393c4e0f54602ad110c793eb7a6513a',
            'info_dict': {
                'id': 'FnxsNhuikpo',
                'ext': 'webm',
                'upload_date': '20060901',
                'uploader': 'Cinemassacre Extra',
                'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
                'uploader_id': 'Cinemassacre',
                'title': 'AVGN: McKids',
            }
        },
        {
            'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/',
            'md5': '1376908e49572389e7b06251a53cdd08',
            'info_dict': {
                'id': 'Cinemassacre-555779690c440',
                'ext': 'mp4',
                'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
                'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
                'upload_date': '20150525',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        }
    ]

    def _real_extract(self, url):
        """Resolve an article URL to its embedded player.

        Args:
            url: Article URL matching ``_VALID_URL``.

        Returns:
            A ``url_transparent`` info dict whose ``url`` is the embedded
            player URL found in the page.

        Raises:
            ExtractorError: If neither embed pattern matches the page.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        # The upload date comes straight from the URL path: YYYY/MM/DD -> YYYYMMDD.
        video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')

        webpage = self._download_webpage(url, display_id)

        # default=None makes a miss non-fatal here so that the explicit,
        # more descriptive error below is raised instead.
        playerdata_url = self._search_regex(
            [
                ScreenwaveMediaIE.EMBED_PATTERN,
                r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
            ],
            webpage, 'player data URL', default=None, group='url')
        if not playerdata_url:
            raise ExtractorError('Unable to find player data')

        # The <title> tag has a "|"-separated suffix; keep only the part before it.
        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?)\|', webpage, 'title')
        video_description = self._html_search_regex(
            r'<div class="entry-content">(?P<description>.+?)</div>',
            webpage, 'description', flags=re.DOTALL, fatal=False)
        video_thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'title': video_title,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail': video_thumbnail,
            'url': playerdata_url,
        }
|
|
@ -19,7 +19,7 @@ from ..utils import (
|
|||||||
class CloudyIE(InfoExtractor):
|
class CloudyIE(InfoExtractor):
|
||||||
_IE_DESC = 'cloudy.ec and videoraj.ch'
|
_IE_DESC = 'cloudy.ec and videoraj.ch'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/
|
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
|
||||||
(?:v/|embed\.php\?id=)
|
(?:v/|embed\.php\?id=)
|
||||||
(?P<id>[A-Za-z0-9]+)
|
(?P<id>[A-Za-z0-9]+)
|
||||||
'''
|
'''
|
||||||
@ -37,7 +37,7 @@ class CloudyIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.videoraj.ch/v/47f399fd8bb60',
|
'url': 'http://www.videoraj.to/v/47f399fd8bb60',
|
||||||
'md5': '7d0f8799d91efd4eda26587421c3c3b0',
|
'md5': '7d0f8799d91efd4eda26587421c3c3b0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '47f399fd8bb60',
|
'id': '47f399fd8bb60',
|
||||||
|
@ -1,101 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import int_or_none
|
|
||||||
|
|
||||||
|
|
||||||
class CollegeHumorIE(InfoExtractor):
    """Extractor for collegehumor.com video, embed and short (``/e/``) URLs.

    Metadata is read from the site's ``moogaloop`` JSON endpoint. Entries
    that carry a ``youtubeId`` are handed over to the Youtube extractor;
    everything else is built from the ``mp4`` / ``webm`` URL maps in the JSON.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'

    _TESTS = [
        {
            'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
            'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
            'info_dict': {
                'id': '6902724',
                'ext': 'mp4',
                'title': 'Comic-Con Cosplay Catastrophe',
                'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
                'age_limit': 13,
                'duration': 187,
            },
        }, {
            'url': 'http://www.collegehumor.com/video/3505939/font-conference',
            'md5': '72fa701d8ef38664a4dbb9e2ab721816',
            'info_dict': {
                'id': '3505939',
                'ext': 'mp4',
                'title': 'Font Conference',
                'description': "This video wasn't long enough, so we made it double-spaced.",
                'age_limit': 10,
                'duration': 179,
            },
        }, {
            # embedded youtube video
            'url': 'http://www.collegehumor.com/embed/6950306',
            'info_dict': {
                'id': 'Z-bao9fg6Yc',
                'ext': 'mp4',
                'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
                'uploader': 'Mark Dice',
                'uploader_id': 'MarkDice',
                'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
                'upload_date': '20140127',
            },
            'params': {
                'skip_download': True,
            },
            'add_ie': ['Youtube'],
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a CollegeHumor video URL.

        Args:
            url: URL matching ``_VALID_URL``.

        Returns:
            Either a ``url`` result delegating to the Youtube extractor (when
            the JSON carries a ``youtubeId``) or a full info dict with formats.
        """
        video_id = re.match(self._VALID_URL, url).group('videoid')

        info_url = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json'
        raw_info = self._download_webpage(
            info_url, video_id, 'Downloading info JSON')
        vdata = json.loads(raw_info)['video']

        # Videos that are merely re-hosted from YouTube are delegated as-is.
        youtube_id = vdata.get('youtubeId')
        if youtube_id is not None:
            return {
                '_type': 'url',
                'url': youtube_id,
                'ie_key': 'Youtube',
            }

        # Rating label (lower-cased) -> minimum viewer age.
        rating_ages = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
        rating = vdata.get('rating')
        # None means the age limit is unknown (missing or unrecognised rating).
        age_limit = rating_ages.get(rating.lower()) if rating else None

        quality_prefs = {'high_quality': 2, 'low_quality': 0}
        formats = [
            {
                'format_id': container + '_' + quality_name,
                'url': quality_url,
                'format': container,
                'preference': quality_prefs.get(quality_name),
            }
            for container in ('mp4', 'webm')
            for quality_name, quality_url in vdata.get(container, {}).items()
        ]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': vdata['title'],
            'description': vdata.get('description'),
            'thumbnail': vdata.get('thumbnail'),
            'formats': formats,
            'age_limit': age_limit,
            # Second argument is the scale passed to int_or_none
            # (presumably the JSON reports milliseconds -- confirm against the API).
            'duration': int_or_none(vdata.get('duration'), 1000),
            'like_count': int_or_none(vdata.get('likes')),
        }
|
|
@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
||||||
|https?://(:www\.)?
|
|https?://(:www\.)?
|
||||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
|
||||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||||
(?P<clip>
|
(?P<clip>
|
||||||
(?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||||
)|
|
)|
|
||||||
@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||||
|
@ -163,7 +163,7 @@ class InfoExtractor(object):
|
|||||||
description: Full video description.
|
description: Full video description.
|
||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
license: License name the video is licensed under.
|
license: License name the video is licensed under.
|
||||||
creator: The main artist who created the video.
|
creator: The creator of the video.
|
||||||
release_date: The date (YYYYMMDD) when the video was released.
|
release_date: The date (YYYYMMDD) when the video was released.
|
||||||
timestamp: UNIX timestamp of the moment the video became available.
|
timestamp: UNIX timestamp of the moment the video became available.
|
||||||
upload_date: Video upload date (YYYYMMDD).
|
upload_date: Video upload date (YYYYMMDD).
|
||||||
@ -382,7 +382,7 @@ class InfoExtractor(object):
|
|||||||
else:
|
else:
|
||||||
if query:
|
if query:
|
||||||
url_or_request = update_url_query(url_or_request, query)
|
url_or_request = update_url_query(url_or_request, query)
|
||||||
if data or headers:
|
if data is not None or headers:
|
||||||
url_or_request = sanitized_Request(url_or_request, data, headers)
|
url_or_request = sanitized_Request(url_or_request, data, headers)
|
||||||
try:
|
try:
|
||||||
return self._downloader.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
@ -987,7 +987,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||||
fatal=True):
|
fatal=True, m3u8_id=None):
|
||||||
manifest = self._download_xml(
|
manifest = self._download_xml(
|
||||||
manifest_url, video_id, 'Downloading f4m manifest',
|
manifest_url, video_id, 'Downloading f4m manifest',
|
||||||
'Unable to download f4m manifest',
|
'Unable to download f4m manifest',
|
||||||
@ -1001,11 +1001,18 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return self._parse_f4m_formats(
|
return self._parse_f4m_formats(
|
||||||
manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||||
transform_source=transform_source, fatal=fatal)
|
transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
|
||||||
|
|
||||||
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
||||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||||
fatal=True):
|
fatal=True, m3u8_id=None):
|
||||||
|
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
|
||||||
|
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
|
||||||
|
if akamai_pv is not None and ';' in akamai_pv.text:
|
||||||
|
playerVerificationChallenge = akamai_pv.text.split(';')[0]
|
||||||
|
if playerVerificationChallenge.strip() != '':
|
||||||
|
return []
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
manifest_version = '1.0'
|
manifest_version = '1.0'
|
||||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
|
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
|
||||||
@ -1022,9 +1029,26 @@ class InfoExtractor(object):
|
|||||||
'base URL', default=None)
|
'base URL', default=None)
|
||||||
if base_url:
|
if base_url:
|
||||||
base_url = base_url.strip()
|
base_url = base_url.strip()
|
||||||
|
|
||||||
|
bootstrap_info = xpath_text(
|
||||||
|
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
||||||
|
'bootstrap info', default=None)
|
||||||
|
|
||||||
for i, media_el in enumerate(media_nodes):
|
for i, media_el in enumerate(media_nodes):
|
||||||
|
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||||
|
width = int_or_none(media_el.attrib.get('width'))
|
||||||
|
height = int_or_none(media_el.attrib.get('height'))
|
||||||
|
format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
|
||||||
|
# If <bootstrapInfo> is present, the specified f4m is a
|
||||||
|
# stream-level manifest, and only set-level manifests may refer to
|
||||||
|
# external resources. See section 11.4 and section 4 of F4M spec
|
||||||
|
if bootstrap_info is None:
|
||||||
|
media_url = None
|
||||||
|
# @href is introduced in 2.0, see section 11.6 of F4M spec
|
||||||
if manifest_version == '2.0':
|
if manifest_version == '2.0':
|
||||||
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
|
media_url = media_el.attrib.get('href')
|
||||||
|
if media_url is None:
|
||||||
|
media_url = media_el.attrib.get('url')
|
||||||
if not media_url:
|
if not media_url:
|
||||||
continue
|
continue
|
||||||
manifest_url = (
|
manifest_url = (
|
||||||
@ -1034,29 +1058,43 @@ class InfoExtractor(object):
|
|||||||
# since bitrates in parent manifest (this one) and media_url manifest
|
# since bitrates in parent manifest (this one) and media_url manifest
|
||||||
# may differ leading to inability to resolve the format by requested
|
# may differ leading to inability to resolve the format by requested
|
||||||
# bitrate in f4m downloader
|
# bitrate in f4m downloader
|
||||||
if determine_ext(manifest_url) == 'f4m':
|
ext = determine_ext(manifest_url)
|
||||||
formats.extend(self._extract_f4m_formats(
|
if ext == 'f4m':
|
||||||
|
f4m_formats = self._extract_f4m_formats(
|
||||||
manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||||
transform_source=transform_source, fatal=fatal))
|
transform_source=transform_source, fatal=fatal)
|
||||||
|
# Sometimes stream-level manifest contains single media entry that
|
||||||
|
# does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
|
||||||
|
# At the same time parent's media entry in set-level manifest may
|
||||||
|
# contain it. We will copy it from parent in such cases.
|
||||||
|
if len(f4m_formats) == 1:
|
||||||
|
f = f4m_formats[0]
|
||||||
|
f.update({
|
||||||
|
'tbr': f.get('tbr') or tbr,
|
||||||
|
'width': f.get('width') or width,
|
||||||
|
'height': f.get('height') or height,
|
||||||
|
'format_id': f.get('format_id') if not tbr else format_id,
|
||||||
|
})
|
||||||
|
formats.extend(f4m_formats)
|
||||||
|
continue
|
||||||
|
elif ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
manifest_url, video_id, 'mp4', preference=preference,
|
||||||
|
m3u8_id=m3u8_id, fatal=fatal))
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
'format_id': format_id,
|
||||||
'url': manifest_url,
|
'url': manifest_url,
|
||||||
'ext': 'flv',
|
'ext': 'flv' if bootstrap_info else None,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'width': int_or_none(media_el.attrib.get('width')),
|
'width': width,
|
||||||
'height': int_or_none(media_el.attrib.get('height')),
|
'height': height,
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
})
|
})
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
|
||||||
entry_protocol='m3u8', preference=None,
|
return {
|
||||||
m3u8_id=None, note=None, errnote=None,
|
|
||||||
fatal=True):
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
@ -1064,7 +1102,14 @@ class InfoExtractor(object):
|
|||||||
'preference': preference - 1 if preference else -1,
|
'preference': preference - 1 if preference else -1,
|
||||||
'resolution': 'multiple',
|
'resolution': 'multiple',
|
||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}]
|
}
|
||||||
|
|
||||||
|
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||||
|
entry_protocol='m3u8', preference=None,
|
||||||
|
m3u8_id=None, note=None, errnote=None,
|
||||||
|
fatal=True, live=False):
|
||||||
|
|
||||||
|
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||||
|
|
||||||
format_url = lambda u: (
|
format_url = lambda u: (
|
||||||
u
|
u
|
||||||
@ -1131,8 +1176,15 @@ class InfoExtractor(object):
|
|||||||
format_id = []
|
format_id = []
|
||||||
if m3u8_id:
|
if m3u8_id:
|
||||||
format_id.append(m3u8_id)
|
format_id.append(m3u8_id)
|
||||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
|
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
|
||||||
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
|
# Despite specification does not mention NAME attribute for
|
||||||
|
# EXT-X-STREAM-INF it still sometimes may be present
|
||||||
|
stream_name = last_info.get('NAME') or last_media_name
|
||||||
|
# Bandwidth of live streams may differ over time thus making
|
||||||
|
# format_id unpredictable. So it's better to keep provided
|
||||||
|
# format_id intact.
|
||||||
|
if not live:
|
||||||
|
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||||
f = {
|
f = {
|
||||||
'format_id': '-'.join(format_id),
|
'format_id': '-'.join(format_id),
|
||||||
'url': format_url(line.strip()),
|
'url': format_url(line.strip()),
|
||||||
@ -1264,21 +1316,21 @@ class InfoExtractor(object):
|
|||||||
m3u8_count = 0
|
m3u8_count = 0
|
||||||
|
|
||||||
srcs = []
|
srcs = []
|
||||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
|
||||||
for video in videos:
|
for medium in media:
|
||||||
src = video.get('src')
|
src = medium.get('src')
|
||||||
if not src or src in srcs:
|
if not src or src in srcs:
|
||||||
continue
|
continue
|
||||||
srcs.append(src)
|
srcs.append(src)
|
||||||
|
|
||||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
|
||||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
|
||||||
width = int_or_none(video.get('width'))
|
width = int_or_none(medium.get('width'))
|
||||||
height = int_or_none(video.get('height'))
|
height = int_or_none(medium.get('height'))
|
||||||
proto = video.get('proto')
|
proto = medium.get('proto')
|
||||||
ext = video.get('ext')
|
ext = medium.get('ext')
|
||||||
src_ext = determine_ext(src)
|
src_ext = determine_ext(src)
|
||||||
streamer = video.get('streamer') or base
|
streamer = medium.get('streamer') or base
|
||||||
|
|
||||||
if proto == 'rtmp' or streamer.startswith('rtmp'):
|
if proto == 'rtmp' or streamer.startswith('rtmp'):
|
||||||
rtmp_count += 1
|
rtmp_count += 1
|
||||||
|
143
youtube_dl/extractor/coub.py
Normal file
143
youtube_dl/extractor/coub.py
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
qualities,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CoubIE(InfoExtractor):
    """Extractor for coub.com loops.

    Accepts ``coub:<id>`` pseudo-URLs, ``coub.com/(view|embed|coubs)/<id>``
    pages and the Flash player URL carrying a ``coubID`` / ``coubid`` query
    parameter. All metadata comes from the ``/api/v2/coubs/<id>.json``
    endpoint.
    """

    _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'

    _TESTS = [{
        'url': 'http://coub.com/view/5u5n1',
        'info_dict': {
            'id': '5u5n1',
            'ext': 'mp4',
            'title': 'The Matrix Moonwalk',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 4.6,
            'timestamp': 1428527772,
            'upload_date': '20150408',
            'uploader': 'Артём Лоскутников',
            'uploader_id': 'artyom.loskutnikov',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'age_limit': 0,
        },
    }, {
        'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
        'only_matching': True,
    }, {
        'url': 'coub:5u5n1',
        'only_matching': True,
    }, {
        # longer video id
        'url': 'http://coub.com/view/237d5l5h',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the coub JSON and turn it into an info dict.

        Args:
            url: URL (or ``coub:<id>``) matching ``_VALID_URL``.

        Returns:
            Info dict with id, title, formats and the optional metadata the
            API exposes (thumbnail, duration, counts, uploader, age limit).

        Raises:
            ExtractorError: If the API response carries an ``error`` field
                (raised with ``expected=True``).
            KeyError: If ``title`` or ``file_versions`` is missing.
        """
        video_id = self._match_id(url)

        coub = self._download_json(
            'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)

        if coub.get('error'):
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, coub['error']), expected=True)

        title = coub['title']

        file_versions = coub['file_versions']

        QUALITIES = ('low', 'med', 'high')

        MOBILE = 'mobile'
        IPHONE = 'iphone'
        HTML5 = 'html5'

        SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)

        # NOTE(review): qualities() presumably ranks an id by its position in
        # the given tuple (later = better) -- confirm in utils.
        quality_key = qualities(QUALITIES)
        preference_key = qualities(SOURCE_PREFERENCE)

        formats = []

        # ``.get(key, {})`` only falls back when the key is absent; a JSON
        # null would come through as None, hence the ``or {}`` guards below.
        for kind, items in (file_versions.get(HTML5) or {}).items():
            if kind not in ('video', 'audio'):
                continue
            if not isinstance(items, dict):
                continue
            for quality, item in items.items():
                if not isinstance(item, dict):
                    continue
                item_url = item.get('url')
                if not item_url:
                    continue
                formats.append({
                    'url': item_url,
                    'format_id': '%s-%s-%s' % (HTML5, kind, quality),
                    'filesize': int_or_none(item.get('size')),
                    # Audio-only entries are flagged as having no video codec.
                    'vcodec': 'none' if kind == 'audio' else None,
                    'quality': quality_key(quality),
                    'preference': preference_key(HTML5),
                })

        iphone_url = (file_versions.get(IPHONE) or {}).get('url')
        if iphone_url:
            formats.append({
                'url': iphone_url,
                'format_id': IPHONE,
                'preference': preference_key(IPHONE),
            })

        mobile_url = (file_versions.get(MOBILE) or {}).get('audio_url')
        if mobile_url:
            formats.append({
                'url': mobile_url,
                'format_id': '%s-audio' % MOBILE,
                'preference': preference_key(MOBILE),
            })

        self._sort_formats(formats)

        thumbnail = coub.get('picture')
        duration = float_or_none(coub.get('duration'))
        timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))

        # Look the channel up once and tolerate an explicit null.
        channel = coub.get('channel') or {}
        uploader = channel.get('title')
        uploader_id = channel.get('permalink')

        view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
        like_count = int_or_none(coub.get('likes_count'))
        repost_count = int_or_none(coub.get('recoubs_count'))
        comment_count = int_or_none(coub.get('comments_count'))

        # Three-state result: 18 when flagged, 0 when explicitly not flagged,
        # None when the API gives no information at all.
        age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
        if age_restricted is not None:
            age_limit = 18 if age_restricted is True else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'view_count': view_count,
            'like_count': like_count,
            'repost_count': repost_count,
            'comment_count': comment_count,
            'age_limit': age_limit,
            'formats': formats,
        }
|
@ -11,7 +11,6 @@ from math import pow, sqrt, floor
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_urllib_parse_unquote,
|
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
@ -27,6 +26,7 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
extract_attributes,
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_cbc_decrypt,
|
aes_cbc_decrypt,
|
||||||
@ -306,28 +306,36 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
|
r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
|
||||||
'video_uploader', fatal=False)
|
'video_uploader', fatal=False)
|
||||||
|
|
||||||
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
available_fmts = []
|
||||||
playerdata_req = sanitized_Request(playerdata_url)
|
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||||
playerdata_req.data = urlencode_postdata({'current_page': webpage_url})
|
attrs = extract_attributes(a)
|
||||||
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
href = attrs.get('href')
|
||||||
playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')
|
if href and '/freetrial' in href:
|
||||||
|
continue
|
||||||
stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
|
available_fmts.append(fmt)
|
||||||
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
|
if not available_fmts:
|
||||||
|
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||||
|
available_fmts = re.findall(p, webpage)
|
||||||
|
if available_fmts:
|
||||||
|
break
|
||||||
|
video_encode_ids = []
|
||||||
formats = []
|
formats = []
|
||||||
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
for fmt in available_fmts:
|
||||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||||
video_format = fmt + 'p'
|
video_format = fmt + 'p'
|
||||||
streamdata_req = sanitized_Request(
|
streamdata_req = sanitized_Request(
|
||||||
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
|
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
|
||||||
% (stream_id, stream_format, stream_quality),
|
% (video_id, stream_format, stream_quality),
|
||||||
compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
|
compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
|
||||||
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
streamdata = self._download_xml(
|
streamdata = self._download_xml(
|
||||||
streamdata_req, video_id,
|
streamdata_req, video_id,
|
||||||
note='Downloading media info for %s' % video_format)
|
note='Downloading media info for %s' % video_format)
|
||||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||||
|
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||||
|
if video_encode_id in video_encode_ids:
|
||||||
|
continue
|
||||||
|
video_encode_ids.append(video_encode_id)
|
||||||
video_url = xpath_text(stream_info, './host')
|
video_url = xpath_text(stream_info, './host')
|
||||||
video_play_path = xpath_text(stream_info, './file')
|
video_play_path = xpath_text(stream_info, './file')
|
||||||
if not video_url or not video_play_path:
|
if not video_url or not video_play_path:
|
||||||
@ -359,6 +367,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
})
|
})
|
||||||
formats.append(format_info)
|
formats.append(format_info)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
metadata = self._download_xml(
|
||||||
|
'http://www.crunchyroll.com/xml', video_id,
|
||||||
|
note='Downloading media info', query={
|
||||||
|
'req': 'RpcApiVideoPlayer_GetMediaMetadata',
|
||||||
|
'media_id': video_id,
|
||||||
|
})
|
||||||
|
|
||||||
subtitles = self.extract_subtitles(video_id, webpage)
|
subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
|
|
||||||
@ -366,9 +382,12 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': xpath_text(metadata, 'episode_image_url'),
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
|
'series': xpath_text(metadata, 'series_title'),
|
||||||
|
'episode': xpath_text(metadata, 'episode_title'),
|
||||||
|
'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CWTVIE(InfoExtractor):
|
class CWTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/shows/(?:[^/]+/){2}\?play=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
_VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -48,6 +48,9 @@ class CWTVIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
61
youtube_dl/extractor/dailymail.py
Normal file
61
youtube_dl/extractor/dailymail.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
determine_protocol,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DailyMailIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
|
||||||
|
'md5': '2f639d446394f53f3a33658b518b6615',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1288527',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Turn any video into an impressionist masterpiece',
|
||||||
|
'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_data = self._parse_json(self._search_regex(
|
||||||
|
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
||||||
|
title = video_data['title']
|
||||||
|
video_sources = self._download_json(video_data.get(
|
||||||
|
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for rendition in video_sources['renditions']:
|
||||||
|
rendition_url = rendition.get('url')
|
||||||
|
if not rendition_url:
|
||||||
|
continue
|
||||||
|
tbr = int_or_none(rendition.get('encodingRate'), 1000)
|
||||||
|
container = rendition.get('videoContainer')
|
||||||
|
is_hls = container == 'M2TS'
|
||||||
|
protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
|
||||||
|
formats.append({
|
||||||
|
'format_id': ('hls' if is_hls else protocol) + ('-%d' % tbr if tbr else ''),
|
||||||
|
'url': rendition_url,
|
||||||
|
'width': int_or_none(rendition.get('frameWidth')),
|
||||||
|
'height': int_or_none(rendition.get('frameHeight')),
|
||||||
|
'tbr': tbr,
|
||||||
|
'vcodec': rendition.get('videoCodec'),
|
||||||
|
'container': container,
|
||||||
|
'protocol': protocol,
|
||||||
|
'ext': 'mp4' if is_hls else None,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video_data.get('descr'),
|
||||||
|
'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -12,39 +12,46 @@ class DFBIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
|
'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
|
||||||
# The md5 is different each time
|
'md5': 'ac0f98a52a330f700b4b3034ad240649',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '11633',
|
'id': '11633',
|
||||||
'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
|
'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
|
'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
|
||||||
'upload_date': '20150714',
|
'upload_date': '20150714',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
video_id = mobj.group('id')
|
|
||||||
display_id = mobj.group('display_id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
player_info = self._download_xml(
|
player_info = self._download_xml(
|
||||||
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
|
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
|
||||||
display_id)
|
display_id)
|
||||||
video_info = player_info.find('video')
|
video_info = player_info.find('video')
|
||||||
|
stream_access_url = self._proto_relative_url(video_info.find('url').text.strip())
|
||||||
|
|
||||||
f4m_info = self._download_xml(
|
formats = []
|
||||||
self._proto_relative_url(video_info.find('url').text.strip()), display_id)
|
# see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats
|
||||||
token_el = f4m_info.find('token')
|
for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'):
|
||||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
stream_access_info = self._download_xml(sa_url, display_id)
|
||||||
formats = self._extract_f4m_formats(manifest_url, display_id)
|
token_el = stream_access_info.find('token')
|
||||||
|
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth']
|
||||||
|
if '.f4m' in manifest_url:
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
manifest_url + '&hdcore=3.2.0',
|
||||||
|
display_id, f4m_id='hds', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
manifest_url, display_id, 'mp4',
|
||||||
|
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': video_info.find('title').text,
|
'title': video_info.find('title').text,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id,
|
||||||
'upload_date': unified_strdate(video_info.find('time_date').text),
|
'upload_date': unified_strdate(video_info.find('time_date').text),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,7 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
'duration': 156,
|
'duration': 156,
|
||||||
'timestamp': 1302032462,
|
'timestamp': 1302032462,
|
||||||
'upload_date': '20110405',
|
'upload_date': '20110405',
|
||||||
|
'uploader_id': '103207',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
@ -54,7 +55,11 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
'upload_date': '20140725',
|
'upload_date': '20140725',
|
||||||
'timestamp': 1406246400,
|
'timestamp': 1406246400,
|
||||||
'duration': 116,
|
'duration': 116,
|
||||||
|
'uploader_id': '103207',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires ffmpeg
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -66,13 +71,19 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
for idx, video_info in enumerate(info['playlist']):
|
for idx, video_info in enumerate(info['playlist']):
|
||||||
formats = self._extract_m3u8_formats(
|
subtitles = {}
|
||||||
video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
|
caption_url = video_info.get('captionsUrl')
|
||||||
note='Download m3u8 information for video %d' % (idx + 1))
|
if caption_url:
|
||||||
self._sort_formats(formats)
|
subtitles = {
|
||||||
|
'en': [{
|
||||||
|
'url': caption_url,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
|
||||||
'id': compat_str(video_info['id']),
|
'id': compat_str(video_info['id']),
|
||||||
'formats': formats,
|
|
||||||
'title': video_info['title'],
|
'title': video_info['title'],
|
||||||
'description': video_info.get('description'),
|
'description': video_info.get('description'),
|
||||||
'duration': parse_duration(video_info.get('video_length')),
|
'duration': parse_duration(video_info.get('video_length')),
|
||||||
@ -80,6 +91,7 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
'thumbnail': video_info.get('thumbnailURL'),
|
'thumbnail': video_info.get('thumbnailURL'),
|
||||||
'alt_title': video_info.get('secondary_title'),
|
'alt_title': video_info.get('secondary_title'),
|
||||||
'timestamp': parse_iso8601(video_info.get('publishedDate')),
|
'timestamp': parse_iso8601(video_info.get('publishedDate')),
|
||||||
|
'subtitles': subtitles,
|
||||||
})
|
})
|
||||||
|
|
||||||
return self.playlist_result(entries, display_id, video_title)
|
return self.playlist_result(entries, display_id, video_title)
|
||||||
|
114
youtube_dl/extractor/dispeak.py
Normal file
114
youtube_dl/extractor/dispeak.py
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
remove_end,
|
||||||
|
xpath_element,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DigitallySpeakingIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
# From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
|
||||||
|
'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
|
||||||
|
'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '840376_BQRC',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tenacious Design and The Interface of \'Destiny\'',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
|
||||||
|
'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _parse_mp4(self, metadata):
|
||||||
|
video_formats = []
|
||||||
|
video_root = None
|
||||||
|
|
||||||
|
mp4_video = xpath_text(metadata, './mp4video', default=None)
|
||||||
|
if mp4_video is not None:
|
||||||
|
mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
|
||||||
|
video_root = mobj.group('root')
|
||||||
|
if video_root is None:
|
||||||
|
http_host = xpath_text(metadata, 'httpHost', default=None)
|
||||||
|
if http_host:
|
||||||
|
video_root = 'http://%s/' % http_host
|
||||||
|
if video_root is None:
|
||||||
|
# Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js
|
||||||
|
# Works for GPUTechConf, too
|
||||||
|
video_root = 'http://s3-2u.digitallyspeaking.com/'
|
||||||
|
|
||||||
|
formats = metadata.findall('./MBRVideos/MBRVideo')
|
||||||
|
if not formats:
|
||||||
|
return None
|
||||||
|
for a_format in formats:
|
||||||
|
stream_name = xpath_text(a_format, 'streamName', fatal=True)
|
||||||
|
video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
|
||||||
|
url = video_root + video_path
|
||||||
|
vbr = xpath_text(a_format, 'bitrate')
|
||||||
|
video_formats.append({
|
||||||
|
'url': url,
|
||||||
|
'vbr': int_or_none(vbr),
|
||||||
|
})
|
||||||
|
return video_formats
|
||||||
|
|
||||||
|
def _parse_flv(self, metadata):
|
||||||
|
formats = []
|
||||||
|
akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
|
||||||
|
audios = metadata.findall('./audios/audio')
|
||||||
|
for audio in audios:
|
||||||
|
formats.append({
|
||||||
|
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||||
|
'play_path': remove_end(audio.get('url'), '.flv'),
|
||||||
|
'ext': 'flv',
|
||||||
|
'vcodec': 'none',
|
||||||
|
'format_id': audio.get('code'),
|
||||||
|
})
|
||||||
|
slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
|
||||||
|
formats.append({
|
||||||
|
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||||
|
'play_path': remove_end(slide_video_path, '.flv'),
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_note': 'slide deck video',
|
||||||
|
'quality': -2,
|
||||||
|
'preference': -2,
|
||||||
|
'format_id': 'slides',
|
||||||
|
})
|
||||||
|
speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
|
||||||
|
formats.append({
|
||||||
|
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||||
|
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_note': 'speaker video',
|
||||||
|
'quality': -1,
|
||||||
|
'preference': -1,
|
||||||
|
'format_id': 'speaker',
|
||||||
|
})
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
xml_description = self._download_xml(url, video_id)
|
||||||
|
metadata = xpath_element(xml_description, 'metadata')
|
||||||
|
|
||||||
|
video_formats = self._parse_mp4(metadata)
|
||||||
|
if video_formats is None:
|
||||||
|
video_formats = self._parse_flv(metadata)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': video_formats,
|
||||||
|
'title': xpath_text(metadata, 'title', fatal=True),
|
||||||
|
'duration': parse_duration(xpath_text(metadata, 'endTime')),
|
||||||
|
'creator': xpath_text(metadata, 'speaker'),
|
||||||
|
}
|
@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
'display_id': 'iseven',
|
'display_id': 'iseven',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': 'md5:f34981259a03e980a3c6404190a3ed61',
|
'description': 're:.*m7show@163\.com.*',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'uploader': '7师傅',
|
'uploader': '7师傅',
|
||||||
'uploader_id': '431925',
|
'uploader_id': '431925',
|
||||||
@ -43,7 +43,7 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Romm not found',
|
'skip': 'Room not found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.douyutv.com/17732',
|
'url': 'http://www.douyutv.com/17732',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -51,7 +51,7 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
'display_id': '17732',
|
'display_id': '17732',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': 'md5:f34981259a03e980a3c6404190a3ed61',
|
'description': 're:.*m7show@163\.com.*',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'uploader': '7师傅',
|
'uploader': '7师傅',
|
||||||
'uploader_id': '431925',
|
'uploader_id': '431925',
|
||||||
@ -75,13 +75,28 @@ class DouyuTVIE(InfoExtractor):
|
|||||||
room_id = self._html_search_regex(
|
room_id = self._html_search_regex(
|
||||||
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||||
|
|
||||||
|
config = None
|
||||||
|
# Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache"
|
||||||
|
# Retry with different parameters - same parameters cause same errors
|
||||||
|
for i in range(5):
|
||||||
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
|
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
|
||||||
room_id, int(time.time()))
|
room_id, int(time.time()))
|
||||||
|
|
||||||
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
|
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
|
||||||
config = self._download_json(
|
|
||||||
|
config_page = self._download_webpage(
|
||||||
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
|
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
|
||||||
video_id)
|
video_id)
|
||||||
|
try:
|
||||||
|
config = self._parse_json(config_page, video_id, fatal=False)
|
||||||
|
except ExtractorError:
|
||||||
|
# Wait some time before retrying to get a different time() value
|
||||||
|
self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. '
|
||||||
|
'Waiting for %(timeout)s seconds before retrying')
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
if config is None:
|
||||||
|
raise ExtractorError('Unable to fetch API result')
|
||||||
|
|
||||||
data = config['data']
|
data = config['data']
|
||||||
|
|
||||||
|
@ -6,13 +6,18 @@ import re
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..compat import compat_urlparse
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DPlayIE(InfoExtractor):
|
class DPlayIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# geo restricted, via direct unsigned hls URL
|
||||||
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
|
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1255600',
|
'id': '1255600',
|
||||||
@ -31,11 +36,12 @@ class DPlayIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
|
# non geo restricted, via secure api, unsigned download hls URL
|
||||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3172',
|
'id': '3172',
|
||||||
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
|
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Svensken lär sig njuta av livet',
|
'title': 'Svensken lär sig njuta av livet',
|
||||||
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
||||||
'duration': 2650,
|
'duration': 2650,
|
||||||
@ -48,23 +54,25 @@ class DPlayIE(InfoExtractor):
|
|||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# geo restricted, via secure api, unsigned download hls URL
|
||||||
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
|
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '70816',
|
'id': '70816',
|
||||||
'display_id': 'season-6-episode-12',
|
'display_id': 'season-6-episode-12',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Episode 12',
|
'title': 'Episode 12',
|
||||||
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
|
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
|
||||||
'duration': 2563,
|
'duration': 2563,
|
||||||
'timestamp': 1429696800,
|
'timestamp': 1429696800,
|
||||||
'upload_date': '20150422',
|
'upload_date': '20150422',
|
||||||
'creator': 'Kanal 4',
|
'creator': 'Kanal 4 (Home)',
|
||||||
'series': 'Mig og min mor',
|
'series': 'Mig og min mor',
|
||||||
'season_number': 6,
|
'season_number': 6,
|
||||||
'episode_number': 12,
|
'episode_number': 12,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# geo restricted, via direct unsigned hls URL
|
||||||
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
|
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
@ -90,17 +98,24 @@ class DPlayIE(InfoExtractor):
|
|||||||
|
|
||||||
def extract_formats(protocol, manifest_url):
|
def extract_formats(protocol, manifest_url):
|
||||||
if protocol == 'hls':
|
if protocol == 'hls':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
manifest_url, video_id, ext='mp4',
|
manifest_url, video_id, ext='mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False))
|
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
|
||||||
|
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
|
||||||
|
# ourselves
|
||||||
|
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
|
||||||
|
for m3u8_format in m3u8_formats:
|
||||||
|
m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
elif protocol == 'hds':
|
elif protocol == 'hds':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
|
manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
|
||||||
video_id, f4m_id=protocol, fatal=False))
|
video_id, f4m_id=protocol, fatal=False))
|
||||||
|
|
||||||
domain_tld = domain.split('.')[-1]
|
domain_tld = domain.split('.')[-1]
|
||||||
if domain_tld in ('se', 'dk'):
|
if domain_tld in ('se', 'dk', 'no'):
|
||||||
for protocol in PROTOCOLS:
|
for protocol in PROTOCOLS:
|
||||||
|
# Providing dsc-geo allows to bypass geo restriction in some cases
|
||||||
self._set_cookie(
|
self._set_cookie(
|
||||||
'secure.dplay.%s' % domain_tld, 'dsc-geo',
|
'secure.dplay.%s' % domain_tld, 'dsc-geo',
|
||||||
json.dumps({
|
json.dumps({
|
||||||
@ -113,13 +128,24 @@ class DPlayIE(InfoExtractor):
|
|||||||
'Downloading %s stream JSON' % protocol, fatal=False)
|
'Downloading %s stream JSON' % protocol, fatal=False)
|
||||||
if stream and stream.get(protocol):
|
if stream and stream.get(protocol):
|
||||||
extract_formats(protocol, stream[protocol])
|
extract_formats(protocol, stream[protocol])
|
||||||
else:
|
|
||||||
|
# The last resort is to try direct unsigned hls/hds URLs from info dictionary.
|
||||||
|
# Sometimes this does work even when secure API with dsc-geo has failed (e.g.
|
||||||
|
# http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
|
||||||
|
if not formats:
|
||||||
for protocol in PROTOCOLS:
|
for protocol in PROTOCOLS:
|
||||||
if info.get(protocol):
|
if info.get(protocol):
|
||||||
extract_formats(protocol, info[protocol])
|
extract_formats(protocol, info[protocol])
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for lang in ('se', 'sv', 'da', 'nl', 'no'):
|
||||||
|
for format_id in ('web_vtt', 'vtt', 'srt'):
|
||||||
|
subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
|
||||||
|
if subtitle_url:
|
||||||
|
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
@ -133,4 +159,5 @@ class DPlayIE(InfoExtractor):
|
|||||||
'episode_number': int_or_none(info.get('episode')),
|
'episode_number': int_or_none(info.get('episode')),
|
||||||
'age_limit': int_or_none(info.get('minimum_age')),
|
'age_limit': int_or_none(info.get('minimum_age')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
@ -1,39 +0,0 @@
|
|||||||
# encoding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class DumpIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.dump.com/oneus/',
|
|
||||||
'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'oneus',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': "He's one of us.",
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
m = re.match(self._VALID_URL, url)
|
|
||||||
video_id = m.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
video_url = self._search_regex(
|
|
||||||
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'url': video_url,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
}
|
|
@ -2,13 +2,16 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
|
|
||||||
|
|
||||||
class DWIE(InfoExtractor):
|
class DWIE(InfoExtractor):
|
||||||
IE_NAME = 'dw'
|
IE_NAME = 'dw'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# video
|
# video
|
||||||
'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
|
'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
|
||||||
@ -31,6 +34,16 @@ class DWIE(InfoExtractor):
|
|||||||
'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
|
'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
|
||||||
'upload_date': '20160311',
|
'upload_date': '20160311',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
|
||||||
|
'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '19274438',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Welcome to the 90s – Hip Hop',
|
||||||
|
'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
|
||||||
|
'upload_date': '20160521',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -38,6 +51,7 @@ class DWIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, media_id)
|
webpage = self._download_webpage(url, media_id)
|
||||||
hidden_inputs = self._hidden_inputs(webpage)
|
hidden_inputs = self._hidden_inputs(webpage)
|
||||||
title = hidden_inputs['media_title']
|
title = hidden_inputs['media_title']
|
||||||
|
media_id = hidden_inputs.get('media_id') or media_id
|
||||||
|
|
||||||
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
|
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
|
||||||
formats = self._extract_smil_formats(
|
formats = self._extract_smil_formats(
|
||||||
@ -49,13 +63,20 @@ class DWIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
formats = [{'url': hidden_inputs['file_name']}]
|
formats = [{'url': hidden_inputs['file_name']}]
|
||||||
|
|
||||||
|
upload_date = hidden_inputs.get('display_date')
|
||||||
|
if not upload_date:
|
||||||
|
upload_date = self._html_search_regex(
|
||||||
|
r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
|
||||||
|
'upload date', default=None)
|
||||||
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': hidden_inputs.get('preview_image'),
|
'thumbnail': hidden_inputs.get('preview_image'),
|
||||||
'duration': int_or_none(hidden_inputs.get('file_duration')),
|
'duration': int_or_none(hidden_inputs.get('file_duration')),
|
||||||
'upload_date': hidden_inputs.get('display_date'),
|
'upload_date': upload_date,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,9 +4,11 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
url_basename,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -21,7 +23,7 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# http://lenta.ru/news/2015/03/06/navalny/
|
# http://lenta.ru/news/2015/03/06/navalny/
|
||||||
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
|
'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
|
||||||
'md5': '70f5187fb620f2c1d503b3b22fd4efe3',
|
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '227304',
|
'id': '227304',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -36,7 +38,7 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
# http://muz-tv.ru/play/7129/
|
# http://muz-tv.ru/play/7129/
|
||||||
# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
|
# http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
|
||||||
'url': 'eagleplatform:media.clipyou.ru:12820',
|
'url': 'eagleplatform:media.clipyou.ru:12820',
|
||||||
'md5': '90b26344ba442c8e44aa4cf8f301164a',
|
'md5': '358597369cf8ba56675c1df15e7af624',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '12820',
|
'id': '12820',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -55,8 +57,13 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
raise ExtractorError(' '.join(response['errors']), expected=True)
|
raise ExtractorError(' '.join(response['errors']), expected=True)
|
||||||
|
|
||||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||||
|
try:
|
||||||
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
|
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
|
||||||
|
except ExtractorError as ee:
|
||||||
|
if isinstance(ee.cause, compat_HTTPError):
|
||||||
|
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
|
||||||
self._handle_error(response)
|
self._handle_error(response)
|
||||||
|
raise
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||||
@ -84,17 +91,33 @@ class EaglePlatformIE(InfoExtractor):
|
|||||||
|
|
||||||
secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
|
secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
|
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
|
||||||
formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id,
|
m3u8_url, video_id,
|
||||||
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
|
||||||
mp4_url = self._get_video_url(
|
mp4_url = self._get_video_url(
|
||||||
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
|
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
|
||||||
# http://lentaru.media.eagleplatform.com/player/player.js
|
# http://lentaru.media.eagleplatform.com/player/player.js
|
||||||
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
|
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
|
||||||
video_id, 'Downloading mp4 JSON')
|
video_id, 'Downloading mp4 JSON')
|
||||||
formats.append({'url': mp4_url, 'format_id': 'mp4'})
|
mp4_url_basename = url_basename(mp4_url)
|
||||||
|
for m3u8_format in m3u8_formats:
|
||||||
|
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
|
||||||
|
if mobj:
|
||||||
|
http_format = m3u8_format.copy()
|
||||||
|
video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
|
||||||
|
if not self._is_valid_url(video_url, video_id):
|
||||||
|
continue
|
||||||
|
http_format.update({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||||
|
'protocol': 'http',
|
||||||
|
})
|
||||||
|
formats.append(http_format)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -11,8 +11,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class EpornerIE(InfoExtractor):
|
class EpornerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -23,8 +23,12 @@ class EpornerIE(InfoExtractor):
|
|||||||
'duration': 1838,
|
'duration': 1838,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
},
|
||||||
}
|
}, {
|
||||||
|
# New (May 2016) URL layout
|
||||||
|
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||||
|
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
|
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
|
|||||||
'description': None,
|
'description': None,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'add_ie': ['OoyalaExternal'],
|
||||||
}, {
|
}, {
|
||||||
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
|
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
|
||||||
'url': 'http://espn.go.com/video/clip?id=2743663',
|
'url': 'http://espn.go.com/video/clip?id=2743663',
|
||||||
|
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
|
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Must-See Moments: Best of the MLS season',
|
'title': 'Must-See Moments: Best of the MLS season',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'add_ie': ['OoyalaExternal'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -3,6 +3,10 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .abc import ABCIE
|
from .abc import ABCIE
|
||||||
from .abc7news import Abc7NewsIE
|
from .abc7news import Abc7NewsIE
|
||||||
|
from .abcnews import (
|
||||||
|
AbcNewsIE,
|
||||||
|
AbcNewsVideoIE,
|
||||||
|
)
|
||||||
from .academicearth import AcademicEarthCourseIE
|
from .academicearth import AcademicEarthCourseIE
|
||||||
from .acast import (
|
from .acast import (
|
||||||
ACastIE,
|
ACastIE,
|
||||||
@ -46,11 +50,13 @@ from .arte import (
|
|||||||
ArteTVPlus7IE,
|
ArteTVPlus7IE,
|
||||||
ArteTVCreativeIE,
|
ArteTVCreativeIE,
|
||||||
ArteTVConcertIE,
|
ArteTVConcertIE,
|
||||||
|
ArteTVInfoIE,
|
||||||
ArteTVFutureIE,
|
ArteTVFutureIE,
|
||||||
ArteTVCinemaIE,
|
ArteTVCinemaIE,
|
||||||
ArteTVDDCIE,
|
ArteTVDDCIE,
|
||||||
ArteTVMagazineIE,
|
ArteTVMagazineIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
|
ArteTVPlaylistIE,
|
||||||
)
|
)
|
||||||
from .atresplayer import AtresPlayerIE
|
from .atresplayer import AtresPlayerIE
|
||||||
from .atttechchannel import ATTTechChannelIE
|
from .atttechchannel import ATTTechChannelIE
|
||||||
@ -74,6 +80,7 @@ from .bigflix import BigflixIE
|
|||||||
from .bild import BildIE
|
from .bild import BildIE
|
||||||
from .bilibili import BiliBiliIE
|
from .bilibili import BiliBiliIE
|
||||||
from .biobiochiletv import BioBioChileTVIE
|
from .biobiochiletv import BioBioChileTVIE
|
||||||
|
from .biqle import BIQLEIE
|
||||||
from .bleacherreport import (
|
from .bleacherreport import (
|
||||||
BleacherReportIE,
|
BleacherReportIE,
|
||||||
BleacherReportCMSIE,
|
BleacherReportCMSIE,
|
||||||
@ -105,6 +112,7 @@ from .cbc import (
|
|||||||
CBCPlayerIE,
|
CBCPlayerIE,
|
||||||
)
|
)
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
|
from .cbslocal import CBSLocalIE
|
||||||
from .cbsinteractive import CBSInteractiveIE
|
from .cbsinteractive import CBSInteractiveIE
|
||||||
from .cbsnews import (
|
from .cbsnews import (
|
||||||
CBSNewsIE,
|
CBSNewsIE,
|
||||||
@ -122,7 +130,6 @@ from .chirbit import (
|
|||||||
ChirbitProfileIE,
|
ChirbitProfileIE,
|
||||||
)
|
)
|
||||||
from .cinchcast import CinchcastIE
|
from .cinchcast import CinchcastIE
|
||||||
from .cinemassacre import CinemassacreIE
|
|
||||||
from .cliprs import ClipRsIE
|
from .cliprs import ClipRsIE
|
||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
@ -137,7 +144,7 @@ from .cnn import (
|
|||||||
CNNBlogsIE,
|
CNNBlogsIE,
|
||||||
CNNArticleIE,
|
CNNArticleIE,
|
||||||
)
|
)
|
||||||
from .collegehumor import CollegeHumorIE
|
from .coub import CoubIE
|
||||||
from .collegerama import CollegeRamaIE
|
from .collegerama import CollegeRamaIE
|
||||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||||
from .comcarcoff import ComCarCoffIE
|
from .comcarcoff import ComCarCoffIE
|
||||||
@ -156,6 +163,7 @@ from .cspan import CSpanIE
|
|||||||
from .ctsnews import CtsNewsIE
|
from .ctsnews import CtsNewsIE
|
||||||
from .cultureunplugged import CultureUnpluggedIE
|
from .cultureunplugged import CultureUnpluggedIE
|
||||||
from .cwtv import CWTVIE
|
from .cwtv import CWTVIE
|
||||||
|
from .dailymail import DailyMailIE
|
||||||
from .dailymotion import (
|
from .dailymotion import (
|
||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
DailymotionPlaylistIE,
|
DailymotionPlaylistIE,
|
||||||
@ -192,10 +200,10 @@ from .drbonanza import DRBonanzaIE
|
|||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
from .drtv import DRTVIE
|
from .drtv import DRTVIE
|
||||||
from .dvtv import DVTVIE
|
from .dvtv import DVTVIE
|
||||||
from .dump import DumpIE
|
|
||||||
from .dumpert import DumpertIE
|
from .dumpert import DumpertIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
from .discovery import DiscoveryIE
|
from .discovery import DiscoveryIE
|
||||||
|
from .dispeak import DigitallySpeakingIE
|
||||||
from .dropbox import DropboxIE
|
from .dropbox import DropboxIE
|
||||||
from .dw import (
|
from .dw import (
|
||||||
DWIE,
|
DWIE,
|
||||||
@ -225,6 +233,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
|
|||||||
from .exfm import ExfmIE
|
from .exfm import ExfmIE
|
||||||
from .expotv import ExpoTVIE
|
from .expotv import ExpoTVIE
|
||||||
from .extremetube import ExtremeTubeIE
|
from .extremetube import ExtremeTubeIE
|
||||||
|
from .eyedotv import EyedoTVIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
from .faz import FazIE
|
from .faz import FazIE
|
||||||
from .fc2 import FC2IE
|
from .fc2 import FC2IE
|
||||||
@ -237,6 +246,7 @@ from .fktv import FKTVIE
|
|||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
from .folketinget import FolketingetIE
|
from .folketinget import FolketingetIE
|
||||||
from .footyroom import FootyRoomIE
|
from .footyroom import FootyRoomIE
|
||||||
|
from .formula1 import Formula1IE
|
||||||
from .fourtube import FourTubeIE
|
from .fourtube import FourTubeIE
|
||||||
from .fox import FOXIE
|
from .fox import FOXIE
|
||||||
from .foxgay import FoxgayIE
|
from .foxgay import FoxgayIE
|
||||||
@ -364,6 +374,7 @@ from .kuwo import (
|
|||||||
)
|
)
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
from .laola1tv import Laola1TvIE
|
from .laola1tv import Laola1TvIE
|
||||||
|
from .learnr import LearnrIE
|
||||||
from .lecture2go import Lecture2GoIE
|
from .lecture2go import Lecture2GoIE
|
||||||
from .lemonde import LemondeIE
|
from .lemonde import LemondeIE
|
||||||
from .leeco import (
|
from .leeco import (
|
||||||
@ -381,6 +392,7 @@ from .limelight import (
|
|||||||
LimelightChannelIE,
|
LimelightChannelIE,
|
||||||
LimelightChannelListIE,
|
LimelightChannelListIE,
|
||||||
)
|
)
|
||||||
|
from .litv import LiTVIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import (
|
from .livestream import (
|
||||||
LivestreamIE,
|
LivestreamIE,
|
||||||
@ -388,6 +400,7 @@ from .livestream import (
|
|||||||
LivestreamShortenerIE,
|
LivestreamShortenerIE,
|
||||||
)
|
)
|
||||||
from .lnkgo import LnkGoIE
|
from .lnkgo import LnkGoIE
|
||||||
|
from .localnews8 import LocalNews8IE
|
||||||
from .lovehomeporn import LoveHomePornIE
|
from .lovehomeporn import LoveHomePornIE
|
||||||
from .lrt import LRTIE
|
from .lrt import LRTIE
|
||||||
from .lynda import (
|
from .lynda import (
|
||||||
@ -399,19 +412,28 @@ from .macgamestore import MacGameStoreIE
|
|||||||
from .mailru import MailRuIE
|
from .mailru import MailRuIE
|
||||||
from .makerschannel import MakersChannelIE
|
from .makerschannel import MakersChannelIE
|
||||||
from .makertv import MakerTVIE
|
from .makertv import MakerTVIE
|
||||||
from .malemotion import MalemotionIE
|
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
from .mgoon import MgoonIE
|
from .mgoon import MgoonIE
|
||||||
|
from .mgtv import MGTVIE
|
||||||
|
from .microsoftvirtualacademy import (
|
||||||
|
MicrosoftVirtualAcademyIE,
|
||||||
|
MicrosoftVirtualAcademyCourseIE,
|
||||||
|
)
|
||||||
from .minhateca import MinhatecaIE
|
from .minhateca import MinhatecaIE
|
||||||
from .ministrygrid import MinistryGridIE
|
from .ministrygrid import MinistryGridIE
|
||||||
from .minoto import MinotoIE
|
from .minoto import MinotoIE
|
||||||
from .miomio import MioMioIE
|
from .miomio import MioMioIE
|
||||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||||
from .mitele import MiTeleIE
|
from .mitele import MiTeleIE
|
||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import (
|
||||||
|
MixcloudIE,
|
||||||
|
MixcloudUserIE,
|
||||||
|
MixcloudPlaylistIE,
|
||||||
|
MixcloudStreamIE,
|
||||||
|
)
|
||||||
from .mlb import MLBIE
|
from .mlb import MLBIE
|
||||||
from .mnet import MnetIE
|
from .mnet import MnetIE
|
||||||
from .mpora import MporaIE
|
from .mpora import MporaIE
|
||||||
@ -419,7 +441,6 @@ from .moevideo import MoeVideoIE
|
|||||||
from .mofosex import MofosexIE
|
from .mofosex import MofosexIE
|
||||||
from .mojvideo import MojvideoIE
|
from .mojvideo import MojvideoIE
|
||||||
from .moniker import MonikerIE
|
from .moniker import MonikerIE
|
||||||
from .mooshare import MooshareIE
|
|
||||||
from .morningstar import MorningstarIE
|
from .morningstar import MorningstarIE
|
||||||
from .motherless import MotherlessIE
|
from .motherless import MotherlessIE
|
||||||
from .motorsport import MotorsportIE
|
from .motorsport import MotorsportIE
|
||||||
@ -433,8 +454,7 @@ from .mtv import (
|
|||||||
)
|
)
|
||||||
from .muenchentv import MuenchenTVIE
|
from .muenchentv import MuenchenTVIE
|
||||||
from .musicplayon import MusicPlayOnIE
|
from .musicplayon import MusicPlayOnIE
|
||||||
from .muzu import MuzuTVIE
|
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||||
from .mwave import MwaveIE
|
|
||||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
from .myvi import MyviIE
|
from .myvi import MyviIE
|
||||||
@ -464,7 +484,6 @@ from .ndr import (
|
|||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .netzkino import NetzkinoIE
|
from .netzkino import NetzkinoIE
|
||||||
from .nerdcubed import NerdCubedFeedIE
|
from .nerdcubed import NerdCubedFeedIE
|
||||||
from .nerdist import NerdistIE
|
|
||||||
from .neteasemusic import (
|
from .neteasemusic import (
|
||||||
NetEaseMusicIE,
|
NetEaseMusicIE,
|
||||||
NetEaseMusicAlbumIE,
|
NetEaseMusicAlbumIE,
|
||||||
@ -485,9 +504,10 @@ from .nextmovie import NextMovieIE
|
|||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
from .nfl import NFLIE
|
from .nfl import NFLIE
|
||||||
from .nhl import (
|
from .nhl import (
|
||||||
NHLIE,
|
|
||||||
NHLNewsIE,
|
|
||||||
NHLVideocenterIE,
|
NHLVideocenterIE,
|
||||||
|
NHLNewsIE,
|
||||||
|
NHLVideocenterCategoryIE,
|
||||||
|
NHLIE,
|
||||||
)
|
)
|
||||||
from .nick import NickIE
|
from .nick import NickIE
|
||||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||||
@ -555,12 +575,15 @@ from .pandoratv import PandoraTVIE
|
|||||||
from .parliamentliveuk import ParliamentLiveUKIE
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .periscope import PeriscopeIE
|
from .people import PeopleIE
|
||||||
|
from .periscope import (
|
||||||
|
PeriscopeIE,
|
||||||
|
PeriscopeUserIE,
|
||||||
|
)
|
||||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||||
from .phoenix import PhoenixIE
|
from .phoenix import PhoenixIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
from .pinkbike import PinkbikeIE
|
from .pinkbike import PinkbikeIE
|
||||||
from .planetaplay import PlanetaPlayIE
|
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .played import PlayedIE
|
from .played import PlayedIE
|
||||||
from .playfm import PlayFMIE
|
from .playfm import PlayFMIE
|
||||||
@ -596,8 +619,11 @@ from .qqmusic import (
|
|||||||
QQMusicToplistIE,
|
QQMusicToplistIE,
|
||||||
QQMusicPlaylistIE,
|
QQMusicPlaylistIE,
|
||||||
)
|
)
|
||||||
from .quickvid import QuickVidIE
|
|
||||||
from .r7 import R7IE
|
from .r7 import R7IE
|
||||||
|
from .radiocanada import (
|
||||||
|
RadioCanadaIE,
|
||||||
|
RadioCanadaAudioVideoIE,
|
||||||
|
)
|
||||||
from .radiode import RadioDeIE
|
from .radiode import RadioDeIE
|
||||||
from .radiojavan import RadioJavanIE
|
from .radiojavan import RadioJavanIE
|
||||||
from .radiobremen import RadioBremenIE
|
from .radiobremen import RadioBremenIE
|
||||||
@ -611,6 +637,7 @@ from .rds import RDSIE
|
|||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .regiotv import RegioTVIE
|
from .regiotv import RegioTVIE
|
||||||
from .restudy import RestudyIE
|
from .restudy import RestudyIE
|
||||||
|
from .reuters import ReutersIE
|
||||||
from .reverbnation import ReverbNationIE
|
from .reverbnation import ReverbNationIE
|
||||||
from .revision3 import Revision3IE
|
from .revision3 import Revision3IE
|
||||||
from .rice import RICEIE
|
from .rice import RICEIE
|
||||||
@ -652,9 +679,9 @@ from .screencastomatic import ScreencastOMaticIE
|
|||||||
from .screenjunkies import ScreenJunkiesIE
|
from .screenjunkies import ScreenJunkiesIE
|
||||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||||
from .senateisvp import SenateISVPIE
|
from .senateisvp import SenateISVPIE
|
||||||
|
from .sendtonews import SendtoNewsIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
from .sexu import SexuIE
|
from .sexu import SexuIE
|
||||||
from .sexykarma import SexyKarmaIE
|
|
||||||
from .shahid import ShahidIE
|
from .shahid import ShahidIE
|
||||||
from .shared import SharedIE
|
from .shared import SharedIE
|
||||||
from .sharesix import ShareSixIE
|
from .sharesix import ShareSixIE
|
||||||
@ -671,10 +698,6 @@ from .smotri import (
|
|||||||
SmotriUserIE,
|
SmotriUserIE,
|
||||||
SmotriBroadcastIE,
|
SmotriBroadcastIE,
|
||||||
)
|
)
|
||||||
from .snagfilms import (
|
|
||||||
SnagFilmsIE,
|
|
||||||
SnagFilmsEmbedIE,
|
|
||||||
)
|
|
||||||
from .snotr import SnotrIE
|
from .snotr import SnotrIE
|
||||||
from .sohu import SohuIE
|
from .sohu import SohuIE
|
||||||
from .soundcloud import (
|
from .soundcloud import (
|
||||||
@ -726,9 +749,13 @@ from .svt import (
|
|||||||
from .swrmediathek import SWRMediathekIE
|
from .swrmediathek import SWRMediathekIE
|
||||||
from .syfy import SyfyIE
|
from .syfy import SyfyIE
|
||||||
from .sztvhu import SztvHuIE
|
from .sztvhu import SztvHuIE
|
||||||
from .tagesschau import TagesschauIE
|
from .tagesschau import (
|
||||||
|
TagesschauPlayerIE,
|
||||||
|
TagesschauIE,
|
||||||
|
)
|
||||||
from .tapely import TapelyIE
|
from .tapely import TapelyIE
|
||||||
from .tass import TassIE
|
from .tass import TassIE
|
||||||
|
from .tdslifeway import TDSLifewayIE
|
||||||
from .teachertube import (
|
from .teachertube import (
|
||||||
TeacherTubeIE,
|
TeacherTubeIE,
|
||||||
TeacherTubeUserIE,
|
TeacherTubeUserIE,
|
||||||
@ -746,7 +773,6 @@ from .teletask import TeleTaskIE
|
|||||||
from .testurl import TestURLIE
|
from .testurl import TestURLIE
|
||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
from .theintercept import TheInterceptIE
|
from .theintercept import TheInterceptIE
|
||||||
from .theonion import TheOnionIE
|
|
||||||
from .theplatform import (
|
from .theplatform import (
|
||||||
ThePlatformIE,
|
ThePlatformIE,
|
||||||
ThePlatformFeedIE,
|
ThePlatformFeedIE,
|
||||||
@ -756,6 +782,7 @@ from .thesixtyone import TheSixtyOneIE
|
|||||||
from .thestar import TheStarIE
|
from .thestar import TheStarIE
|
||||||
from .thisamericanlife import ThisAmericanLifeIE
|
from .thisamericanlife import ThisAmericanLifeIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
|
from .threeqsdn import ThreeQSDNIE
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
from .tlc import TlcDeIE
|
from .tlc import TlcDeIE
|
||||||
from .tmz import (
|
from .tmz import (
|
||||||
@ -808,7 +835,10 @@ from .tvc import (
|
|||||||
)
|
)
|
||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvland import TVLandIE
|
from .tvland import TVLandIE
|
||||||
from .tvp import TvpIE, TvpSeriesIE
|
from .tvp import (
|
||||||
|
TVPIE,
|
||||||
|
TVPSeriesIE,
|
||||||
|
)
|
||||||
from .tvplay import TVPlayIE
|
from .tvplay import TVPlayIE
|
||||||
from .tweakers import TweakersIE
|
from .tweakers import TweakersIE
|
||||||
from .twentyfourvideo import TwentyFourVideoIE
|
from .twentyfourvideo import TwentyFourVideoIE
|
||||||
@ -823,7 +853,6 @@ from .twitch import (
|
|||||||
TwitchVodIE,
|
TwitchVodIE,
|
||||||
TwitchProfileIE,
|
TwitchProfileIE,
|
||||||
TwitchPastBroadcastsIE,
|
TwitchPastBroadcastsIE,
|
||||||
TwitchBookmarksIE,
|
|
||||||
TwitchStreamIE,
|
TwitchStreamIE,
|
||||||
)
|
)
|
||||||
from .twitter import (
|
from .twitter import (
|
||||||
@ -831,7 +860,6 @@ from .twitter import (
|
|||||||
TwitterIE,
|
TwitterIE,
|
||||||
TwitterAmplifyIE,
|
TwitterAmplifyIE,
|
||||||
)
|
)
|
||||||
from .ubu import UbuIE
|
|
||||||
from .udemy import (
|
from .udemy import (
|
||||||
UdemyIE,
|
UdemyIE,
|
||||||
UdemyCourseIE
|
UdemyCourseIE
|
||||||
@ -842,14 +870,20 @@ from .unistra import UnistraIE
|
|||||||
from .urort import UrortIE
|
from .urort import UrortIE
|
||||||
from .usatoday import USATodayIE
|
from .usatoday import USATodayIE
|
||||||
from .ustream import UstreamIE, UstreamChannelIE
|
from .ustream import UstreamIE, UstreamChannelIE
|
||||||
from .ustudio import UstudioIE
|
from .ustudio import (
|
||||||
|
UstudioIE,
|
||||||
|
UstudioEmbedIE,
|
||||||
|
)
|
||||||
from .varzesh3 import Varzesh3IE
|
from .varzesh3 import Varzesh3IE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
from .veehd import VeeHDIE
|
from .veehd import VeeHDIE
|
||||||
from .veoh import VeohIE
|
from .veoh import VeohIE
|
||||||
from .vessel import VesselIE
|
from .vessel import VesselIE
|
||||||
from .vesti import VestiIE
|
from .vesti import VestiIE
|
||||||
from .vevo import VevoIE
|
from .vevo import (
|
||||||
|
VevoIE,
|
||||||
|
VevoPlaylistIE,
|
||||||
|
)
|
||||||
from .vgtv import (
|
from .vgtv import (
|
||||||
BTArticleIE,
|
BTArticleIE,
|
||||||
BTVestlendingenIE,
|
BTVestlendingenIE,
|
||||||
@ -878,6 +912,10 @@ from .vidme import (
|
|||||||
)
|
)
|
||||||
from .vidzi import VidziIE
|
from .vidzi import VidziIE
|
||||||
from .vier import VierIE, VierVideosIE
|
from .vier import VierIE, VierVideosIE
|
||||||
|
from .viewlift import (
|
||||||
|
ViewLiftIE,
|
||||||
|
ViewLiftEmbedIE,
|
||||||
|
)
|
||||||
from .viewster import ViewsterIE
|
from .viewster import ViewsterIE
|
||||||
from .viidea import ViideaIE
|
from .viidea import ViideaIE
|
||||||
from .vimeo import (
|
from .vimeo import (
|
||||||
@ -914,9 +952,12 @@ from .vube import VubeIE
|
|||||||
from .vuclip import VuClipIE
|
from .vuclip import VuClipIE
|
||||||
from .vulture import VultureIE
|
from .vulture import VultureIE
|
||||||
from .walla import WallaIE
|
from .walla import WallaIE
|
||||||
from .washingtonpost import WashingtonPostIE
|
from .washingtonpost import (
|
||||||
|
WashingtonPostIE,
|
||||||
|
WashingtonPostArticleIE,
|
||||||
|
)
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .wayofthemaster import WayOfTheMasterIE
|
from .watchindianporn import WatchIndianPornIE
|
||||||
from .wdr import (
|
from .wdr import (
|
||||||
WDRIE,
|
WDRIE,
|
||||||
WDRMobileIE,
|
WDRMobileIE,
|
||||||
@ -940,6 +981,12 @@ from .xhamster import (
|
|||||||
XHamsterIE,
|
XHamsterIE,
|
||||||
XHamsterEmbedIE,
|
XHamsterEmbedIE,
|
||||||
)
|
)
|
||||||
|
from .xiami import (
|
||||||
|
XiamiSongIE,
|
||||||
|
XiamiAlbumIE,
|
||||||
|
XiamiArtistIE,
|
||||||
|
XiamiCollectionIE
|
||||||
|
)
|
||||||
from .xminus import XMinusIE
|
from .xminus import XMinusIE
|
||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
from .xstream import XstreamIE
|
from .xstream import XstreamIE
|
||||||
|
64
youtube_dl/extractor/eyedotv.py
Normal file
64
youtube_dl/extractor/eyedotv.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
xpath_text,
|
||||||
|
parse_duration,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EyedoTVIE(InfoExtractor):
    """Extractor for eyedo.tv event pages (live streams and replays).

    Metadata is read from the ``GetLive`` XML API; the HLS playlist URL is
    built from the numeric video id and the event's live state.
    """

    _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
        'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
        'info_dict': {
            'id': '16301',
            'ext': 'mp4',
            'title': 'Journée du conseil scientifique de l\'Afnic 2015',
            'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
            'uploader': 'Afnic Live',
            'uploader_id': '8023',
        }
    }
    # Prefix for the playlist paths built in _real_extract.
    _ROOT_URL = 'http://live.eyedo.net:1935/'

    def _real_extract(self, url):
        """Return the info dict for the eyedo.tv event at *url*.

        Raises ExtractorError (expected) when the API reports the event
        state ``avenir``, i.e. the video is not yet available. The title
        and live state lookups are fatal; every other metadata field is
        optional and is passed through as returned by ``xpath_text``.
        """
        video_id = self._match_id(url)
        video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)

        def _add_ns(path):
            # Qualify *path* with the namespace used for the API elements.
            return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')

        title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
        # The third argument names the element in the error raised when the
        # lookup fails, so it must describe this field rather than the title.
        state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'state live code', True)
        if state_live_code == 'avenir':
            raise ExtractorError(
                '%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
                expected=True)

        is_live = state_live_code == 'live'
        # Playlist URL layout taken from
        # http://eyedo.tv/Content/Html5/Scripts/html5view.js
        if is_live:
            # NOTE(review): 'Cdn' is the only element looked up without
            # _add_ns(); if it lives in the same namespace as the others
            # this test never succeeds - confirm against an API response.
            if xpath_text(video_data, 'Cdn') == 'true':
                m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
            else:
                m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
        else:
            m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)

        return {
            'id': video_id,
            'title': title,
            'formats': self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
            'description': xpath_text(video_data, _add_ns('Description')),
            'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
            'uploader': xpath_text(video_data, _add_ns('Createur')),
            'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
            'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
            'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
        }
|
@ -1,20 +1,19 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
|
|
||||||
|
|
||||||
class FczenitIE(InfoExtractor):
|
class FczenitIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://fc-zenit.ru/video/gl6785/',
|
'url': 'http://fc-zenit.ru/video/41044/',
|
||||||
'md5': '458bacc24549173fe5a5aa29174a5606',
|
'md5': '0e3fab421b455e970fa1aa3891e57df0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6785',
|
'id': '41044',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»',
|
'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -22,15 +21,23 @@ class FczenitIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title')
|
video_title = self._html_search_regex(
|
||||||
|
r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title')
|
||||||
|
|
||||||
bitrates_raw = self._html_search_regex(r'bitrates:.*\n(.*)\]', webpage, 'video URL')
|
video_items = self._parse_json(self._search_regex(
|
||||||
bitrates = re.findall(r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]{3}?)', bitrates_raw)
|
r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
def merge_dicts(*dicts):
|
||||||
|
ret = {}
|
||||||
|
for a_dict in dicts:
|
||||||
|
ret.update(a_dict)
|
||||||
|
return ret
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': furl,
|
'url': compat_urlparse.urljoin(url, video_url),
|
||||||
'tbr': tbr,
|
'tbr': int(tbr),
|
||||||
} for furl, tbr in bitrates]
|
} for tbr, video_url in merge_dicts(*video_items).items()]
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -24,13 +24,28 @@ class FlickrIE(InfoExtractor):
|
|||||||
'upload_date': '20110423',
|
'upload_date': '20110423',
|
||||||
'uploader_id': '10922353@N03',
|
'uploader_id': '10922353@N03',
|
||||||
'uploader': 'Forest Wander',
|
'uploader': 'Forest Wander',
|
||||||
|
'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'tags': list,
|
'tags': list,
|
||||||
|
'license': 'Attribution-ShareAlike',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_API_BASE_URL = 'https://api.flickr.com/services/rest?'
|
_API_BASE_URL = 'https://api.flickr.com/services/rest?'
|
||||||
|
# https://help.yahoo.com/kb/flickr/SLN25525.html
|
||||||
|
_LICENSES = {
|
||||||
|
'0': 'All Rights Reserved',
|
||||||
|
'1': 'Attribution-NonCommercial-ShareAlike',
|
||||||
|
'2': 'Attribution-NonCommercial',
|
||||||
|
'3': 'Attribution-NonCommercial-NoDerivs',
|
||||||
|
'4': 'Attribution',
|
||||||
|
'5': 'Attribution-ShareAlike',
|
||||||
|
'6': 'Attribution-NoDerivs',
|
||||||
|
'7': 'No known copyright restrictions',
|
||||||
|
'8': 'United States government work',
|
||||||
|
'9': 'Public Domain Dedication (CC0)',
|
||||||
|
'10': 'Public Domain Work',
|
||||||
|
}
|
||||||
|
|
||||||
def _call_api(self, method, video_id, api_key, note, secret=None):
|
def _call_api(self, method, video_id, api_key, note, secret=None):
|
||||||
query = {
|
query = {
|
||||||
@ -75,6 +90,9 @@ class FlickrIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
owner = video_info.get('owner', {})
|
owner = video_info.get('owner', {})
|
||||||
|
uploader_id = owner.get('nsid')
|
||||||
|
uploader_path = owner.get('path_alias') or uploader_id
|
||||||
|
uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -83,11 +101,13 @@ class FlickrIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'timestamp': int_or_none(video_info.get('dateuploaded')),
|
'timestamp': int_or_none(video_info.get('dateuploaded')),
|
||||||
'duration': int_or_none(video_info.get('video', {}).get('duration')),
|
'duration': int_or_none(video_info.get('video', {}).get('duration')),
|
||||||
'uploader_id': owner.get('nsid'),
|
'uploader_id': uploader_id,
|
||||||
'uploader': owner.get('realname'),
|
'uploader': owner.get('realname'),
|
||||||
|
'uploader_url': uploader_url,
|
||||||
'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
|
'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
|
||||||
'view_count': int_or_none(video_info.get('views')),
|
'view_count': int_or_none(video_info.get('views')),
|
||||||
'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
|
'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])],
|
||||||
|
'license': self._LICENSES.get(video_info.get('license')),
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('not a video', expected=True)
|
raise ExtractorError('not a video', expected=True)
|
||||||
|
26
youtube_dl/extractor/formula1.py
Normal file
26
youtube_dl/extractor/formula1.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class Formula1IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||||
|
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Race highlights - Spain 2016',
|
||||||
|
},
|
||||||
|
'add_ie': ['Ooyala'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
ooyala_embed_code = self._search_regex(
|
||||||
|
r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
|
||||||
|
return self.url_result(
|
||||||
|
'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
|
@ -2,6 +2,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_urllib_parse_unquote_plus,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@ -27,6 +31,7 @@ class FunimationIE(InfoExtractor):
|
|||||||
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
|
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
|
||||||
'thumbnail': 're:https?://.*\.jpg',
|
'thumbnail': 're:https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
|
'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
|
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -37,6 +42,7 @@ class FunimationIE(InfoExtractor):
|
|||||||
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
|
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
|
||||||
'thumbnail': 're:https?://.*\.jpg',
|
'thumbnail': 're:https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
|
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
|
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -47,8 +53,36 @@ class FunimationIE(InfoExtractor):
|
|||||||
'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
|
'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
|
||||||
'thumbnail': 're:https?://.*\.(?:jpg|png)',
|
'thumbnail': 're:https?://.*\.(?:jpg|png)',
|
||||||
},
|
},
|
||||||
|
'skip': 'Access without user interaction is forbidden by CloudFlare',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_LOGIN_URL = 'http://www.funimation.com/login'
|
||||||
|
|
||||||
|
def _download_webpage(self, *args, **kwargs):
|
||||||
|
try:
|
||||||
|
return super(FunimationIE, self)._download_webpage(*args, **kwargs)
|
||||||
|
except ExtractorError as ee:
|
||||||
|
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||||
|
response = ee.cause.read()
|
||||||
|
if b'>Please complete the security check to access<' in response:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Access to funimation.com is blocked by CloudFlare. '
|
||||||
|
'Please browse to http://www.funimation.com/, solve '
|
||||||
|
'the reCAPTCHA, export browser cookies to a text file,'
|
||||||
|
' and then try again with --cookies YOUR_COOKIE_FILE.',
|
||||||
|
expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _extract_cloudflare_session_ua(self, url):
|
||||||
|
ci_session_cookie = self._get_cookies(url).get('ci_session')
|
||||||
|
if ci_session_cookie:
|
||||||
|
ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
|
||||||
|
# ci_session is a string serialized by PHP function serialize()
|
||||||
|
# This case is simple enough to use regular expressions only
|
||||||
|
return self._search_regex(
|
||||||
|
r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
|
||||||
|
default=None)
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
@ -57,8 +91,11 @@ class FunimationIE(InfoExtractor):
|
|||||||
'email_field': username,
|
'email_field': username,
|
||||||
'password_field': password,
|
'password_field': password,
|
||||||
})
|
})
|
||||||
login_request = sanitized_Request('http://www.funimation.com/login', data, headers={
|
user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0',
|
if not user_agent:
|
||||||
|
user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
|
||||||
|
login_request = sanitized_Request(self._LOGIN_URL, data, headers={
|
||||||
|
'User-Agent': user_agent,
|
||||||
'Content-Type': 'application/x-www-form-urlencoded'
|
'Content-Type': 'application/x-www-form-urlencoded'
|
||||||
})
|
})
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
@ -103,11 +140,16 @@ class FunimationIE(InfoExtractor):
|
|||||||
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
|
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
user_agent = self._extract_cloudflare_session_ua(url)
|
||||||
|
if user_agent:
|
||||||
|
USER_AGENTS = ((None, user_agent),)
|
||||||
|
|
||||||
for kind, user_agent in USER_AGENTS:
|
for kind, user_agent in USER_AGENTS:
|
||||||
request = sanitized_Request(url)
|
request = sanitized_Request(url)
|
||||||
request.add_header('User-Agent', user_agent)
|
request.add_header('User-Agent', user_agent)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
request, display_id, 'Downloading %s webpage' % kind)
|
request, display_id,
|
||||||
|
'Downloading %s webpage' % kind if kind else 'Downloading webpage')
|
||||||
|
|
||||||
playlist = self._parse_json(
|
playlist = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
|
@ -7,7 +7,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class GazetaIE(InfoExtractor):
|
class GazetaIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
|
_VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
|
'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
|
||||||
'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
|
'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
|
||||||
@ -18,9 +18,19 @@ class GazetaIE(InfoExtractor):
|
|||||||
'description': 'md5:38617526050bd17b234728e7f9620a71',
|
'description': 'md5:38617526050bd17b234728e7f9620a71',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
|
'skip': 'video not found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
|
'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
|
||||||
|
'md5': '37f19f78355eb2f4256ee1688359f24c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '252048',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"',
|
||||||
|
},
|
||||||
|
'add_ie': ['EaglePlatform'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -4,7 +4,6 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
remove_end,
|
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
@ -51,63 +50,33 @@ class GDCVaultIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://gdcvault.com/play/1020791/',
|
'url': 'http://gdcvault.com/play/1020791/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Hard-coded hostname
|
||||||
|
'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
|
||||||
|
'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1023460',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': 'Tenacious-Design-and-The-Interface',
|
||||||
|
'title': 'Tenacious Design and The Interface of \'Destiny\'',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Multiple audios
|
||||||
|
'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1014631',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # Requires rtmpdump
|
||||||
|
'format': 'jp', # The japanese audio
|
||||||
}
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _parse_mp4(self, xml_description):
|
|
||||||
video_formats = []
|
|
||||||
mp4_video = xml_description.find('./metadata/mp4video')
|
|
||||||
if mp4_video is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
|
|
||||||
video_root = mobj.group('root')
|
|
||||||
formats = xml_description.findall('./metadata/MBRVideos/MBRVideo')
|
|
||||||
for format in formats:
|
|
||||||
mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text)
|
|
||||||
url = video_root + mobj.group('path')
|
|
||||||
vbr = format.find('bitrate').text
|
|
||||||
video_formats.append({
|
|
||||||
'url': url,
|
|
||||||
'vbr': int(vbr),
|
|
||||||
})
|
|
||||||
return video_formats
|
|
||||||
|
|
||||||
def _parse_flv(self, xml_description):
|
|
||||||
formats = []
|
|
||||||
akamai_url = xml_description.find('./metadata/akamaiHost').text
|
|
||||||
audios = xml_description.find('./metadata/audios')
|
|
||||||
if audios is not None:
|
|
||||||
for audio in audios:
|
|
||||||
formats.append({
|
|
||||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
|
||||||
'play_path': remove_end(audio.get('url'), '.flv'),
|
|
||||||
'ext': 'flv',
|
|
||||||
'vcodec': 'none',
|
|
||||||
'format_id': audio.get('code'),
|
|
||||||
})
|
|
||||||
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
|
||||||
formats.append({
|
|
||||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
|
||||||
'play_path': remove_end(slide_video_path, '.flv'),
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_note': 'slide deck video',
|
|
||||||
'quality': -2,
|
|
||||||
'preference': -2,
|
|
||||||
'format_id': 'slides',
|
|
||||||
})
|
|
||||||
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
|
||||||
formats.append({
|
|
||||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
|
||||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_note': 'speaker video',
|
|
||||||
'quality': -1,
|
|
||||||
'preference': -1,
|
|
||||||
'format_id': 'speaker',
|
|
||||||
})
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _login(self, webpage_url, display_id):
|
def _login(self, webpage_url, display_id):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None or password is None:
|
if username is None or password is None:
|
||||||
@ -183,17 +152,10 @@ class GDCVaultIE(InfoExtractor):
|
|||||||
r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
|
r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
|
||||||
start_page, 'xml filename')
|
start_page, 'xml filename')
|
||||||
|
|
||||||
xml_description = self._download_xml(
|
|
||||||
'%s/xml/%s' % (xml_root, xml_name), display_id)
|
|
||||||
|
|
||||||
video_title = xml_description.find('./metadata/title').text
|
|
||||||
video_formats = self._parse_mp4(xml_description)
|
|
||||||
if video_formats is None:
|
|
||||||
video_formats = self._parse_flv(xml_description)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': video_title,
|
'url': '%s/xml/%s' % (xml_root, xml_name),
|
||||||
'formats': video_formats,
|
'ie_key': 'DigitallySpeaking',
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE
|
|||||||
from .vimeo import VimeoIE
|
from .vimeo import VimeoIE
|
||||||
from .dailymotion import DailymotionCloudIE
|
from .dailymotion import DailymotionCloudIE
|
||||||
from .onionstudios import OnionStudiosIE
|
from .onionstudios import OnionStudiosIE
|
||||||
from .snagfilms import SnagFilmsEmbedIE
|
from .viewlift import ViewLiftEmbedIE
|
||||||
from .screenwavemedia import ScreenwaveMediaIE
|
from .screenwavemedia import ScreenwaveMediaIE
|
||||||
from .mtv import MTVServicesEmbeddedIE
|
from .mtv import MTVServicesEmbeddedIE
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
@ -60,6 +60,9 @@ from .googledrive import GoogleDriveIE
|
|||||||
from .jwplatform import JWPlatformIE
|
from .jwplatform import JWPlatformIE
|
||||||
from .digiteka import DigitekaIE
|
from .digiteka import DigitekaIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
|
from .liveleak import LiveLeakIE
|
||||||
|
from .threeqsdn import ThreeQSDNIE
|
||||||
|
from .theplatform import ThePlatformIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -104,7 +107,8 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True, # infinite live stream
|
'skip_download': True, # infinite live stream
|
||||||
},
|
},
|
||||||
'expected_warnings': [
|
'expected_warnings': [
|
||||||
r'501.*Not Implemented'
|
r'501.*Not Implemented',
|
||||||
|
r'400.*Bad Request',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
# Direct link with incorrect MIME type
|
# Direct link with incorrect MIME type
|
||||||
@ -235,6 +239,7 @@ class GenericIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'car-20120827-manifest',
|
'title': 'car-20120827-manifest',
|
||||||
'formats': 'mincount:9',
|
'formats': 'mincount:9',
|
||||||
|
'upload_date': '20130904',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'format': 'bestvideo',
|
'format': 'bestvideo',
|
||||||
@ -594,7 +599,11 @@ class GenericIE(InfoExtractor):
|
|||||||
'id': 'k2mm4bCdJ6CQ2i7c8o2',
|
'id': 'k2mm4bCdJ6CQ2i7c8o2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
|
'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
|
||||||
|
'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
|
||||||
'uploader': 'Spi0n',
|
'uploader': 'Spi0n',
|
||||||
|
'uploader_id': 'xgditw',
|
||||||
|
'upload_date': '20140425',
|
||||||
|
'timestamp': 1398441542,
|
||||||
},
|
},
|
||||||
'add_ie': ['Dailymotion'],
|
'add_ie': ['Dailymotion'],
|
||||||
},
|
},
|
||||||
@ -709,15 +718,18 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
# Wistia embed
|
# Wistia embed
|
||||||
{
|
{
|
||||||
'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
||||||
'md5': '8788b683c777a5cf25621eaf286d0c23',
|
'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1cfaf6b7ea',
|
'id': '6e2wtrbdaf',
|
||||||
'ext': 'mov',
|
'ext': 'mov',
|
||||||
'title': 'md5:51364a8d3d009997ba99656004b5e20d',
|
'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
|
||||||
'duration': 643.0,
|
'description': 'a Paywall Videos video from Remilon',
|
||||||
'filesize': 182808282,
|
'duration': 644.072,
|
||||||
'uploader': 'education-portal.com',
|
'uploader': 'study.com',
|
||||||
|
'timestamp': 1459678540,
|
||||||
|
'upload_date': '20160403',
|
||||||
|
'filesize': 24687186,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -726,11 +738,30 @@ class GenericIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'uxjb0lwrcz',
|
'id': 'uxjb0lwrcz',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
|
'title': 'Conversation about Hexagonal Rails Part 1',
|
||||||
|
'description': 'a Martin Fowler video from ThoughtWorks',
|
||||||
'duration': 1715.0,
|
'duration': 1715.0,
|
||||||
'uploader': 'thoughtworks.wistia.com',
|
'uploader': 'thoughtworks.wistia.com',
|
||||||
|
'timestamp': 1401832161,
|
||||||
|
'upload_date': '20140603',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Wistia standard embed (async)
|
||||||
|
{
|
||||||
|
'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '807fafadvk',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Drip Brennan Dunn Workshop',
|
||||||
|
'description': 'a JV Webinars video from getdrip-1',
|
||||||
|
'duration': 4986.95,
|
||||||
|
'timestamp': 1463607249,
|
||||||
|
'upload_date': '20160518',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
# Soundcloud embed
|
# Soundcloud embed
|
||||||
{
|
{
|
||||||
'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
|
'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
|
||||||
@ -753,6 +784,19 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'Rosetta #CometLanding webcast HL 10',
|
'title': 'Rosetta #CometLanding webcast HL 10',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# Another Livestream embed, without 'new.' in URL
|
||||||
|
{
|
||||||
|
'url': 'https://www.freespeech.org/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '123537347',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Live stream
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
# LazyYT
|
# LazyYT
|
||||||
{
|
{
|
||||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||||
@ -837,18 +881,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# Kaltura embed
|
|
||||||
{
|
|
||||||
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1_eergr3h1',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20150226',
|
|
||||||
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
|
|
||||||
'timestamp': int,
|
|
||||||
'title': 'John Carlson Postgame 2/25/15',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# Kaltura embed (different embed code)
|
# Kaltura embed (different embed code)
|
||||||
{
|
{
|
||||||
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
|
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
|
||||||
@ -874,9 +906,23 @@ class GenericIE(InfoExtractor):
|
|||||||
'uploader_id': 'echojecka',
|
'uploader_id': 'echojecka',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Kaltura embed with single quotes
|
||||||
|
{
|
||||||
|
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0_izeg5utt',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '35871',
|
||||||
|
'timestamp': 1355743100,
|
||||||
|
'upload_date': '20121217',
|
||||||
|
'uploader_id': 'batchUser',
|
||||||
|
},
|
||||||
|
'add_ie': ['Kaltura'],
|
||||||
|
},
|
||||||
# Eagle.Platform embed (generic URL)
|
# Eagle.Platform embed (generic URL)
|
||||||
{
|
{
|
||||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||||
|
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '227304',
|
'id': '227304',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -891,6 +937,7 @@ class GenericIE(InfoExtractor):
|
|||||||
# ClipYou (Eagle.Platform) embed (custom URL)
|
# ClipYou (Eagle.Platform) embed (custom URL)
|
||||||
{
|
{
|
||||||
'url': 'http://muz-tv.ru/play/7129/',
|
'url': 'http://muz-tv.ru/play/7129/',
|
||||||
|
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '12820',
|
'id': '12820',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -979,18 +1026,25 @@ class GenericIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': "PFT Live: New leader in the 'new-look' defense",
|
'title': "PFT Live: New leader in the 'new-look' defense",
|
||||||
'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
|
'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
|
||||||
|
'uploader': 'NBCU-SPORTS',
|
||||||
|
'upload_date': '20140107',
|
||||||
|
'timestamp': 1389118457,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# UDN embed
|
# UDN embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.udn.com/news/story/7314/822787',
|
'url': 'https://video.udn.com/news/300346',
|
||||||
'md5': 'fd2060e988c326991037b9aff9df21a6',
|
'md5': 'fd2060e988c326991037b9aff9df21a6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '300346',
|
'id': '300346',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '中一中男師變性 全校師生力挺',
|
'title': '中一中男師變性 全校師生力挺',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
# Ooyala embed
|
# Ooyala embed
|
||||||
{
|
{
|
||||||
@ -1031,6 +1085,9 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'SN Presents: Russell Martin, World Citizen',
|
'title': 'SN Presents: Russell Martin, World Citizen',
|
||||||
'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
|
'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
|
||||||
'uploader': 'Rogers Sportsnet',
|
'uploader': 'Rogers Sportsnet',
|
||||||
|
'uploader_id': '1704050871',
|
||||||
|
'upload_date': '20150525',
|
||||||
|
'timestamp': 1432570283,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# Dailymotion Cloud video
|
# Dailymotion Cloud video
|
||||||
@ -1122,6 +1179,9 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'The Cardinal Pell Interview',
|
'title': 'The Cardinal Pell Interview',
|
||||||
'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
|
'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
|
||||||
'uploader': 'GlobeCast Australia - GlobeStream',
|
'uploader': 'GlobeCast Australia - GlobeStream',
|
||||||
|
'uploader_id': '2733773828001',
|
||||||
|
'upload_date': '20160304',
|
||||||
|
'timestamp': 1457083087,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 downloads
|
# m3u8 downloads
|
||||||
@ -1140,6 +1200,28 @@ class GenericIE(InfoExtractor):
|
|||||||
'upload_date': '20160409',
|
'upload_date': '20160409',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# LiveLeak embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.wykop.pl/link/3088787/',
|
||||||
|
'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '874_1459135191',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Man shows poor quality of new apartment building',
|
||||||
|
'description': 'The wall is like a sand pile.',
|
||||||
|
'uploader': 'Lake8737',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# Duplicated embedded video URLs
|
||||||
|
{
|
||||||
|
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '149298443_480_16c25b74_2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'vs. Blue Orange Spring Game',
|
||||||
|
'uploader': 'www.hudl.com',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
@ -1394,7 +1476,8 @@ class GenericIE(InfoExtractor):
|
|||||||
# Site Name | Video Title
|
# Site Name | Video Title
|
||||||
# Video Title - Tagline | Site Name
|
# Video Title - Tagline | Site Name
|
||||||
# and so on and so forth; it's just not practical
|
# and so on and so forth; it's just not practical
|
||||||
video_title = self._html_search_regex(
|
video_title = self._og_search_title(
|
||||||
|
webpage, default=None) or self._html_search_regex(
|
||||||
r'(?s)<title>(.*?)</title>', webpage, 'video title',
|
r'(?s)<title>(.*?)</title>', webpage, 'video title',
|
||||||
default='video')
|
default='video')
|
||||||
|
|
||||||
@ -1412,6 +1495,9 @@ class GenericIE(InfoExtractor):
|
|||||||
video_uploader = self._search_regex(
|
video_uploader = self._search_regex(
|
||||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||||
|
|
||||||
|
video_description = self._og_search_description(webpage, default=None)
|
||||||
|
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||||
|
|
||||||
# Helper method
|
# Helper method
|
||||||
def _playlist_from_matches(matches, getter=None, ie=None):
|
def _playlist_from_matches(matches, getter=None, ie=None):
|
||||||
urlrs = orderedSet(
|
urlrs = orderedSet(
|
||||||
@ -1442,6 +1528,11 @@ class GenericIE(InfoExtractor):
|
|||||||
if bc_urls:
|
if bc_urls:
|
||||||
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
|
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
|
||||||
|
|
||||||
|
# Look for ThePlatform embeds
|
||||||
|
tp_urls = ThePlatformIE._extract_urls(webpage)
|
||||||
|
if tp_urls:
|
||||||
|
return _playlist_from_matches(tp_urls, ie='ThePlatform')
|
||||||
|
|
||||||
# Look for embedded rtl.nl player
|
# Look for embedded rtl.nl player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||||
@ -1510,21 +1601,26 @@ class GenericIE(InfoExtractor):
|
|||||||
'url': embed_url,
|
'url': embed_url,
|
||||||
'ie_key': 'Wistia',
|
'ie_key': 'Wistia',
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'title': video_title,
|
|
||||||
'id': video_id,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||||
if match:
|
if match:
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
|
'url': 'wistia:%s' % match.group('id'),
|
||||||
'ie_key': 'Wistia',
|
'ie_key': 'Wistia',
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'title': video_title,
|
|
||||||
'id': match.group('id')
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
match = re.search(
|
||||||
|
r'''(?sx)
|
||||||
|
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
|
||||||
|
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
|
||||||
|
''', webpage)
|
||||||
|
if match:
|
||||||
|
return self.url_result(self._proto_relative_url(
|
||||||
|
'wistia:%s' % match.group('id')), 'Wistia')
|
||||||
|
|
||||||
# Look for SVT player
|
# Look for SVT player
|
||||||
svt_url = SVTIE._extract_url(webpage)
|
svt_url = SVTIE._extract_url(webpage)
|
||||||
if svt_url:
|
if svt_url:
|
||||||
@ -1800,7 +1896,7 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
||||||
|
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
|
r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
|
||||||
webpage)
|
webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'Livestream')
|
return self.url_result(mobj.group('url'), 'Livestream')
|
||||||
@ -1812,7 +1908,7 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||||
|
|
||||||
# Look for Kaltura embeds
|
# Look for Kaltura embeds
|
||||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
|
||||||
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
|
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(smuggle_url(
|
return self.url_result(smuggle_url(
|
||||||
@ -1891,10 +1987,10 @@ class GenericIE(InfoExtractor):
|
|||||||
if onionstudios_url:
|
if onionstudios_url:
|
||||||
return self.url_result(onionstudios_url)
|
return self.url_result(onionstudios_url)
|
||||||
|
|
||||||
# Look for SnagFilms embeds
|
# Look for ViewLift embeds
|
||||||
snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
|
viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
|
||||||
if snagfilms_url:
|
if viewlift_url:
|
||||||
return self.url_result(snagfilms_url)
|
return self.url_result(viewlift_url)
|
||||||
|
|
||||||
# Look for JWPlatform embeds
|
# Look for JWPlatform embeds
|
||||||
jwplatform_url = JWPlatformIE._extract_url(webpage)
|
jwplatform_url = JWPlatformIE._extract_url(webpage)
|
||||||
@ -1942,7 +2038,26 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look for Instagram embeds
|
# Look for Instagram embeds
|
||||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||||
if instagram_embed_url is not None:
|
if instagram_embed_url is not None:
|
||||||
return self.url_result(instagram_embed_url, InstagramIE.ie_key())
|
return self.url_result(
|
||||||
|
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
|
||||||
|
|
||||||
|
# Look for LiveLeak embeds
|
||||||
|
liveleak_url = LiveLeakIE._extract_url(webpage)
|
||||||
|
if liveleak_url:
|
||||||
|
return self.url_result(liveleak_url, 'LiveLeak')
|
||||||
|
|
||||||
|
# Look for 3Q SDN embeds
|
||||||
|
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
|
||||||
|
if threeqsdn_url:
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': ThreeQSDNIE.ie_key(),
|
||||||
|
'url': self._proto_relative_url(threeqsdn_url),
|
||||||
|
'title': video_title,
|
||||||
|
'description': video_description,
|
||||||
|
'thumbnail': video_thumbnail,
|
||||||
|
'uploader': video_uploader,
|
||||||
|
}
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
@ -2024,7 +2139,8 @@ class GenericIE(InfoExtractor):
|
|||||||
raise UnsupportedError(url)
|
raise UnsupportedError(url)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for video_url in found:
|
for video_url in orderedSet(found):
|
||||||
|
video_url = unescapeHTML(video_url)
|
||||||
video_url = video_url.replace('\\/', '/')
|
video_url = video_url.replace('\\/', '/')
|
||||||
video_url = compat_urlparse.urljoin(url, video_url)
|
video_url = compat_urlparse.urljoin(url, video_url)
|
||||||
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
|
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
|
||||||
|
@ -2,12 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
xpath_element,
|
|
||||||
xpath_text,
|
|
||||||
int_or_none,
|
|
||||||
parse_duration,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class GPUTechConfIE(InfoExtractor):
|
class GPUTechConfIE(InfoExtractor):
|
||||||
@ -27,29 +21,15 @@ class GPUTechConfIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/')
|
root_path = self._search_regex(
|
||||||
xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
|
r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path',
|
||||||
|
default='http://evt.dispeak.com/nvidia/events/gtc15/')
|
||||||
doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id)
|
xml_file_id = self._search_regex(
|
||||||
|
r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')
|
||||||
metadata = xpath_element(doc, 'metadata')
|
|
||||||
http_host = xpath_text(metadata, 'httpHost', 'http host', True)
|
|
||||||
mbr_videos = xpath_element(metadata, 'MBRVideos')
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for mbr_video in mbr_videos.findall('MBRVideo'):
|
|
||||||
stream_name = xpath_text(mbr_video, 'streamName')
|
|
||||||
if stream_name:
|
|
||||||
formats.append({
|
|
||||||
'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')),
|
|
||||||
'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': xpath_text(metadata, 'title'),
|
'url': '%sxml/%s.xml' % (root_path, xml_file_id),
|
||||||
'duration': parse_duration(xpath_text(metadata, 'endTime')),
|
'ie_key': 'DigitallySpeaking',
|
||||||
'creator': xpath_text(metadata, 'speaker'),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class GrouponIE(InfoExtractor):
|
class GrouponIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
|
'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
|
||||||
@ -14,17 +14,27 @@ class GrouponIE(InfoExtractor):
|
|||||||
'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
|
'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
|
'md5': '42428ce8a00585f9bc36e49226eae7a1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
|
'id': 'fk6OhWpXgIQ',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Bikram Yoga Huntington Beach | Orange County',
|
'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf',
|
||||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
'duration': 44.961,
|
'duration': 45,
|
||||||
|
'upload_date': '20160405',
|
||||||
|
'uploader_id': 'groupon',
|
||||||
|
'uploader': 'Groupon',
|
||||||
},
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
}],
|
}],
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'HDS',
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_PROVIDERS = {
|
||||||
|
'ooyala': ('ooyala:%s', 'Ooyala'),
|
||||||
|
'youtube': ('%s', 'Youtube'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -36,12 +46,17 @@ class GrouponIE(InfoExtractor):
|
|||||||
videos = payload['carousel'].get('dealVideos', [])
|
videos = payload['carousel'].get('dealVideos', [])
|
||||||
entries = []
|
entries = []
|
||||||
for v in videos:
|
for v in videos:
|
||||||
if v.get('provider') != 'OOYALA':
|
provider = v.get('provider')
|
||||||
|
video_id = v.get('media') or v.get('id') or v.get('baseURL')
|
||||||
|
if not provider or not video_id:
|
||||||
|
continue
|
||||||
|
url_pattern, ie_key = self._PROVIDERS.get(provider.lower())
|
||||||
|
if not url_pattern:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'%s: Unsupported video provider %s, skipping video' %
|
'%s: Unsupported video provider %s, skipping video' %
|
||||||
(playlist_id, v.get('provider')))
|
(playlist_id, provider))
|
||||||
continue
|
continue
|
||||||
entries.append(self.url_result('ooyala:%s' % v['media']))
|
entries.append(self.url_result(url_pattern % video_id, ie_key))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
|
@ -7,6 +7,7 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
KNOWN_EXTENSIONS,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
@ -17,7 +18,7 @@ from ..utils import (
|
|||||||
class HearThisAtIE(InfoExtractor):
|
class HearThisAtIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
|
_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
|
||||||
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
|
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://hearthis.at/moofi/dr-kreep',
|
'url': 'https://hearthis.at/moofi/dr-kreep',
|
||||||
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
|
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -26,7 +27,7 @@ class HearThisAtIE(InfoExtractor):
|
|||||||
'title': 'Moofi - Dr. Kreep',
|
'title': 'Moofi - Dr. Kreep',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'timestamp': 1421564134,
|
'timestamp': 1421564134,
|
||||||
'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.',
|
'description': 'Listen to Dr. Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP',
|
||||||
'upload_date': '20150118',
|
'upload_date': '20150118',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
@ -34,7 +35,25 @@ class HearThisAtIE(InfoExtractor):
|
|||||||
'duration': 71,
|
'duration': 71,
|
||||||
'categories': ['Experimental'],
|
'categories': ['Experimental'],
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# 'download' link redirects to the original webpage
|
||||||
|
'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
|
||||||
|
'md5': '5980ceb7c461605d30f1f039df160c6e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '811296',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
|
||||||
|
'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! by TwitchSF on hearthis.at - Dance',
|
||||||
|
'upload_date': '20160328',
|
||||||
|
'timestamp': 1459186146,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'duration': 4360,
|
||||||
|
'categories': ['Dance'],
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
@ -90,6 +109,7 @@ class HearThisAtIE(InfoExtractor):
|
|||||||
ext_handle = self._request_webpage(
|
ext_handle = self._request_webpage(
|
||||||
ext_req, display_id, note='Determining extension')
|
ext_req, display_id, note='Determining extension')
|
||||||
ext = urlhandle_detect_ext(ext_handle)
|
ext = urlhandle_detect_ext(ext_handle)
|
||||||
|
if ext in KNOWN_EXTENSIONS:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'download',
|
'format_id': 'download',
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
|
@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||||
'md5': '8b743df908c42f60cf6496586c7f12c3',
|
'md5': '7d45932269a288149483144f01b99789',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '390161',
|
'id': '390161',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
|
|||||||
'duration': 56.823,
|
'duration': 56.823,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'add_ie': ['Ooyala'],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -4,6 +4,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
@ -29,7 +30,12 @@ class HuffPostIE(InfoExtractor):
|
|||||||
'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
|
'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
|
||||||
'duration': 1549,
|
'duration': 1549,
|
||||||
'upload_date': '20140124',
|
'upload_date': '20140124',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 404: Not Found'],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -45,7 +51,7 @@ class HuffPostIE(InfoExtractor):
|
|||||||
description = data.get('description')
|
description = data.get('description')
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for url in data['images'].values():
|
for url in filter(None, data['images'].values()):
|
||||||
m = re.match('.*-([0-9]+x[0-9]+)\.', url)
|
m = re.match('.*-([0-9]+x[0-9]+)\.', url)
|
||||||
if not m:
|
if not m:
|
||||||
continue
|
continue
|
||||||
@ -54,13 +60,25 @@ class HuffPostIE(InfoExtractor):
|
|||||||
'resolution': m.group(1),
|
'resolution': m.group(1),
|
||||||
})
|
})
|
||||||
|
|
||||||
formats = [{
|
formats = []
|
||||||
|
sources = data.get('sources', {})
|
||||||
|
live_sources = list(sources.get('live', {}).items()) + list(sources.get('live_again', {}).items())
|
||||||
|
for key, url in live_sources:
|
||||||
|
ext = determine_ext(url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||||
|
elif ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
'format': key,
|
'format': key,
|
||||||
'format_id': key.replace('/', '.'),
|
'format_id': key.replace('/', '.'),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'url': url,
|
'url': url,
|
||||||
'vcodec': 'none' if key.startswith('audio/') else None,
|
'vcodec': 'none' if key.startswith('audio/') else None,
|
||||||
} for key, url in data.get('sources', {}).get('live', {}).items()]
|
})
|
||||||
|
|
||||||
if not formats and data.get('fivemin_id'):
|
if not formats and data.get('fivemin_id'):
|
||||||
return self.url_result('5min:%s' % data['fivemin_id'])
|
return self.url_result('5min:%s' % data['fivemin_id'])
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
mimetype2ext,
|
||||||
qualities,
|
qualities,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -12,9 +12,9 @@ from ..utils import (
|
|||||||
class ImdbIE(InfoExtractor):
|
class ImdbIE(InfoExtractor):
|
||||||
IE_NAME = 'imdb'
|
IE_NAME = 'imdb'
|
||||||
IE_DESC = 'Internet Movie Database trailers'
|
IE_DESC = 'Internet Movie Database trailers'
|
||||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2524815897',
|
'id': '2524815897',
|
||||||
@ -22,7 +22,10 @@ class ImdbIE(InfoExtractor):
|
|||||||
'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
|
'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
|
||||||
'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
|
'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@ -48,13 +51,27 @@ class ImdbIE(InfoExtractor):
|
|||||||
json_data = self._search_regex(
|
json_data = self._search_regex(
|
||||||
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
|
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
|
||||||
format_page, 'json data', flags=re.DOTALL)
|
format_page, 'json data', flags=re.DOTALL)
|
||||||
info = json.loads(json_data)
|
info = self._parse_json(json_data, video_id, fatal=False)
|
||||||
format_info = info['videoPlayerObject']['video']
|
if not info:
|
||||||
f_id = format_info['ffname']
|
continue
|
||||||
|
format_info = info.get('videoPlayerObject', {}).get('video', {})
|
||||||
|
if not format_info:
|
||||||
|
continue
|
||||||
|
video_info_list = format_info.get('videoInfoList')
|
||||||
|
if not video_info_list or not isinstance(video_info_list, list):
|
||||||
|
continue
|
||||||
|
video_info = video_info_list[0]
|
||||||
|
if not video_info or not isinstance(video_info, dict):
|
||||||
|
continue
|
||||||
|
video_url = video_info.get('videoUrl')
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
format_id = format_info.get('ffname')
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': f_id,
|
'format_id': format_id,
|
||||||
'url': format_info['videoInfoList'][0]['videoUrl'],
|
'url': video_url,
|
||||||
'quality': quality(f_id),
|
'ext': mimetype2ext(video_info.get('videoMimeType')),
|
||||||
|
'quality': quality(format_id),
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class InstagramIE(InfoExtractor):
|
class InstagramIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||||
'md5': '0d2da106a9d2631273e192b372806516',
|
'md5': '0d2da106a9d2631273e192b372806516',
|
||||||
@ -38,10 +38,19 @@ class InstagramIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_embed_url(webpage):
|
def _extract_embed_url(webpage):
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group('url')
|
||||||
|
|
||||||
blockquote_el = get_element_by_attribute(
|
blockquote_el = get_element_by_attribute(
|
||||||
'class', 'instagram-media', webpage)
|
'class', 'instagram-media', webpage)
|
||||||
if blockquote_el is None:
|
if blockquote_el is None:
|
||||||
@ -53,7 +62,9 @@ class InstagramIE(InfoExtractor):
|
|||||||
return mobj.group('link')
|
return mobj.group('link')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
url = mobj.group('url')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||||
|
@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor):
|
|||||||
IE_NAME = 'iqiyi'
|
IE_NAME = 'iqiyi'
|
||||||
IE_DESC = '爱奇艺'
|
IE_DESC = '爱奇艺'
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html'
|
_VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
|
||||||
|
|
||||||
_NETRC_MACHINE = 'iqiyi'
|
_NETRC_MACHINE = 'iqiyi'
|
||||||
|
|
||||||
@ -273,6 +273,9 @@ class IqiyiIE(InfoExtractor):
|
|||||||
'title': '灌篮高手 国语版',
|
'title': '灌篮高手 国语版',
|
||||||
},
|
},
|
||||||
'playlist_count': 101,
|
'playlist_count': 101,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.pps.tv/w_19rrbav0ph.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_FORMATS_MAP = [
|
_FORMATS_MAP = [
|
||||||
@ -284,6 +287,13 @@ class IqiyiIE(InfoExtractor):
|
|||||||
('10', 'h1'),
|
('10', 'h1'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
AUTH_API_ERRORS = {
|
||||||
|
# No preview available (不允许试看鉴权失败)
|
||||||
|
'Q00505': 'This video requires a VIP account',
|
||||||
|
# End of preview time (试看结束鉴权失败)
|
||||||
|
'Q00506': 'Needs a VIP account for full video',
|
||||||
|
}
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
@ -369,14 +379,18 @@ class IqiyiIE(InfoExtractor):
|
|||||||
note='Downloading video authentication JSON',
|
note='Downloading video authentication JSON',
|
||||||
errnote='Unable to download video authentication JSON')
|
errnote='Unable to download video authentication JSON')
|
||||||
|
|
||||||
if auth_result['code'] == 'Q00505': # No preview available (不允许试看鉴权失败)
|
code = auth_result.get('code')
|
||||||
raise ExtractorError('This video requires a VIP account', expected=True)
|
msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code
|
||||||
if auth_result['code'] == 'Q00506': # End of preview time (试看结束鉴权失败)
|
if code == 'Q00506':
|
||||||
if do_report_warning:
|
if do_report_warning:
|
||||||
self.report_warning('Needs a VIP account for full video')
|
self.report_warning(msg)
|
||||||
return False
|
return False
|
||||||
|
if 'data' not in auth_result:
|
||||||
|
if msg is not None:
|
||||||
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True)
|
||||||
|
raise ExtractorError('Unexpected error from Iqiyi auth API')
|
||||||
|
|
||||||
return auth_result
|
return auth_result['data']
|
||||||
|
|
||||||
def construct_video_urls(self, data, video_id, _uuid, tvid):
|
def construct_video_urls(self, data, video_id, _uuid, tvid):
|
||||||
def do_xor(x, y):
|
def do_xor(x, y):
|
||||||
@ -452,11 +466,11 @@ class IqiyiIE(InfoExtractor):
|
|||||||
need_vip_warning_report = False
|
need_vip_warning_report = False
|
||||||
break
|
break
|
||||||
param.update({
|
param.update({
|
||||||
't': auth_result['data']['t'],
|
't': auth_result['t'],
|
||||||
# cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
|
# cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
|
||||||
'cid': 'afbe8fd3d73448c9',
|
'cid': 'afbe8fd3d73448c9',
|
||||||
'vid': video_id,
|
'vid': video_id,
|
||||||
'QY00001': auth_result['data']['u'],
|
'QY00001': auth_result['u'],
|
||||||
})
|
})
|
||||||
api_video_url += '?' if '?' not in api_video_url else '&'
|
api_video_url += '?' if '?' not in api_video_url else '&'
|
||||||
api_video_url += compat_urllib_parse_urlencode(param)
|
api_video_url += compat_urllib_parse_urlencode(param)
|
||||||
@ -491,7 +505,10 @@ class IqiyiIE(InfoExtractor):
|
|||||||
'enc': md5_text(enc_key + tail),
|
'enc': md5_text(enc_key + tail),
|
||||||
'qyid': _uuid,
|
'qyid': _uuid,
|
||||||
'tn': random.random(),
|
'tn': random.random(),
|
||||||
'um': 0,
|
# In iQiyi's flash player, um is set to 1 if there's a logged user
|
||||||
|
# Some 1080P formats are only available with a logged user.
|
||||||
|
# Here force um=1 to trick the iQiyi server
|
||||||
|
'um': 1,
|
||||||
'authkey': md5_text(md5_text('') + tail),
|
'authkey': md5_text(md5_text('') + tail),
|
||||||
'k_tag': 1,
|
'k_tag': 1,
|
||||||
}
|
}
|
||||||
|
@ -5,33 +5,50 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class JWPlatformBaseIE(InfoExtractor):
|
class JWPlatformBaseIE(InfoExtractor):
|
||||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
|
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
|
||||||
video_data = jwplayer_data['playlist'][0]
|
video_data = jwplayer_data['playlist'][0]
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for source in video_data['sources']:
|
for source in video_data['sources']:
|
||||||
source_url = self._proto_relative_url(source['file'])
|
source_url = self._proto_relative_url(source['file'])
|
||||||
source_type = source.get('type') or ''
|
source_type = source.get('type') or ''
|
||||||
if source_type in ('application/vnd.apple.mpegurl', 'hls'):
|
if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
|
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||||
elif source_type.startswith('audio'):
|
elif source_type.startswith('audio'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': source_url,
|
'url': source_url,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
formats.append({
|
a_format = {
|
||||||
'url': source_url,
|
'url': source_url,
|
||||||
'width': int_or_none(source.get('width')),
|
'width': int_or_none(source.get('width')),
|
||||||
'height': int_or_none(source.get('height')),
|
'height': int_or_none(source.get('height')),
|
||||||
|
}
|
||||||
|
if source_url.startswith('rtmp'):
|
||||||
|
a_format['ext'] = 'flv',
|
||||||
|
|
||||||
|
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||||
|
# of jwplayer.flash.swf
|
||||||
|
rtmp_url_parts = re.split(
|
||||||
|
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||||
|
if len(rtmp_url_parts) == 3:
|
||||||
|
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||||
|
a_format.update({
|
||||||
|
'url': rtmp_url,
|
||||||
|
'play_path': prefix + play_path,
|
||||||
})
|
})
|
||||||
|
if rtmp_params:
|
||||||
|
a_format.update(rtmp_params)
|
||||||
|
formats.append(a_format)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@ -2,39 +2,63 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_unquote_plus
|
|
||||||
from ..utils import (
|
|
||||||
js_to_json,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class KaraoketvIE(InfoExtractor):
|
class KaraoketvIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
|
_VALID_URL = r'http://www.karaoketv.co.il/[^/]+/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://karaoketv.co.il/?container=songs&id=171568',
|
'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '171568',
|
'id': '58356',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'אל העולם שלך - רותם כהן - שרים קריוקי',
|
'title': 'קריוקי של איזון',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
api_page_url = self._search_regex(
|
||||||
|
r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
|
||||||
|
webpage, 'API play URL', group='url')
|
||||||
|
|
||||||
page_video_url = self._og_search_video_url(webpage, video_id)
|
api_page = self._download_webpage(api_page_url, video_id)
|
||||||
config_json = compat_urllib_parse_unquote_plus(self._search_regex(
|
video_cdn_url = self._search_regex(
|
||||||
r'config=(.*)', page_video_url, 'configuration'))
|
r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
|
||||||
|
api_page, 'video cdn URL', group='url')
|
||||||
|
|
||||||
urls_info_json = self._download_json(
|
video_cdn = self._download_webpage(video_cdn_url, video_id)
|
||||||
config_json, video_id, 'Downloading configuration',
|
play_path = self._parse_json(
|
||||||
transform_source=js_to_json)
|
self._search_regex(
|
||||||
|
r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
|
||||||
|
video_id)['clip']['url']
|
||||||
|
|
||||||
url = urls_info_json['playlist'][0]['url']
|
settings = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
|
||||||
|
video_id, fatal=False) or {}
|
||||||
|
|
||||||
|
servers = settings.get('servers')
|
||||||
|
if not servers or not isinstance(servers, list):
|
||||||
|
servers = ('wowzail.video-cdn.com:80/vodcdn', )
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server,
|
||||||
|
'play_path': play_path,
|
||||||
|
'app': 'vodcdn',
|
||||||
|
'page_url': video_cdn_url,
|
||||||
|
'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
|
||||||
|
'rtmp_real_time': True,
|
||||||
|
'ext': 'flv',
|
||||||
|
} for server in servers]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'url': url,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -52,9 +52,12 @@ class KarriereVideosIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'/config/video/(.+?)\.xml', webpage, 'video id')
|
r'/config/video/(.+?)\.xml', webpage, 'video id')
|
||||||
|
# Server returns malformed headers
|
||||||
|
# Force Accept-Encoding: * to prevent gzipped results
|
||||||
playlist = self._download_xml(
|
playlist = self._download_xml(
|
||||||
'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
|
'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
|
||||||
video_id, transform_source=fix_xml_ampersands)
|
video_id, transform_source=fix_xml_ampersands,
|
||||||
|
headers={'Accept-Encoding': '*'})
|
||||||
|
|
||||||
NS_MAP = {
|
NS_MAP = {
|
||||||
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
|
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
|
||||||
|
@ -81,7 +81,7 @@ class KuwoIE(KuwoBaseIE):
|
|||||||
'id': '6446136',
|
'id': '6446136',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '心',
|
'title': '心',
|
||||||
'description': 'md5:b2ab6295d014005bfc607525bfc1e38a',
|
'description': 'md5:5d0e947b242c35dc0eb1d2fce9fbf02c',
|
||||||
'creator': 'IU',
|
'creator': 'IU',
|
||||||
'upload_date': '20150518',
|
'upload_date': '20150518',
|
||||||
},
|
},
|
||||||
@ -102,10 +102,10 @@ class KuwoIE(KuwoBaseIE):
|
|||||||
raise ExtractorError('this song has been offline because of copyright issues', expected=True)
|
raise ExtractorError('this song has been offline because of copyright issues', expected=True)
|
||||||
|
|
||||||
song_name = self._html_search_regex(
|
song_name = self._html_search_regex(
|
||||||
r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?<h1[^>]+title="([^"]+)"', webpage, 'song name')
|
r'<p[^>]+id="lrcName">([^<]+)</p>', webpage, 'song name')
|
||||||
singer_name = self._html_search_regex(
|
singer_name = remove_start(self._html_search_regex(
|
||||||
r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
|
r'<a[^>]+href="http://www\.kuwo\.cn/artist/content\?name=([^"]+)">',
|
||||||
webpage, 'singer name', fatal=False)
|
webpage, 'singer name', fatal=False), '歌手')
|
||||||
lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
|
lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
|
||||||
if lrc_content == '暂无': # indicates no lyrics
|
if lrc_content == '暂无': # indicates no lyrics
|
||||||
lrc_content = None
|
lrc_content = None
|
||||||
@ -114,7 +114,7 @@ class KuwoIE(KuwoBaseIE):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
album_id = self._html_search_regex(
|
album_id = self._html_search_regex(
|
||||||
r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
|
r'<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
|
||||||
webpage, 'album id', fatal=False)
|
webpage, 'album id', fatal=False)
|
||||||
|
|
||||||
publish_time = None
|
publish_time = None
|
||||||
@ -268,7 +268,7 @@ class KuwoCategoryIE(InfoExtractor):
|
|||||||
'title': '八十年代精选',
|
'title': '八十年代精选',
|
||||||
'description': '这些都是属于八十年代的回忆!',
|
'description': '这些都是属于八十年代的回忆!',
|
||||||
},
|
},
|
||||||
'playlist_count': 30,
|
'playlist_mincount': 24,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -283,6 +283,8 @@ class KuwoCategoryIE(InfoExtractor):
|
|||||||
category_desc = remove_start(
|
category_desc = remove_start(
|
||||||
get_element_by_id('intro', webpage).strip(),
|
get_element_by_id('intro', webpage).strip(),
|
||||||
'%s简介:' % category_name)
|
'%s简介:' % category_name)
|
||||||
|
if category_desc == '暂无':
|
||||||
|
category_desc = None
|
||||||
|
|
||||||
jsonm = self._parse_json(self._html_search_regex(
|
jsonm = self._parse_json(self._html_search_regex(
|
||||||
r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
|
r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
|
||||||
|
@ -63,6 +63,7 @@ class Laola1TvIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'This live stream has already finished.',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -74,6 +75,9 @@ class Laola1TvIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||||
|
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||||
|
|
||||||
iframe_url = self._search_regex(
|
iframe_url = self._search_regex(
|
||||||
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
|
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
|
||||||
webpage, 'iframe url')
|
webpage, 'iframe url')
|
||||||
|
33
youtube_dl/extractor/learnr.py
Normal file
33
youtube_dl/extractor/learnr.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class LearnrIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
|
||||||
|
'md5': '3719fdf0a68397f49899e82c308a89de',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '51624',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
|
||||||
|
'description': 'md5:b36dbfa92350176cdf12b4d388485503',
|
||||||
|
'uploader': 'LearnCode.academy',
|
||||||
|
'uploader_id': 'learncodeacademy',
|
||||||
|
'upload_date': '20131021',
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': self._search_regex(
|
||||||
|
r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'),
|
||||||
|
'id': video_id,
|
||||||
|
}
|
@ -6,6 +6,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
determine_protocol,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
@ -18,10 +19,14 @@ class Lecture2GoIE(InfoExtractor):
|
|||||||
'md5': 'ac02b570883020d208d405d5a3fd2f7f',
|
'md5': 'ac02b570883020d208d405d5a3fd2f7f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '17473',
|
'id': '17473',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': '2 - Endliche Automaten und reguläre Sprachen',
|
'title': '2 - Endliche Automaten und reguläre Sprachen',
|
||||||
'creator': 'Frank Heitmann',
|
'creator': 'Frank Heitmann',
|
||||||
'duration': 5220,
|
'duration': 5220,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -32,14 +37,18 @@ class Lecture2GoIE(InfoExtractor):
|
|||||||
title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
|
title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for url in set(re.findall(r'"src","([^"]+)"', webpage)):
|
for url in set(re.findall(r'var\s+playerUri\d+\s*=\s*"([^"]+)"', webpage)):
|
||||||
ext = determine_ext(url)
|
ext = determine_ext(url)
|
||||||
|
protocol = determine_protocol({'url': url})
|
||||||
if ext == 'f4m':
|
if ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(url, video_id))
|
formats.extend(self._extract_f4m_formats(url, video_id, f4m_id='hds'))
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(url, video_id))
|
formats.extend(self._extract_m3u8_formats(url, video_id, ext='mp4', m3u8_id='hls'))
|
||||||
else:
|
else:
|
||||||
|
if protocol == 'rtmp':
|
||||||
|
continue # XXX: currently broken
|
||||||
formats.append({
|
formats.append({
|
||||||
|
'format_id': protocol,
|
||||||
'url': url,
|
'url': url,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -7,48 +7,53 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
|
||||||
remove_end,
|
|
||||||
unified_strdate,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LifeNewsIE(InfoExtractor):
|
class LifeNewsIE(InfoExtractor):
|
||||||
IE_NAME = 'lifenews'
|
IE_NAME = 'life'
|
||||||
IE_DESC = 'LIFE | NEWS'
|
IE_DESC = 'Life.ru'
|
||||||
_VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
|
_VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# single video embedded via video/source
|
# single video embedded via video/source
|
||||||
'url': 'http://lifenews.ru/news/98736',
|
'url': 'https://life.ru/t/новости/98736',
|
||||||
'md5': '77c95eaefaca216e32a76a343ad89d23',
|
'md5': '77c95eaefaca216e32a76a343ad89d23',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '98736',
|
'id': '98736',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Мужчина нашел дома архив оборонного завода',
|
'title': 'Мужчина нашел дома архив оборонного завода',
|
||||||
'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
|
'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
|
||||||
|
'timestamp': 1344154740,
|
||||||
'upload_date': '20120805',
|
'upload_date': '20120805',
|
||||||
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# single video embedded via iframe
|
# single video embedded via iframe
|
||||||
'url': 'http://lifenews.ru/news/152125',
|
'url': 'https://life.ru/t/новости/152125',
|
||||||
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
|
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '152125',
|
'id': '152125',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
||||||
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
||||||
|
'timestamp': 1427961840,
|
||||||
'upload_date': '20150402',
|
'upload_date': '20150402',
|
||||||
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# two videos embedded via iframe
|
# two videos embedded via iframe
|
||||||
'url': 'http://lifenews.ru/news/153461',
|
'url': 'https://life.ru/t/новости/153461',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '153461',
|
'id': '153461',
|
||||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||||
'upload_date': '20150505',
|
'timestamp': 1430825520,
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
|
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
|
||||||
@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
|
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
|
||||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||||
|
'timestamp': 1430825520,
|
||||||
'upload_date': '20150505',
|
'upload_date': '20150505',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
@ -66,22 +72,25 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
|
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
|
||||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||||
|
'timestamp': 1430825520,
|
||||||
'upload_date': '20150505',
|
'upload_date': '20150505',
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://lifenews.ru/video/13035',
|
'url': 'https://life.ru/t/новости/213035',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
section = mobj.group('section')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(url, video_id)
|
||||||
'http://lifenews.ru/%s/%s' % (section, video_id),
|
|
||||||
video_id, 'Downloading page')
|
|
||||||
|
|
||||||
video_urls = re.findall(
|
video_urls = re.findall(
|
||||||
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
|
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
|
||||||
@ -95,26 +104,22 @@ class LifeNewsIE(InfoExtractor):
|
|||||||
|
|
||||||
title = remove_end(
|
title = remove_end(
|
||||||
self._og_search_title(webpage),
|
self._og_search_title(webpage),
|
||||||
' - Первый по срочным новостям — LIFE | NEWS')
|
' - Life.ru')
|
||||||
|
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
|
|
||||||
view_count = self._html_search_regex(
|
view_count = self._html_search_regex(
|
||||||
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
|
r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
|
||||||
comment_count = self._html_search_regex(
|
webpage, 'view count', fatal=False, group='value')
|
||||||
r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
|
|
||||||
webpage, 'comment count', fatal=False)
|
|
||||||
|
|
||||||
upload_date = self._html_search_regex(
|
timestamp = parse_iso8601(self._search_regex(
|
||||||
r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
|
r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
|
||||||
if upload_date is not None:
|
webpage, 'upload date', fatal=False, group='value'))
|
||||||
upload_date = unified_strdate(upload_date)
|
|
||||||
|
|
||||||
common_info = {
|
common_info = {
|
||||||
'description': description,
|
'description': description,
|
||||||
'view_count': int_or_none(view_count),
|
'view_count': int_or_none(view_count),
|
||||||
'comment_count': int_or_none(comment_count),
|
'timestamp': timestamp,
|
||||||
'upload_date': upload_date,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def make_entry(video_id, video_url, index=None):
|
def make_entry(video_id, video_url, index=None):
|
||||||
@ -183,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
|
|||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_url, video_id, 'mp4', m3u8_id='m3u8'))
|
video_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='m3u8'))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
137
youtube_dl/extractor/litv.py
Normal file
137
youtube_dl/extractor/litv.py
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
smuggle_url,
|
||||||
|
unsmuggle_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LiTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P<id>[^&]+)'
|
||||||
|
|
||||||
|
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VOD00041606',
|
||||||
|
'title': '花千骨',
|
||||||
|
},
|
||||||
|
'playlist_count': 50,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VOD00041610',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '花千骨第1集',
|
||||||
|
'thumbnail': 're:https?://.*\.jpg$',
|
||||||
|
'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f',
|
||||||
|
'episode_number': 1,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True,
|
||||||
|
'skip_download': True, # m3u8 download
|
||||||
|
},
|
||||||
|
'skip': 'Georestricted to Taiwan',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True):
|
||||||
|
episode_title = view_data['title']
|
||||||
|
content_id = season_list['contentId']
|
||||||
|
|
||||||
|
if prompt:
|
||||||
|
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id))
|
||||||
|
|
||||||
|
all_episodes = [
|
||||||
|
self.url_result(smuggle_url(
|
||||||
|
self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']),
|
||||||
|
{'force_noplaylist': True})) # To prevent infinite recursion
|
||||||
|
for episode in season_list['episode']]
|
||||||
|
|
||||||
|
return self.playlist_result(all_episodes, content_id, episode_title)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
url, data = unsmuggle_url(url, {})
|
||||||
|
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
noplaylist = self._downloader.params.get('noplaylist')
|
||||||
|
noplaylist_prompt = True
|
||||||
|
if 'force_noplaylist' in data:
|
||||||
|
noplaylist = data['force_noplaylist']
|
||||||
|
noplaylist_prompt = False
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
view_data = dict(map(lambda t: (t[0], t[2]), re.findall(
|
||||||
|
r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2',
|
||||||
|
webpage)))
|
||||||
|
|
||||||
|
vod_data = self._parse_json(self._search_regex(
|
||||||
|
'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
season_list = list(vod_data.get('seasonList', {}).values())
|
||||||
|
if season_list:
|
||||||
|
if not noplaylist:
|
||||||
|
return self._extract_playlist(
|
||||||
|
season_list[0], video_id, vod_data, view_data,
|
||||||
|
prompt=noplaylist_prompt)
|
||||||
|
|
||||||
|
if noplaylist_prompt:
|
||||||
|
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||||
|
|
||||||
|
# In browsers `getMainUrl` request is always issued. Usually this
|
||||||
|
# endpoint gives the same result as the data embedded in the webpage.
|
||||||
|
# If georestricted, there are no embedded data, so an extra request is
|
||||||
|
# necessary to get the error code
|
||||||
|
video_data = self._parse_json(self._search_regex(
|
||||||
|
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
||||||
|
webpage, 'video data', default='{}'), video_id)
|
||||||
|
if not video_data:
|
||||||
|
payload = {
|
||||||
|
'assetId': view_data['assetId'],
|
||||||
|
'watchDevices': vod_data['watchDevices'],
|
||||||
|
'contentType': view_data['contentType'],
|
||||||
|
}
|
||||||
|
video_data = self._download_json(
|
||||||
|
'https://www.litv.tv/vod/getMainUrl', video_id,
|
||||||
|
data=json.dumps(payload).encode('utf-8'),
|
||||||
|
headers={'Content-Type': 'application/json'})
|
||||||
|
|
||||||
|
if not video_data.get('fullpath'):
|
||||||
|
error_msg = video_data.get('errorMessage')
|
||||||
|
if error_msg == 'vod.error.outsideregionerror':
|
||||||
|
self.raise_geo_restricted('This video is available in Taiwan only')
|
||||||
|
if error_msg:
|
||||||
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True)
|
||||||
|
raise ExtractorError('Unexpected result from %s' % self.IE_NAME)
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls')
|
||||||
|
for a_format in formats:
|
||||||
|
# LiTV HLS segments doesn't like compressions
|
||||||
|
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
|
||||||
|
|
||||||
|
title = view_data['title'] + view_data.get('secondaryMark', '')
|
||||||
|
description = view_data.get('description')
|
||||||
|
thumbnail = view_data.get('imageFile')
|
||||||
|
categories = [item['name'] for item in vod_data.get('category', [])]
|
||||||
|
episode = int_or_none(view_data.get('episode'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'categories': categories,
|
||||||
|
'episode_number': episode,
|
||||||
|
}
|
@ -17,7 +17,8 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'description': 'extremely bad day for this guy..!',
|
'description': 'extremely bad day for this guy..!',
|
||||||
'uploader': 'ljfriel2',
|
'uploader': 'ljfriel2',
|
||||||
'title': 'Most unlucky car accident'
|
'title': 'Most unlucky car accident',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||||
@ -28,6 +29,7 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
|
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
|
||||||
'uploader': 'ARD_Stinkt',
|
'uploader': 'ARD_Stinkt',
|
||||||
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
||||||
@ -49,10 +51,19 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
|
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
|
||||||
'uploader': 'bony333',
|
'uploader': 'bony333',
|
||||||
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
|
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$'
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
@ -64,6 +75,7 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
age_limit = int_or_none(self._search_regex(
|
age_limit = int_or_none(self._search_regex(
|
||||||
r'you confirm that you are ([0-9]+) years and over.',
|
r'you confirm that you are ([0-9]+) years and over.',
|
||||||
webpage, 'age limit', default=None))
|
webpage, 'age limit', default=None))
|
||||||
|
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
sources_raw = self._search_regex(
|
sources_raw = self._search_regex(
|
||||||
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
|
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
|
||||||
@ -116,4 +128,5 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
'thumbnail': video_thumbnail,
|
||||||
}
|
}
|
||||||
|
@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _extract_stream_info(self, stream_info):
|
def _extract_stream_info(self, stream_info):
|
||||||
broadcast_id = stream_info['broadcast_id']
|
broadcast_id = compat_str(stream_info['broadcast_id'])
|
||||||
is_live = stream_info.get('is_live')
|
is_live = stream_info.get('is_live')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
47
youtube_dl/extractor/localnews8.py
Normal file
47
youtube_dl/extractor/localnews8.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class LocalNews8IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
|
||||||
|
'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '35183304',
|
||||||
|
'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
|
||||||
|
'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
|
||||||
|
'duration': 153,
|
||||||
|
'timestamp': 1441844822,
|
||||||
|
'upload_date': '20150910',
|
||||||
|
'uploader_id': 'api',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
partner_id = self._search_regex(
|
||||||
|
r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
|
||||||
|
webpage, 'partner id', group='id')
|
||||||
|
kaltura_id = self._search_regex(
|
||||||
|
r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
|
||||||
|
webpage, 'videl id', group='id')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||||
|
'ie_key': 'Kaltura',
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
}
|
@ -1,46 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_urllib_parse_unquote
|
|
||||||
|
|
||||||
|
|
||||||
class MalemotionIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://malemotion.com/video/bete-de-concours.ltc',
|
|
||||||
'md5': '3013e53a0afbde2878bc39998c33e8a5',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'ltc',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Bête de Concours',
|
|
||||||
'age_limit': 18,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url = compat_urllib_parse_unquote(self._search_regex(
|
|
||||||
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
|
|
||||||
video_title = self._html_search_regex(
|
|
||||||
r'<title>(.*?)</title', webpage, 'title')
|
|
||||||
video_thumbnail = self._search_regex(
|
|
||||||
r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': video_url,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'format_id': 'mp4',
|
|
||||||
'preference': 1,
|
|
||||||
}]
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'title': video_title,
|
|
||||||
'thumbnail': video_thumbnail,
|
|
||||||
'age_limit': 18,
|
|
||||||
}
|
|
@ -49,8 +49,8 @@ class MDRIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||||
'timestamp': 1419047100,
|
'timestamp': 1450950000,
|
||||||
'upload_date': '20141220',
|
'upload_date': '20151224',
|
||||||
'duration': 4628,
|
'duration': 4628,
|
||||||
'uploader': 'KIKA',
|
'uploader': 'KIKA',
|
||||||
},
|
},
|
||||||
@ -71,8 +71,8 @@ class MDRIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
data_url = self._search_regex(
|
data_url = self._search_regex(
|
||||||
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>\\?/.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
|
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
|
||||||
webpage, 'data url', default=None, group='url').replace('\/', '/')
|
webpage, 'data url', group='url').replace('\/', '/')
|
||||||
|
|
||||||
doc = self._download_xml(
|
doc = self._download_xml(
|
||||||
compat_urlparse.urljoin(url, data_url), video_id)
|
compat_urlparse.urljoin(url, data_url), video_id)
|
||||||
|
@ -81,6 +81,9 @@ class MetacafeIE(InfoExtractor):
|
|||||||
'title': 'Open: This is Face the Nation, February 9',
|
'title': 'Open: This is Face the Nation, February 9',
|
||||||
'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
|
'description': 'md5:8a9ceec26d1f7ed6eab610834cc1a476',
|
||||||
'duration': 96,
|
'duration': 96,
|
||||||
|
'uploader': 'CBSI-NEW',
|
||||||
|
'upload_date': '20140209',
|
||||||
|
'timestamp': 1391959800,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
|||||||
class MetacriticIE(InfoExtractor):
|
class MetacriticIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
|
_VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
|
'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3698222',
|
'id': '3698222',
|
||||||
@ -20,7 +20,17 @@ class MetacriticIE(InfoExtractor):
|
|||||||
'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
|
'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
|
||||||
'duration': 221,
|
'duration': 221,
|
||||||
},
|
},
|
||||||
}
|
'skip': 'Not providing trailers anymore',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5740315',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
|
||||||
|
'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
|
||||||
|
'duration': 114,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
64
youtube_dl/extractor/mgtv.py
Normal file
64
youtube_dl/extractor/mgtv.py
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class MGTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||||
|
IE_DESC = '芒果TV'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||||
|
'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3116640',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
|
||||||
|
'description': '我是歌手第四季双年巅峰会',
|
||||||
|
'duration': 7461,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
api_data = self._download_json(
|
||||||
|
'http://v.api.mgtv.com/player/video', video_id,
|
||||||
|
query={'video_id': video_id})['data']
|
||||||
|
info = api_data['info']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for idx, stream in enumerate(api_data['stream']):
|
||||||
|
stream_url = stream.get('url')
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
|
tbr = int_or_none(self._search_regex(
|
||||||
|
r'(\d+)\.mp4', stream_url, 'tbr', default=None))
|
||||||
|
|
||||||
|
def extract_format(stream_url, format_id, idx, query={}):
|
||||||
|
format_info = self._download_json(
|
||||||
|
stream_url, video_id,
|
||||||
|
note='Download video info for format %s' % format_id or '#%d' % idx, query=query)
|
||||||
|
return {
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': format_info['info'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'tbr': tbr,
|
||||||
|
}
|
||||||
|
|
||||||
|
formats.append(extract_format(
|
||||||
|
stream_url, 'hls-%d' % tbr if tbr else None, idx * 2))
|
||||||
|
formats.append(extract_format(stream_url.replace(
|
||||||
|
'/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031}))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': info['title'].strip(),
|
||||||
|
'formats': formats,
|
||||||
|
'description': info.get('desc'),
|
||||||
|
'duration': int_or_none(info.get('duration')),
|
||||||
|
'thumbnail': info.get('thumb'),
|
||||||
|
}
|
192
youtube_dl/extractor/microsoftvirtualacademy.py
Normal file
192
youtube_dl/extractor/microsoftvirtualacademy.py
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_xpath,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
smuggle_url,
|
||||||
|
unsmuggle_url,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
|
||||||
|
def _extract_base_url(self, course_id, display_id):
|
||||||
|
return self._download_json(
|
||||||
|
'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id,
|
||||||
|
display_id, 'Downloading course base URL')
|
||||||
|
|
||||||
|
def _extract_chapter_and_title(self, title):
|
||||||
|
if not title:
|
||||||
|
return None, None
|
||||||
|
m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
|
||||||
|
return (int(m.group('chapter')), m.group('title')) if m else (None, title)
|
||||||
|
|
||||||
|
|
||||||
|
class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
|
||||||
|
IE_NAME = 'mva'
|
||||||
|
IE_DESC = 'Microsoft Virtual Academy videos'
|
||||||
|
_VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
|
||||||
|
'md5': '7826c44fc31678b12ad8db11f6b5abb9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gfVXISmEB_6804984382',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Course Introduction',
|
||||||
|
'formats': 'mincount:3',
|
||||||
|
'subtitles': {
|
||||||
|
'en': [{
|
||||||
|
'ext': 'ttml',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'mva:11788:gfVXISmEB_6804984382',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
course_id = mobj.group('course_id')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)
|
||||||
|
|
||||||
|
settings = self._download_xml(
|
||||||
|
'%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id),
|
||||||
|
video_id, 'Downloading video settings XML')
|
||||||
|
|
||||||
|
_, title = self._extract_chapter_and_title(xpath_text(
|
||||||
|
settings, './/Title', 'title', fatal=True))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
for sources in settings.findall(compat_xpath('.//MediaSources')):
|
||||||
|
if sources.get('videoType') == 'smoothstreaming':
|
||||||
|
continue
|
||||||
|
for source in sources.findall(compat_xpath('./MediaSource')):
|
||||||
|
video_url = source.text
|
||||||
|
if not video_url or not video_url.startswith('http'):
|
||||||
|
continue
|
||||||
|
video_mode = source.get('videoMode')
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'^(\d+)[pP]$', video_mode or '', 'height', default=None))
|
||||||
|
codec = source.get('codec')
|
||||||
|
acodec, vcodec = [None] * 2
|
||||||
|
if codec:
|
||||||
|
codecs = codec.split(',')
|
||||||
|
if len(codecs) == 2:
|
||||||
|
acodec, vcodec = codecs
|
||||||
|
elif len(codecs) == 1:
|
||||||
|
vcodec = codecs[0]
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'format_id': video_mode,
|
||||||
|
'height': height,
|
||||||
|
'acodec': acodec,
|
||||||
|
'vcodec': vcodec,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for source in settings.findall(compat_xpath('.//MarkerResourceSource')):
|
||||||
|
subtitle_url = source.text
|
||||||
|
if not subtitle_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault('en', []).append({
|
||||||
|
'url': '%s/%s' % (base_url, subtitle_url),
|
||||||
|
'ext': source.get('type'),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'formats': formats
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
|
||||||
|
IE_NAME = 'mva:course'
|
||||||
|
IE_DESC = 'Microsoft Virtual Academy courses'
|
||||||
|
_VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '11788',
|
||||||
|
'title': 'Microsoft Azure Fundamentals: Virtual Machines',
|
||||||
|
},
|
||||||
|
'playlist_count': 36,
|
||||||
|
}, {
|
||||||
|
# with emphasized chapters
|
||||||
|
'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '16335',
|
||||||
|
'title': 'Developing Windows 10 Games with Construct 2',
|
||||||
|
},
|
||||||
|
'playlist_count': 10,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'mva:course:11788',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if MicrosoftVirtualAcademyIE.suitable(url) else super(
|
||||||
|
MicrosoftVirtualAcademyCourseIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
course_id = mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
|
||||||
|
base_url = self._extract_base_url(course_id, display_id)
|
||||||
|
|
||||||
|
manifest = self._download_json(
|
||||||
|
'%s/imsmanifestlite.json' % base_url,
|
||||||
|
display_id, 'Downloading course manifest JSON')['manifest']
|
||||||
|
|
||||||
|
organization = manifest['organizations']['organization'][0]
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for chapter in organization['item']:
|
||||||
|
chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
|
||||||
|
chapter_id = chapter.get('@identifier')
|
||||||
|
for item in chapter.get('item', []):
|
||||||
|
item_id = item.get('@identifier')
|
||||||
|
if not item_id:
|
||||||
|
continue
|
||||||
|
metadata = item.get('resource', {}).get('metadata') or {}
|
||||||
|
if metadata.get('learningresourcetype') != 'Video':
|
||||||
|
continue
|
||||||
|
_, title = self._extract_chapter_and_title(item.get('title'))
|
||||||
|
duration = parse_duration(metadata.get('duration'))
|
||||||
|
description = metadata.get('description')
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': smuggle_url(
|
||||||
|
'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}),
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'chapter': chapter_title,
|
||||||
|
'chapter_number': chapter_number,
|
||||||
|
'chapter_id': chapter_id,
|
||||||
|
})
|
||||||
|
|
||||||
|
title = organization.get('title') or manifest.get('metadata', {}).get('title')
|
||||||
|
|
||||||
|
return self.playlist_result(entries, course_id, title)
|
@ -1,8 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -20,21 +17,28 @@ class MinistryGridIE(InfoExtractor):
|
|||||||
'id': '3453494717001',
|
'id': '3453494717001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Gospel by Numbers',
|
'title': 'The Gospel by Numbers',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'upload_date': '20140410',
|
||||||
'description': 'Coming soon from T4G 2014!',
|
'description': 'Coming soon from T4G 2014!',
|
||||||
'uploader': 'LifeWay Christian Resources (MG)',
|
'uploader_id': '2034960640001',
|
||||||
|
'timestamp': 1397145591,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': ['TDSLifeway'],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
portlets_json = self._search_regex(
|
portlets = self._parse_json(self._search_regex(
|
||||||
r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
|
r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'),
|
||||||
portlets = json.loads(portlets_json)
|
video_id)
|
||||||
pl_id = self._search_regex(
|
pl_id = self._search_regex(
|
||||||
r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')
|
r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id')
|
||||||
|
|
||||||
for i, portlet in enumerate(portlets):
|
for i, portlet in enumerate(portlets):
|
||||||
portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
|
portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
|
||||||
@ -46,12 +50,8 @@ class MinistryGridIE(InfoExtractor):
|
|||||||
r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
|
r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
|
||||||
default=None)
|
default=None)
|
||||||
if video_iframe_url:
|
if video_iframe_url:
|
||||||
surl = smuggle_url(
|
return self.url_result(
|
||||||
video_iframe_url, {'force_videoid': video_id})
|
smuggle_url(video_iframe_url, {'force_videoid': video_id}),
|
||||||
return {
|
video_id=video_id)
|
||||||
'_type': 'url',
|
|
||||||
'id': video_id,
|
|
||||||
'url': surl,
|
|
||||||
}
|
|
||||||
|
|
||||||
raise ExtractorError('Could not find video iframe in any portlets')
|
raise ExtractorError('Could not find video iframe in any portlets')
|
||||||
|
@ -15,9 +15,9 @@ class MiTeleIE(InfoExtractor):
|
|||||||
IE_DESC = 'mitele.es'
|
IE_DESC = 'mitele.es'
|
||||||
_VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
_VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TEST = {
|
||||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||||
'md5': '0ff1a13aebb35d9bc14081ff633dd324',
|
# MD5 is unstable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
||||||
'display_id': 'programa-144',
|
'display_id': 'programa-144',
|
||||||
@ -27,7 +27,7 @@ class MiTeleIE(InfoExtractor):
|
|||||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||||
'duration': 2913,
|
'duration': 2913,
|
||||||
},
|
},
|
||||||
}]
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
@ -1,27 +1,35 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import functools
|
||||||
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import (
|
||||||
|
compat_chr,
|
||||||
|
compat_ord,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
OnDemandPagedList,
|
||||||
NO_DEFAULT,
|
|
||||||
parse_count,
|
parse_count,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MixcloudIE(InfoExtractor):
|
class MixcloudIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
|
||||||
IE_NAME = 'mixcloud'
|
IE_NAME = 'mixcloud'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach-cryptkeeper',
|
'id': 'dholbach-cryptkeeper',
|
||||||
'ext': 'mp3',
|
'ext': 'm4a',
|
||||||
'title': 'Cryptkeeper',
|
'title': 'Cryptkeeper',
|
||||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||||
'uploader': 'Daniel Holbach',
|
'uploader': 'Daniel Holbach',
|
||||||
@ -39,22 +47,22 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
||||||
'uploader': 'Gilles Peterson Worldwide',
|
'uploader': 'Gilles Peterson Worldwide',
|
||||||
'uploader_id': 'gillespeterson',
|
'uploader_id': 'gillespeterson',
|
||||||
'thumbnail': 're:https?://.*/images/',
|
'thumbnail': 're:https?://.*',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _check_url(self, url, track_id, ext):
|
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
|
||||||
try:
|
@staticmethod
|
||||||
# We only want to know if the request succeed
|
def _decrypt_play_info(play_info):
|
||||||
# don't download the whole file
|
KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'
|
||||||
self._request_webpage(
|
|
||||||
HEADRequest(url), track_id,
|
play_info = base64.b64decode(play_info.encode('ascii'))
|
||||||
'Trying %s URL' % ext)
|
|
||||||
return True
|
return ''.join([
|
||||||
except ExtractorError:
|
compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
|
||||||
return False
|
for idx, ch in enumerate(play_info)])
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -68,19 +76,15 @@ class MixcloudIE(InfoExtractor):
|
|||||||
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
||||||
webpage, 'error message', default=None)
|
webpage, 'error message', default=None)
|
||||||
|
|
||||||
preview_url = self._search_regex(
|
encrypted_play_info = self._search_regex(
|
||||||
r'\s(?:data-preview-url|m-preview)="([^"]+)"',
|
r'm-play-info="([^"]+)"', webpage, 'play info')
|
||||||
webpage, 'preview url', default=None if message else NO_DEFAULT)
|
play_info = self._parse_json(
|
||||||
|
self._decrypt_play_info(encrypted_play_info), track_id)
|
||||||
|
|
||||||
if message:
|
if message and 'stream_url' not in play_info:
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||||
|
|
||||||
song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
|
song_url = play_info['stream_url']
|
||||||
song_url = song_url.replace('/previews/', '/c/originals/')
|
|
||||||
if not self._check_url(song_url, track_id, 'mp3'):
|
|
||||||
song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
|
||||||
if not self._check_url(song_url, track_id, 'm4a'):
|
|
||||||
raise ExtractorError('Unable to extract track url')
|
|
||||||
|
|
||||||
PREFIX = (
|
PREFIX = (
|
||||||
r'm-play-on-spacebar[^>]+'
|
r'm-play-on-spacebar[^>]+'
|
||||||
@ -115,3 +119,201 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class MixcloudPlaylistBaseIE(InfoExtractor):
|
||||||
|
_PAGE_SIZE = 24
|
||||||
|
|
||||||
|
def _find_urls_in_page(self, page):
|
||||||
|
for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page):
|
||||||
|
yield self.url_result(
|
||||||
|
compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)),
|
||||||
|
MixcloudIE.ie_key())
|
||||||
|
|
||||||
|
def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
|
||||||
|
real_page_number = real_page_number or current_page + 1
|
||||||
|
return self._download_webpage(
|
||||||
|
'https://www.mixcloud.com/%s/' % path, video_id,
|
||||||
|
note='Download %s (page %d)' % (page_name, current_page + 1),
|
||||||
|
errnote='Unable to download %s' % page_name,
|
||||||
|
query={'page': real_page_number, 'list': 'main', '_ajax': '1'},
|
||||||
|
headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||||
|
|
||||||
|
def _tracks_page_func(self, page, video_id, page_name, current_page):
|
||||||
|
resp = self._fetch_tracks_page(page, video_id, page_name, current_page)
|
||||||
|
|
||||||
|
for item in self._find_urls_in_page(resp):
|
||||||
|
yield item
|
||||||
|
|
||||||
|
def _get_user_description(self, page_content):
|
||||||
|
return self._html_search_regex(
|
||||||
|
r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
|
||||||
|
page_content, 'user description', fatal=False)
|
||||||
|
|
||||||
|
|
||||||
|
class MixcloudUserIE(MixcloudPlaylistBaseIE):
    """Extract a user's uploads, favorites or listens as a playlist."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
    IE_NAME = 'mixcloud:user'

    _TESTS = [
        {
            'url': 'http://www.mixcloud.com/dholbach/',
            'info_dict': {
                'id': 'dholbach_uploads',
                'title': 'Daniel Holbach (uploads)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'playlist_mincount': 11,
        },
        {
            'url': 'http://www.mixcloud.com/dholbach/uploads/',
            'info_dict': {
                'id': 'dholbach_uploads',
                'title': 'Daniel Holbach (uploads)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'playlist_mincount': 11,
        },
        {
            'url': 'http://www.mixcloud.com/dholbach/favorites/',
            'info_dict': {
                'id': 'dholbach_favorites',
                'title': 'Daniel Holbach (favorites)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'params': {
                'playlist_items': '1-100',
            },
            'playlist_mincount': 100,
        },
        {
            'url': 'http://www.mixcloud.com/dholbach/listens/',
            'info_dict': {
                'id': 'dholbach_listens',
                'title': 'Daniel Holbach (listens)',
                'description': 'md5:327af72d1efeb404a8216c27240d1370',
            },
            'params': {
                'playlist_items': '1-100',
            },
            'playlist_mincount': 100,
        },
    ]

    def _real_extract(self, url):
        """Build a lazily paged playlist for the list named in ``url``."""
        user_id, list_type = re.match(self._VALID_URL, url).group('user', 'type')

        # A bare profile URL carries no list type; treat it as the uploads list.
        list_type = 'uploads' if list_type is None else list_type

        video_id = '%s_%s' % (user_id, list_type)

        profile = self._download_webpage(
            'https://www.mixcloud.com/%s/' % user_id, video_id,
            note='Downloading user profile',
            errnote='Unable to download user profile')

        username = self._og_search_title(profile)
        description = self._get_user_description(profile)

        # Listing pages are only fetched when the playlist is consumed.
        page_func = functools.partial(
            self._tracks_page_func,
            '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type)
        entries = OnDemandPagedList(page_func, self._PAGE_SIZE, use_cache=True)

        playlist_title = '%s (%s)' % (username, list_type)
        return self.playlist_result(entries, video_id, playlist_title, description)
|
||||||
|
|
||||||
|
|
||||||
|
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    """Extract a user-curated Mixcloud playlist."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
    IE_NAME = 'mixcloud:playlist'

    _TESTS = [
        {
            'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
            'info_dict': {
                'id': 'RedBullThre3style_tokyo-finalists-2015',
                'title': 'National Champions 2015',
                'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
            },
            'playlist_mincount': 16,
        },
        {
            'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
            'info_dict': {
                'id': 'maxvibes_jazzcat-on-ness-radio',
                'title': 'Jazzcat on Ness Radio',
                'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
            },
            'playlist_mincount': 23,
        },
    ]

    def _real_extract(self, url):
        """Return the playlist at ``url`` with lazily paged entries."""
        user_id, playlist_id = re.match(
            self._VALID_URL, url).group('user', 'playlist')
        video_id = '%s_%s' % (user_id, playlist_id)

        # The playlist page itself supplies the description and the title.
        profile = self._download_webpage(
            url, user_id,
            note='Downloading playlist page',
            errnote='Unable to download playlist page')

        description = self._get_user_description(profile)
        title_re = r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>'
        playlist_title = self._html_search_regex(
            title_re, profile, 'playlist title')

        page_func = functools.partial(
            self._tracks_page_func,
            '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist')
        entries = OnDemandPagedList(page_func, self._PAGE_SIZE)

        return self.playlist_result(entries, video_id, playlist_title, description)
|
||||||
|
|
||||||
|
|
||||||
|
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
    """Extract every track from a user's Mixcloud stream."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
    IE_NAME = 'mixcloud:stream'

    _TEST = {
        'url': 'https://www.mixcloud.com/FirstEar/stream/',
        'info_dict': {
            'id': 'FirstEar',
            'title': 'First Ear',
            'description': 'Curators of good music\nfirstearmusic.com',
        },
        'playlist_mincount': 192,
    }

    def _real_extract(self, url):
        """Follow the stream's next-page links and return all tracks found."""
        user_id = self._match_id(url)

        webpage = self._download_webpage(url, user_id)

        entries = []

        def _collect(page):
            # Store this page's tracks and return the advertised next-page
            # URL, or None when the page carries none.
            entries.extend(self._find_urls_in_page(page))
            return self._search_regex(
                r'm-next-page-url="([^"]+)"', page,
                'next page URL', default=None)

        prev_page_url = None
        next_page_url = _collect(webpage)

        # Stop when no next page is advertised, or when a page points at the
        # same URL as the previous one.
        page_index = 0
        while next_page_url and next_page_url != prev_page_url:
            prev_page_url = next_page_url
            page_number = int(self._search_regex(
                r'\?page=(\d+)', next_page_url, 'next page number'))

            # page_index only feeds the progress note; the page requested is
            # the number parsed from the next-page URL.
            ajax_page = self._fetch_tracks_page(
                '%s/stream' % user_id, user_id, 'stream', page_index,
                real_page_number=page_number)
            next_page_url = _collect(ajax_page)
            page_index += 1

        username = self._og_search_title(webpage)
        description = self._get_user_description(webpage)

        return self.playlist_result(entries, user_id, username, description)
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user