Compare commits
469 Commits
2016.08.13
...
2016.10.02
Author | SHA1 | Date | |
---|---|---|---|
6c152ce20f | |||
26406d33c7 | |||
703b3afa93 | |||
99ed78c79e | |||
fd15264172 | |||
bd26441205 | |||
b19e275d99 | |||
f6ba581f89 | |||
6d2549fb4f | |||
4da4516973 | |||
e1e97c2446 | |||
53a7e3d287 | |||
d54739a2e6 | |||
63e0fd5bcc | |||
9c51a24642 | |||
9bd7bd0b80 | |||
4a76b73c6c | |||
e295618f9e | |||
d7753d1948 | |||
eaf9b22f94 | |||
a1001f47fc | |||
1609782258 | |||
de6babf922 | |||
b0582fc806 | |||
af33dd8ee7 | |||
70d7b323b6 | |||
a7ee8a00f4 | |||
c6eed6b8c0 | |||
3aa3953d28 | |||
efa97bdcf1 | |||
475f8a4580 | |||
93aa0b6318 | |||
0ce26ef228 | |||
0d72ff9c51 | |||
a56e74e271 | |||
f533490bb7 | |||
8bfda726c2 | |||
8f0cf20ab9 | |||
c8f45f763c | |||
dd2cffeeec | |||
cdfcc4ce95 | |||
e384552590 | |||
1a2fbe322e | |||
f9dd86a112 | |||
2342733f85 | |||
93933c9819 | |||
d75d9e343e | |||
72c3d02d29 | |||
d3dbb46330 | |||
fffb9cff94 | |||
d3c97bad61 | |||
2d5b4af007 | |||
f1ee462c82 | |||
5742c18bc1 | |||
ddb19772d5 | |||
a3d8b38168 | |||
e590b7ff9e | |||
f3625cc4ca | |||
2d3d29976b | |||
493353c7fd | |||
0a078550b9 | |||
f92bb612c6 | |||
ddde91952f | |||
63c583eb2c | |||
7fd57de6fb | |||
e71a450956 | |||
27e99078d3 | |||
6f126d903f | |||
7518a61d41 | |||
8e45e1cc4d | |||
f0bc5a8609 | |||
a54ffb8aa7 | |||
8add4bfecb | |||
0711995bca | |||
5968d7d2fe | |||
e6332059ac | |||
8eec691e8a | |||
24628cf7db | |||
71ad00c09f | |||
45cae3b021 | |||
4ddcb5999d | |||
628406db96 | |||
e3d6bdc8fc | |||
0a439c5c4c | |||
1978540a51 | |||
12f211d0cb | |||
3a5a18705f | |||
1ae0ae5db0 | |||
f62a77b99a | |||
4bfd294e2f | |||
e33a7253b2 | |||
c38f06818d | |||
cb57386873 | |||
59fd8f931d | |||
70b4cf9b1b | |||
cc764a6da8 | |||
d8dbf8707d | |||
a1da888d0c | |||
3acff9423d | |||
9ca93b99d1 | |||
14ae11efab | |||
190d2027d0 | |||
26394d021d | |||
30d0b549be | |||
86f4d14f81 | |||
21d21b0c72 | |||
b4c1d6e800 | |||
a0d5077c8d | |||
584d6f3457 | |||
e14c82bd6b | |||
c51a7f0b2f | |||
d05ef09d9d | |||
30d9e20938 | |||
fc86d4eed0 | |||
7d273a387a | |||
6ad0219556 | |||
98b7506e96 | |||
52dc8a9b3f | |||
9d8985a165 | |||
f5e008d134 | |||
e6bf3621e7 | |||
490b755769 | |||
1dec2c8a0e | |||
dcce092e0a | |||
32443dd346 | |||
2133565cec | |||
1da50aa34e | |||
d2522b86ac | |||
537f753399 | |||
c849836854 | |||
eb5b1fc021 | |||
95be29e1c6 | |||
c035dba19e | |||
87148bb711 | |||
797c636bcb | |||
0002962f3f | |||
3e4185c396 | |||
f6717dec8a | |||
a942d6cb48 | |||
961516bfd1 | |||
6db354a9f4 | |||
353f340e11 | |||
014b7e6b25 | |||
925194022c | |||
b690ea15eb | |||
5712c0f426 | |||
86d68f906e | |||
4875ff6847 | |||
1b6712ab23 | |||
8414c2da31 | |||
45396dd2ed | |||
7a7309219c | |||
fcba157e80 | |||
a6ccc3e518 | |||
1d16035bb4 | |||
e8bcd982cc | |||
a5ff05df1a | |||
d002e91986 | |||
546edb2efa | |||
be45730226 | |||
ee7e672eb0 | |||
0307d6fba6 | |||
fc150cba1d | |||
d667ab7fad | |||
eb87d4545a | |||
1c81476cbb | |||
bc9186c882 | |||
6599c72527 | |||
6bb05b32a9 | |||
fea74acad8 | |||
f01115c933 | |||
2cdbc06a1f | |||
2cb93afcd8 | |||
bfcda07a27 | |||
001a5fd3d7 | |||
1e35999c1e | |||
2512b17493 | |||
56c0ead4d3 | |||
7324243750 | |||
84a18e9b90 | |||
b29f842e0e | |||
f009fcac0d | |||
6c3affcb18 | |||
1e19ff2984 | |||
c6129feb7f | |||
bb5ebd4453 | |||
cb9cbd84ed | |||
4d5726b0d7 | |||
4614ad7b59 | |||
b717837190 | |||
2abad67e52 | |||
ad0e2b3359 | |||
37720844f6 | |||
6cfcb8ac36 | |||
7a979da8cb | |||
2fdc7b0e04 | |||
010d034fca | |||
02e552886f | |||
25042f7372 | |||
3f612f0767 | |||
17bf6e71cc | |||
881f35479d | |||
89f257d6e5 | |||
e78a5428b6 | |||
6656a82481 | |||
d7e794928d | |||
9c27188988 | |||
b84d311d53 | |||
f87feb4b68 | |||
2841bdcebb | |||
84b91dd4e3 | |||
92c9c2a88b | |||
9d54b02bae | |||
846d8b76a0 | |||
aa3f9fe695 | |||
8258f4457c | |||
948cd5b72d | |||
8d3737cda7 | |||
155bc674c4 | |||
c33c962adf | |||
bdcc046d12 | |||
a493f10208 | |||
f3eeaacb4e | |||
b4d6a85d60 | |||
0b36a96212 | |||
bc22a79694 | |||
340e31ca74 | |||
973dee491f | |||
1f85029d82 | |||
95be19d436 | |||
95843da529 | |||
abf2c79f95 | |||
b49ad71ce1 | |||
9127e1533d | |||
78e762d23c | |||
4809490108 | |||
8112bfeaba | |||
d9606d9b6c | |||
433af6ad30 | |||
feaa5ad787 | |||
100bd86a68 | |||
0def758782 | |||
919cf1a62f | |||
b29cd56591 | |||
622638512b | |||
37c7490ac6 | |||
091624f9da | |||
7e5dc339de | |||
4a69fa04e0 | |||
2e99cd30c3 | |||
25afc2a783 | |||
9603b66012 | |||
45aab4d30b | |||
ed2bfe93aa | |||
cdc783510b | |||
cf0efe9636 | |||
dedb177029 | |||
86c3bbbced | |||
4b3a607658 | |||
3a7d35b982 | |||
6496ccb413 | |||
3fcce30289 | |||
c2b2c7e138 | |||
dacb3a864a | |||
6066d03db0 | |||
6562d34a8c | |||
5e9e3d0f6b | |||
349fc5c705 | |||
2c3e0af93e | |||
6150502e47 | |||
b207d5ebd4 | |||
4191779dcd | |||
f97ec8bcb9 | |||
8276d3b87a | |||
af95ee94b4 | |||
8fb6af6bba | |||
f6af0f888b | |||
e816c9d158 | |||
9250181f37 | |||
f096ec2625 | |||
4c8ab6fd71 | |||
05d4612947 | |||
746a695b36 | |||
165c54e97d | |||
2896dd73bc | |||
f8fd510eb4 | |||
7a3e849f6e | |||
196c6ba067 | |||
165620e320 | |||
4fd350611c | |||
263fef43de | |||
a249ab83cb | |||
f7043ef39c | |||
64fc49aba0 | |||
245023a861 | |||
3c77a54d5d | |||
da30a20a4d | |||
1fe48afea5 | |||
42e05be867 | |||
fe45b0e060 | |||
a06e1498aa | |||
5a80e7b43a | |||
3fb2a23029 | |||
7be15d4097 | |||
cd10b3ea63 | |||
547993dcd0 | |||
6c9b71bc08 | |||
93b8404599 | |||
9ba1e1dcc0 | |||
b8079a40bc | |||
5bc8a73af6 | |||
b3eaeded12 | |||
ec65b391cb | |||
2982514072 | |||
98908bcf7c | |||
04b32c8f96 | |||
40eec6b15c | |||
39efc6e3e0 | |||
1198fe14a1 | |||
71e90766b5 | |||
d7aae610f6 | |||
92c27a0dbf | |||
d181cff685 | |||
3b4b82d4ce | |||
545ef4f531 | |||
906b87cf5f | |||
b281aad2dc | |||
6b18a24e6e | |||
c9de980106 | |||
f9b373afda | |||
298a120ab7 | |||
e3faecde30 | |||
a0f071a50d | |||
20bad91d76 | |||
b54a2da433 | |||
dc2c37f316 | |||
c1f62dd338 | |||
5a3efcd27c | |||
4c8f9c2577 | |||
f26a298247 | |||
ea01cdbf61 | |||
6a76b53355 | |||
d37708fc86 | |||
5c13c28566 | |||
f70e9229e6 | |||
30afe4aeb2 | |||
75fa990dc6 | |||
f39ffc5877 | |||
07ea9c9b05 | |||
073ac1225f | |||
0c6422cdd6 | |||
08773689f3 | |||
0c75abbb7b | |||
97653f81b2 | |||
d38b27dd9b | |||
6d94cbd2f4 | |||
30317f4887 | |||
8c3e35dd44 | |||
c86f51ee38 | |||
6e52bbb413 | |||
05bddcc512 | |||
1212e9972f | |||
ccb6570e9e | |||
18b6216150 | |||
fb009b7f53 | |||
3083e4dc07 | |||
7367bdef23 | |||
ad31642584 | |||
c7c43a93ba | |||
96229e5f95 | |||
55d119e2a1 | |||
6d2679ee26 | |||
afbab5688e | |||
3d897cc791 | |||
cf143c4d97 | |||
ad120ae1c5 | |||
d0fa172e5f | |||
f97f9f71e5 | |||
526656726b | |||
9b8c554ea7 | |||
d13bfc07b7 | |||
efe470e261 | |||
e3f6b56909 | |||
b1e676fde8 | |||
92d4cfa358 | |||
3d47ee0a9e | |||
d164a0d41b | |||
db29af6d36 | |||
2c6acdfd2d | |||
fddaa76a59 | |||
a809446750 | |||
d8f30a7e66 | |||
5b1d85754e | |||
e25586e471 | |||
292a2301bf | |||
dabe15701b | |||
4245f55880 | |||
5b9d187cc6 | |||
39e1c4f08c | |||
19f35402c5 | |||
70852b47ca | |||
a9a3b4a081 | |||
ecc90093f9 | |||
520251c093 | |||
55af45fcab | |||
b82232036a | |||
e4659b4547 | |||
9e5751b9fe | |||
bd1bcd3ea0 | |||
93a63b36f1 | |||
8b2dc4c328 | |||
850837b67a | |||
13585d7682 | |||
fd3ec986a4 | |||
b0d578ff7b | |||
b0c8f2e9c8 | |||
51815886a9 | |||
08a42f9c74 | |||
e15ad9ef09 | |||
4e9fee1015 | |||
7273e5849b | |||
b505e98784 | |||
92cd9fd565 | |||
b3d7dce429 | |||
a44694ab4e | |||
ab19b46b88 | |||
8804f10e6b | |||
6be17c0870 | |||
8652770bd2 | |||
2a1321a272 | |||
9c0fa60bf3 | |||
502d87c546 | |||
b35b0d73d8 | |||
6e7e4a6edf | |||
53fef319f1 | |||
2cabee2a7d | |||
11f502fac1 | |||
98affc1a48 | |||
70a2829fee | |||
837e56c8ee | |||
b5ddee8c77 | |||
fb64adcbd3 | |||
4f640f2890 | |||
254e64a20a | |||
818ac213eb | |||
cbef4d5c9f | |||
bf90c46790 | |||
69eb4d699f | |||
6d8ec8c3b7 | |||
760845ce99 | |||
5c2d087221 | |||
b6c4e36728 | |||
1a57b8c18c | |||
24eb13b1c6 | |||
525e0316c0 | |||
7e60ce9cf7 | |||
e811bcf8f8 | |||
6103f59095 | |||
9fa5789279 | |||
d2ac04674d | |||
1fd6e30988 | |||
884cdb6cd9 | |||
9771b1f901 | |||
2118fdd1a9 | |||
320d597c21 | |||
aaf44a2f47 | |||
fafabc0712 | |||
409760a932 | |||
097eba019d |
8
.github/ISSUE_TEMPLATE.md
vendored
8
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.13**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.08.13
|
||||
[debug] youtube-dl version 2016.10.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
@ -55,4 +55,4 @@ $ youtube-dl -v <your command line>
|
||||
### Description of your *issue*, suggested solution and other information
|
||||
|
||||
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
||||
If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
|
||||
If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
|
||||
|
2
.github/ISSUE_TEMPLATE_tmpl.md
vendored
2
.github/ISSUE_TEMPLATE_tmpl.md
vendored
@ -55,4 +55,4 @@ $ youtube-dl -v <your command line>
|
||||
### Description of your *issue*, suggested solution and other information
|
||||
|
||||
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
||||
If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
|
||||
If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
|
||||
|
5
.github/PULL_REQUEST_TEMPLATE.md
vendored
5
.github/PULL_REQUEST_TEMPLATE.md
vendored
@ -10,8 +10,13 @@
|
||||
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
|
||||
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
|
||||
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
|
||||
- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence)
|
||||
|
||||
### What is the purpose of your *pull request*?
|
||||
- [ ] Bug fix
|
||||
- [ ] Improvement
|
||||
- [ ] New extractor
|
||||
- [ ] New feature
|
||||
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -29,6 +29,7 @@ updates_key.pem
|
||||
*.m4a
|
||||
*.m4v
|
||||
*.mp3
|
||||
*.3gp
|
||||
*.part
|
||||
*.swp
|
||||
test/testdata
|
||||
|
4
AUTHORS
4
AUTHORS
@ -181,3 +181,7 @@ Nehal Patel
|
||||
Rob van Bekkum
|
||||
Petr Zvoníček
|
||||
Pratyush Singh
|
||||
Aleksander Nitecki
|
||||
Sebastian Blunt
|
||||
Matěj Cepl
|
||||
Xie Yanbo
|
||||
|
366
ChangeLog
366
ChangeLog
@ -1,3 +1,368 @@
|
||||
version 2016.10.02
|
||||
|
||||
Core
|
||||
* Fix possibly lost extended attributes during post-processing
|
||||
+ Support pyxattr as well as python-xattr for --xattrs and
|
||||
--xattr-set-filesize (#9054)
|
||||
|
||||
Extractors
|
||||
+ [jwplatform] Support DASH streams in JWPlayer
|
||||
+ [jwplatform] Support old-style JWPlayer playlists
|
||||
+ [byutv:event] Add extractor
|
||||
* [periscope:user] Fix extraction (#10820)
|
||||
* [dctp] Fix extraction (#10734)
|
||||
+ [instagram] Extract video dimensions (#10790)
|
||||
+ [tvland] Extend URL regular expression (#10812)
|
||||
+ [vgtv] Add support for tv.aftonbladet.se (#10800)
|
||||
- [aftonbladet] Remove extractor
|
||||
* [vk] Fix timestamp and view count extraction (#10760)
|
||||
+ [vk] Add support for running and finished live streams (#10799)
|
||||
+ [leeco] Recognize more Le Sports URLs (#10794)
|
||||
+ [instagram] Extract comments (#10788)
|
||||
+ [ketnet] Extract mzsource formats (#10770)
|
||||
* [limelight:media] Improve HTTP formats extraction
|
||||
|
||||
|
||||
version 2016.09.27
|
||||
|
||||
Core
|
||||
+ Add hdcore query parameter to akamai f4m formats
|
||||
+ Delegate HLS live streams downloading to ffmpeg
|
||||
+ Improved support for HTML5 subtitles
|
||||
|
||||
Extractors
|
||||
+ [vk] Add support for dailymotion embeds (#10661)
|
||||
* [promptfile] Fix extraction (#10634)
|
||||
* [kaltura] Speed up embed regular expressions (#10764)
|
||||
+ [npo] Add support for anderetijden.nl (#10754)
|
||||
+ [prosiebensat1] Add support for advopedia sites
|
||||
* [mwave] Relax URL regular expression (#10735, #10748)
|
||||
* [prosiebensat1] Fix playlist support (#10745)
|
||||
+ [prosiebensat1] Add support for sat1gold sites (#10745)
|
||||
+ [cbsnews:livevideo] Fix extraction and extract m3u8 formats
|
||||
+ [brightcove:new] Add support for live streams
|
||||
* [soundcloud] Generalize playlist entries extraction (#10733)
|
||||
+ [mtv] Add support for new URL schema (#8169, #9808)
|
||||
* [einthusan] Fix extraction (#10714)
|
||||
+ [twitter] Support Periscope embeds (#10737)
|
||||
+ [openload] Support subtitles (#10625)
|
||||
|
||||
|
||||
version 2016.09.24
|
||||
|
||||
Core
|
||||
+ Add support for watchTVeverywhere.com authentication provider based MSOs for
|
||||
Adobe Pass authentication (#10709)
|
||||
|
||||
Extractors
|
||||
+ [soundcloud:playlist] Provide video id for early playlist entries (#10733)
|
||||
+ [prosiebensat1] Add support for kabeleinsdoku (#10732)
|
||||
* [cbs] Extract info from thunder videoPlayerService (#10728)
|
||||
* [openload] Fix extraction (#10408)
|
||||
+ [ustream] Support the new HLS streams (#10698)
|
||||
+ [ooyala] Extract all HLS formats
|
||||
+ [cartoonnetwork] Add support for Adobe Pass authentication
|
||||
+ [soundcloud] Extract license metadata
|
||||
+ [fox] Add support for Adobe Pass authentication (#8584)
|
||||
+ [tbs] Add support for Adobe Pass authentication (#10642, #10222)
|
||||
+ [trutv] Add support for Adobe Pass authentication (#10519)
|
||||
+ [turner] Add support for Adobe Pass authentication
|
||||
|
||||
|
||||
version 2016.09.19
|
||||
|
||||
Extractors
|
||||
+ [crunchyroll] Check if already authenticated (#10700)
|
||||
- [twitch:stream] Remove fallback to profile extraction when stream is offline
|
||||
* [thisav] Improve title extraction (#10682)
|
||||
* [vyborymos] Improve station info extraction
|
||||
|
||||
|
||||
version 2016.09.18
|
||||
|
||||
Core
|
||||
+ Introduce manifest_url and fragments fields in formats dictionary for
|
||||
fragmented media
|
||||
+ Provide manifest_url field for DASH segments, HLS and HDS
|
||||
+ Provide fragments field for DASH segments
|
||||
* Rework DASH segments downloader to use fragments field
|
||||
+ Add helper method for Wowza Streaming Engine formats extraction
|
||||
|
||||
Extractors
|
||||
+ [vyborymos] Add extractor for vybory.mos.ru (#10692)
|
||||
+ [xfileshare] Add title regular expression for streamin.to (#10646)
|
||||
+ [globo:article] Add support for multiple videos (#10653)
|
||||
+ [thisav] Recognize HTML5 videos (#10447)
|
||||
* [jwplatform] Improve JWPlayer detection
|
||||
+ [mangomolo] Add support for Mangomolo embeds
|
||||
+ [toutv] Add support for authentication (#10669)
|
||||
* [franceinter] Fix upload date extraction
|
||||
* [tv4] Fix HLS and HDS formats extraction (#10659)
|
||||
|
||||
|
||||
version 2016.09.15
|
||||
|
||||
Core
|
||||
* Improve _hidden_inputs
|
||||
+ Introduce improved explicit Adobe Pass support
|
||||
+ Add --ap-mso to provide multiple-system operator identifier
|
||||
+ Add --ap-username to provide MSO account username
|
||||
+ Add --ap-password to provide MSO account password
|
||||
+ Add --ap-list-mso to list all supported MSOs
|
||||
+ Add support for Rogers Cable multiple-system operator (#10606)
|
||||
|
||||
Extractors
|
||||
* [crunchyroll] Fix authentication (#10655)
|
||||
* [twitch] Fix API calls (#10654, #10660)
|
||||
+ [bellmedia] Add support for more Bell Media Television sites
|
||||
* [franceinter] Fix extraction (#10538, #2105)
|
||||
* [kuwo] Improve error detection (#10650)
|
||||
+ [go] Add support for free full episodes (#10439)
|
||||
* [bilibili] Fix extraction for specific videos (#10647)
|
||||
* [nhk] Fix extraction (#10633)
|
||||
* [kaltura] Improve audio detection
|
||||
* [kaltura] Skip chun format
|
||||
+ [vimeo:ondemand] Pass Referer along with embed URL (#10624)
|
||||
+ [nbc] Add support for NBC Olympics (#10361)
|
||||
|
||||
|
||||
version 2016.09.11.1
|
||||
|
||||
Extractors
|
||||
+ [tube8] Extract categories and tags (#10579)
|
||||
+ [pornhub] Extract categories and tags (#10499)
|
||||
* [openload] Temporary fix (#10408)
|
||||
+ [foxnews] Add support Fox News articles (#10598)
|
||||
* [viafree] Improve video id extraction (#10615)
|
||||
* [iwara] Fix extraction after relaunch (#10462, #3215)
|
||||
+ [tfo] Add extractor for tfo.org
|
||||
* [lrt] Fix audio extraction (#10566)
|
||||
* [9now] Fix extraction (#10561)
|
||||
+ [canalplus] Add support for c8.fr (#10577)
|
||||
* [newgrounds] Fix uploader extraction (#10584)
|
||||
+ [polskieradio:category] Add support for category lists (#10576)
|
||||
+ [ketnet] Add extractor for ketnet.be (#10343)
|
||||
+ [canvas] Add support for een.be (#10605)
|
||||
+ [telequebec] Add extractor for telequebec.tv (#1999)
|
||||
* [parliamentliveuk] Fix extraction (#9137)
|
||||
|
||||
|
||||
version 2016.09.08
|
||||
|
||||
Extractors
|
||||
+ [jwplatform] Extract height from format label
|
||||
+ [yahoo] Extract Brightcove Legacy Studio embeds (#9345)
|
||||
* [videomore] Fix extraction (#10592)
|
||||
* [foxgay] Fix extraction (#10480)
|
||||
+ [rmcdecouverte] Add extractor for rmcdecouverte.bfmtv.com (#9709)
|
||||
* [gamestar] Fix metadata extraction (#10479)
|
||||
* [puls4] Fix extraction (#10583)
|
||||
+ [cctv] Add extractor for CCTV and CNTV (#8153)
|
||||
+ [lci] Add extractor for lci.fr (#10573)
|
||||
+ [wat] Extract DASH formats
|
||||
+ [viafree] Improve video id detection (#10569)
|
||||
+ [trutv] Add extractor for trutv.com (#10519)
|
||||
+ [nick] Add support for nickelodeon.nl (#10559)
|
||||
+ [abcotvs:clips] Add support for clips.abcotvs.com
|
||||
+ [abcotvs] Add support for ABC Owned Television Stations sites (#9551)
|
||||
+ [miaopai] Add extractor for miaopai.com (#10556)
|
||||
* [gamestar] Fix metadata extraction (#10479)
|
||||
+ [bilibili] Add support for episodes (#10190)
|
||||
+ [tvnoe] Add extractor for tvnoe.cz (#10524)
|
||||
|
||||
|
||||
version 2016.09.04.1
|
||||
|
||||
Core
|
||||
* In DASH downloader if the first segment fails, abort the whole download
|
||||
process to prevent throttling (#10497)
|
||||
+ Add support for --skip-unavailable-fragments and --fragment retries in
|
||||
hlsnative downloader (#10165, #10448).
|
||||
+ Add support for --skip-unavailable-fragments in DASH downloader
|
||||
+ Introduce --skip-unavailable-fragments option for fragment based downloaders
|
||||
that allows to skip fragments unavailable due to a HTTP error
|
||||
* Fix extraction of video/audio entries with src attribute in
|
||||
_parse_html5_media_entries (#10540)
|
||||
|
||||
Extractors
|
||||
* [theplatform] Relax URL regular expression (#10546)
|
||||
* [youtube:playlist] Extend URL regular expression
|
||||
* [rottentomatoes] Delegate extraction to internetvideoarchive extractor
|
||||
* [internetvideoarchive] Extract all formats
|
||||
* [pornvoisines] Fix extraction (#10469)
|
||||
* [rottentomatoes] Fix extraction (#10467)
|
||||
* [espn] Extend URL regular expression (#10549)
|
||||
* [vimple] Extend URL regular expression (#10547)
|
||||
* [youtube:watchlater] Fix extraction (#10544)
|
||||
* [youjizz] Fix extraction (#10437)
|
||||
+ [foxnews] Add support for FoxNews Insider (#10445)
|
||||
+ [fc2] Recognize Flash player URLs (#10512)
|
||||
|
||||
|
||||
version 2016.09.03
|
||||
|
||||
Core
|
||||
* Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in
|
||||
_extract_m3u8_formats (#10522)
|
||||
* Handle semicolon in mimetype2ext
|
||||
|
||||
Extractors
|
||||
+ [youtube] Add support for rental videos' previews (#10532)
|
||||
* [youtube:playlist] Fallback to video extraction for video/playlist URLs when
|
||||
no playlist is actually served (#10537)
|
||||
+ [drtv] Add support for dr.dk/nyheder (#10536)
|
||||
+ [facebook:plugins:video] Add extractor (#10530)
|
||||
+ [go] Add extractor for *.go.com sites
|
||||
* [adobepass] Check for authz_token expiration (#10527)
|
||||
* [nytimes] improve extraction
|
||||
* [thestar] Fix extraction (#10465)
|
||||
* [glide] Fix extraction (#10478)
|
||||
- [exfm] Remove extractor (#10482)
|
||||
* [youporn] Fix categories and tags extraction (#10521)
|
||||
+ [curiositystream] Add extractor for app.curiositystream.com
|
||||
- [thvideo] Remove extractor (#10464)
|
||||
* [movingimage] Fix for the new site name (#10466)
|
||||
+ [cbs] Add support for once formats (#10515)
|
||||
* [limelight] Skip ism snd duplicate manifests
|
||||
+ [porncom] Extract categories and tags (#10510)
|
||||
+ [facebook] Extract timestamp (#10508)
|
||||
+ [yahoo] Extract more formats
|
||||
|
||||
|
||||
version 2016.08.31
|
||||
|
||||
Extractors
|
||||
* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505)
|
||||
* [bandcamp:album] Fix title extraction (#10455)
|
||||
* [pyvideo] Fix extraction (#10468)
|
||||
+ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016)
|
||||
* [9c9media] Extract more metadata
|
||||
* [9c9media] Fix multiple stacks extraction (#10016)
|
||||
* [adultswim] Improve video info extraction (#10492)
|
||||
* [vodplatform] Improve embed regular expression
|
||||
- [played] Remove extractor (#10470)
|
||||
+ [tbs] Add extractor for tbs.com and tntdrama.com (#10222)
|
||||
+ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110)
|
||||
* [adultswim] Rework in terms of turner extractor
|
||||
* [cnn] Rework in terms of turner extractor
|
||||
* [nba] Rework in terms of turner extractor
|
||||
+ [turner] Add base extractor for Turner Broadcasting System based sites
|
||||
* [bilibili] Fix extraction (#10375)
|
||||
* [openload] Fix extraction (#10408)
|
||||
|
||||
|
||||
version 2016.08.28
|
||||
|
||||
Core
|
||||
+ Add warning message that ffmpeg doesn't support SOCKS
|
||||
* Improve thumbnail sorting
|
||||
+ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats
|
||||
* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative
|
||||
+ Add ac-3 to the list of audio codecs in parse_codecs
|
||||
|
||||
Extractors
|
||||
* [periscope:user] Fix extraction (#10453)
|
||||
* [douyutv] Fix extraction (#10153, #10318, #10444)
|
||||
+ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424)
|
||||
- [trutube] Remove extractor (#10438)
|
||||
+ [usanetwork] Add extractor for usanetwork.com
|
||||
* [crackle] Fix extraction (#10333)
|
||||
* [spankbang] Fix description and uploader extraction (#10339)
|
||||
* [discoverygo] Detect cable provider restricted videos (#10425)
|
||||
+ [cbc] Add support for watch.cbc.ca
|
||||
* [kickstarter] Silent the warning for og:description (#10415)
|
||||
* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
|
||||
|
||||
|
||||
version 2016.08.24.1
|
||||
|
||||
Extractors
|
||||
+ [pluralsight] Add support for subtitles (#9681)
|
||||
|
||||
|
||||
version 2016.08.24
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix authentication (#10392)
|
||||
* [openload] Fix extraction (#10408)
|
||||
+ [bravotv] Add support for Adobe Pass (#10407)
|
||||
* [bravotv] Fix clip info extraction (#10407)
|
||||
* [eagleplatform] Improve embedded videos detection (#10409)
|
||||
* [awaan] Fix extraction
|
||||
* [mtvservices:embedded] Update config URL
|
||||
+ [abc:iview] Add extractor (#6148)
|
||||
|
||||
|
||||
version 2016.08.22
|
||||
|
||||
Core
|
||||
* Improve formats and subtitles extension auto calculation
|
||||
+ Recognize full unit names in parse_filesize
|
||||
+ Add support for m3u8 manifests in HTML5 multimedia tags
|
||||
* Fix octal/hexadecimal number detection in js_to_json
|
||||
|
||||
Extractors
|
||||
+ [ivi] Add support for 720p and 1080p
|
||||
+ [charlierose] Add new extractor (#10382)
|
||||
* [1tv] Fix extraction (#9249)
|
||||
* [twitch] Renew authentication
|
||||
* [kaltura] Improve subtitles extension calculation
|
||||
+ [zingmp3] Add support for video clips
|
||||
* [zingmp3] Fix extraction (#10041)
|
||||
* [kaltura] Improve subtitles extraction (#10279)
|
||||
* [cultureunplugged] Fix extraction (#10330)
|
||||
+ [cnn] Add support for money.cnn.com (#2797)
|
||||
* [cbsnews] Fix extraction (#10362)
|
||||
* [cbs] Fix extraction (#10393)
|
||||
+ [litv] Support 'promo' URLs (#10385)
|
||||
* [snotr] Fix extraction (#10338)
|
||||
* [n-tv.de] Fix extraction (#10331)
|
||||
* [globo:article] Relax URL and video id regular expressions (#10379)
|
||||
|
||||
|
||||
version 2016.08.19
|
||||
|
||||
Core
|
||||
- Remove output template description from --help
|
||||
* Recognize lowercase units in parse_filesize
|
||||
|
||||
Extractors
|
||||
+ [porncom] Add extractor for porn.com (#2251, #10251)
|
||||
+ [generic] Add support for DBTV embeds
|
||||
* [vk:wallpost] Fix audio extraction for new site layout
|
||||
* [vk] Fix authentication
|
||||
+ [hgtvcom:show] Add extractor for hgtv.com shows (#10365)
|
||||
+ [discoverygo] Add support for another GO network sites
|
||||
|
||||
|
||||
version 2016.08.17
|
||||
|
||||
Core
|
||||
+ Add _get_netrc_login_info
|
||||
|
||||
Extractors
|
||||
* [mofosex] Extract all formats (#10335)
|
||||
+ [generic] Add support for vbox7 embeds
|
||||
+ [vbox7] Add support for embed URLs
|
||||
+ [viafree] Add extractor (#10358)
|
||||
+ [mtg] Add support for viafree URLs (#10358)
|
||||
* [theplatform] Extract all subtitles per language
|
||||
+ [xvideos] Fix HLS extraction (#10356)
|
||||
+ [amcnetworks] Add extractor
|
||||
+ [bbc:playlist] Add support for pagination (#10349)
|
||||
+ [fxnetworks] Add extractor (#9462)
|
||||
* [cbslocal] Fix extraction for SendtoNews-based videos
|
||||
* [sendtonews] Fix extraction
|
||||
* [jwplatform] Extract video id from JWPlayer data
|
||||
- [zippcast] Remove extractor (#10332)
|
||||
+ [viceland] Add extractor (#8799)
|
||||
+ [adobepass] Add base extractor for Adobe Pass Authentication
|
||||
* [life:embed] Improve extraction
|
||||
* [vgtv] Detect geo restricted videos (#10348)
|
||||
+ [uplynk] Add extractor
|
||||
* [xiami] Fix extraction (#10342)
|
||||
|
||||
|
||||
version 2016.08.13
|
||||
|
||||
Core
|
||||
@ -23,6 +388,7 @@ Extractors
|
||||
+ [pbs] Add support for high quality HTTP formats
|
||||
+ [crunchyroll] Add support for HLS formats (#10301)
|
||||
|
||||
|
||||
version 2016.08.12
|
||||
|
||||
Core
|
||||
|
2
Makefile
2
Makefile
@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
|
102
README.md
102
README.md
@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched (YouTube only)
|
||||
--no-color Do not emit color codes in output
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||
@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite".
|
||||
--fragment-retries RETRIES Number of retries for a fragment (default
|
||||
is 10), or "infinite" (DASH only)
|
||||
is 10), or "infinite" (DASH and hlsnative
|
||||
only)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH and
|
||||
hlsnative only)
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
@ -201,32 +206,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-a, --batch-file FILE File containing URLs to download ('-' for
|
||||
stdin)
|
||||
--id Use only video ID in file name
|
||||
-o, --output TEMPLATE Output filename template. Use %(title)s to
|
||||
get the title, %(uploader)s for the
|
||||
uploader name, %(uploader_id)s for the
|
||||
uploader nickname if different,
|
||||
%(autonumber)s to get an automatically
|
||||
incremented number, %(ext)s for the
|
||||
filename extension, %(format)s for the
|
||||
format description (like "22 - 1280x720" or
|
||||
"HD"), %(format_id)s for the unique id of
|
||||
the format (like YouTube's itags: "137"),
|
||||
%(upload_date)s for the upload date
|
||||
(YYYYMMDD), %(extractor)s for the provider
|
||||
(youtube, metacafe, etc), %(id)s for the
|
||||
video id, %(playlist_title)s,
|
||||
%(playlist_id)s, or %(playlist)s (=title if
|
||||
present, ID otherwise) for the playlist the
|
||||
video is in, %(playlist_index)s for the
|
||||
position in the playlist. %(height)s and
|
||||
%(width)s for the width and height of the
|
||||
video format. %(resolution)s for a textual
|
||||
description of the resolution of the video
|
||||
format. %% for a literal percent. Use - to
|
||||
output to stdout. Can also be used to
|
||||
download to a different directory, for
|
||||
example with -o '/my/downloads/%(uploader)s
|
||||
/%(title)s-%(id)s.%(ext)s' .
|
||||
-o, --output TEMPLATE Output filename template, see the "OUTPUT
|
||||
TEMPLATE" for all the info
|
||||
--autonumber-size NUMBER Specify the number of digits in
|
||||
%(autonumber)s when it is present in output
|
||||
filename template or --auto-number option
|
||||
@ -377,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, smotri, youku)
|
||||
|
||||
## Adobe Pass Options:
|
||||
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||
provider) identifier, use --ap-list-mso for
|
||||
a list of available MSOs
|
||||
--ap-username USERNAME Multiple-system operator account login
|
||||
--ap-password PASSWORD Multiple-system operator account password.
|
||||
If this option is left out, youtube-dl will
|
||||
ask interactively.
|
||||
--ap-list-mso List all supported multiple-system
|
||||
operators
|
||||
|
||||
## Post-processing Options:
|
||||
-x, --extract-audio Convert video files to audio-only files
|
||||
(requires ffmpeg or avconv and ffprobe or
|
||||
@ -436,11 +428,19 @@ You can configure youtube-dl by placing any supported command line option to a c
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
-x
|
||||
--no-mtime
|
||||
--proxy 127.0.0.1:3128
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
# Lines starting with # are comments
|
||||
|
||||
# Always extract audio
|
||||
-x
|
||||
|
||||
# Do not copy the mtime
|
||||
--no-mtime
|
||||
|
||||
# Use this proxy
|
||||
--proxy 127.0.0.1:3128
|
||||
|
||||
# Save all videos under Movies directory in your home directory
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
```
|
||||
|
||||
Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
|
||||
@ -669,7 +669,11 @@ $ youtube-dl -f 'best[filesize<50M]'
|
||||
|
||||
# Download best format available via direct link over HTTP/HTTPS protocol
|
||||
$ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
|
||||
|
||||
# Download the best video format and the best audio format without merging them
|
||||
$ youtube-dl -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s'
|
||||
```
|
||||
Note that in the last example, an output template is recommended as bestvideo and bestaudio may have the same file name.
|
||||
|
||||
|
||||
# VIDEO SELECTION
|
||||
@ -750,7 +754,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
|
||||
|
||||
### I have downloaded a video but how can I play it?
|
||||
|
||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
|
||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
|
||||
|
||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
||||
|
||||
@ -832,10 +836,42 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
|
||||
|
||||
### How do I pass cookies to youtube-dl?
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||
|
||||
### How do I stream directly to media player?
|
||||
|
||||
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
|
||||
|
||||
youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
|
||||
|
||||
### How do I download only new videos from a playlist?
|
||||
|
||||
Use download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file.
|
||||
|
||||
For example, at first,
|
||||
|
||||
youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
|
||||
|
||||
will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any:
|
||||
|
||||
youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
|
||||
|
||||
### Should I add `--hls-prefer-native` into my config?
|
||||
|
||||
When youtube-dl detects an HLS video, it can download it either with the built-in downloader or ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/youtube-dl each handle some invalid cases better than the other, there is an option to switch the downloader if needed.
|
||||
|
||||
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
|
||||
|
||||
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
|
||||
|
||||
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
|
||||
|
||||
### Can you add support for this anime video site, or site which shows current movies for free?
|
||||
|
||||
As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
|
||||
|
@ -60,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e
|
||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
||||
|
||||
read -p "Is ChangeLog up to date? (y/n) " -n 1
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
||||
|
||||
/bin/echo -e "\n### First of all, testing..."
|
||||
make clean
|
||||
if $skip_tests ; then
|
||||
|
@ -13,12 +13,16 @@
|
||||
- **5min**
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
- **9c9media:stack**
|
||||
- **9gag**
|
||||
- **9now.com.au**
|
||||
- **abc.net.au**
|
||||
- **Abc7News**
|
||||
- **abc.net.au:iview**
|
||||
- **abcnews**
|
||||
- **abcnews:video**
|
||||
- **abcotvs**: ABC Owned Television Stations
|
||||
- **abcotvs:clips**
|
||||
- **AcademicEarth:Course**
|
||||
- **acast**
|
||||
- **acast:channel**
|
||||
@ -30,11 +34,12 @@
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **AfreecaTV**: afreecatv.com
|
||||
- **Aftonbladet**
|
||||
- **AirMozilla**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **AMCNetworks**
|
||||
- **anderetijden**: npo.nl and ntr.nl
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
@ -65,6 +70,10 @@
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **auroravid**: AuroraVid
|
||||
- **AWAAN**
|
||||
- **awaan:live**
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
@ -80,6 +89,7 @@
|
||||
- **BeatportPro**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
- **BellMedia**
|
||||
- **Bet**
|
||||
- **Bigflix**
|
||||
- **Bild**: Bild.de
|
||||
@ -101,6 +111,7 @@
|
||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**
|
||||
- **BYUtvEvent**
|
||||
- **Camdemy**
|
||||
- **CamdemyFolder**
|
||||
- **CamWithHer**
|
||||
@ -109,17 +120,22 @@
|
||||
- **Canvas**
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CBC**
|
||||
- **CBCPlayer**
|
||||
- **CartoonNetwork**
|
||||
- **cbc.ca**
|
||||
- **cbc.ca:player**
|
||||
- **cbc.ca:watch**
|
||||
- **cbc.ca:watch:video**
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CCTV**
|
||||
- **CDA**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
- **CharlieRose**
|
||||
- **Chaturbate**
|
||||
- **Chilloutzone**
|
||||
- **chirbit**
|
||||
@ -155,10 +171,11 @@
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **CultureUnplugged**
|
||||
- **curiositystream**
|
||||
- **curiositystream:collection**
|
||||
- **CWTV**
|
||||
- **DailyMail**
|
||||
- **dailymotion**
|
||||
@ -170,10 +187,6 @@
|
||||
- **daum.net:playlist**
|
||||
- **daum.net:user**
|
||||
- **DBTV**
|
||||
- **DCN**
|
||||
- **dcn:live**
|
||||
- **dcn:season**
|
||||
- **dcn:video**
|
||||
- **DctpTv**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
@ -215,13 +228,14 @@
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
- **FacebookPluginsVideo**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **fc2:embed**
|
||||
- **Fczenit**
|
||||
- **features.aol.com**
|
||||
- **fernsehkritik.tv**
|
||||
@ -234,7 +248,9 @@
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
- **Foxgay**
|
||||
- **FoxNews**: Fox News and Fox Business Video
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
- **foxnews:insider**
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-quoi**
|
||||
- **FranceCulture**
|
||||
@ -247,6 +263,7 @@
|
||||
- **Funimation**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **FXNetworks**
|
||||
- **GameInformer**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
@ -262,6 +279,7 @@
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **Go**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **Golem**
|
||||
@ -277,6 +295,7 @@
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HGTV**
|
||||
- **hgtv.com:show**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
@ -309,6 +328,7 @@
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
- **Iwara**
|
||||
- **Izlesene**
|
||||
- **JeuxVideo**
|
||||
- **Jove**
|
||||
@ -322,6 +342,7 @@
|
||||
- **KarriereVideos**
|
||||
- **keek**
|
||||
- **KeezMovies**
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KonserthusetPlay**
|
||||
@ -337,6 +358,7 @@
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.it**
|
||||
- **Laola1Tv**
|
||||
- **LCI**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
- **Le**: 乐视网
|
||||
@ -367,6 +389,8 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
@ -375,6 +399,7 @@
|
||||
- **Metacritic**
|
||||
- **Mgoon**
|
||||
- **MGTV**: 芒果TV
|
||||
- **MiaoPai**
|
||||
- **Minhateca**
|
||||
- **MinistryGrid**
|
||||
- **Minoto**
|
||||
@ -396,10 +421,13 @@
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **MovingImage**
|
||||
- **MPORA**
|
||||
- **MSN**
|
||||
- **MTV**
|
||||
- **mtg**: MTG services
|
||||
- **mtv**
|
||||
- **mtv.de**
|
||||
- **mtv:video**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
@ -421,6 +449,7 @@
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **NBCOlympics**
|
||||
- **NBCSports**
|
||||
- **NBCSportsVPlayer**
|
||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||
@ -442,6 +471,7 @@
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **NhkVod**
|
||||
- **nhl.com**
|
||||
- **nhl.com:news**: NHL news
|
||||
- **nhl.com:videocenter**
|
||||
@ -450,7 +480,6 @@
|
||||
- **nick.de**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **NineCNineMedia**
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
@ -508,7 +537,6 @@
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
@ -520,6 +548,8 @@
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
- **PolskieRadioCategory**
|
||||
- **PornCom**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
- **PornHubPlaylist**
|
||||
@ -559,6 +589,7 @@
|
||||
- **revision3:embed**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RMCDecouverte**
|
||||
- **RockstarGames**
|
||||
- **RoosterTeeth**
|
||||
- **RottenTomatoes**
|
||||
@ -648,7 +679,6 @@
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
- **SSA**
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
@ -665,6 +695,7 @@
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
@ -679,9 +710,11 @@
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleQuebec**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TheIntercept**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
@ -690,8 +723,6 @@
|
||||
- **TheStar**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **THVideo**
|
||||
- **THVideoPlaylist**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
@ -705,8 +736,7 @@
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **trollvids**
|
||||
- **TruTube**
|
||||
- **TruTV**
|
||||
- **Tube8**
|
||||
- **TubiTv**
|
||||
- **tudou**
|
||||
@ -728,10 +758,10 @@
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **TVNoe**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
@ -748,8 +778,11 @@
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Unistra**
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
- **uplynk:preplay**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USANetwork**
|
||||
- **USAToday**
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
@ -765,7 +798,9 @@
|
||||
- **VevoPlaylist**
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **Viafree**
|
||||
- **Vice**
|
||||
- **Viceland**
|
||||
- **ViceShow**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
@ -818,6 +853,7 @@
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VyboryMos**
|
||||
- **Walla**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
@ -831,7 +867,7 @@
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **WNL**
|
||||
- **wnl**: npo.nl and ntr.nl
|
||||
- **WorldStarHipHop**
|
||||
- **wrzuta.pl**
|
||||
- **wrzuta.pl:playlist**
|
||||
@ -885,6 +921,4 @@
|
||||
- **Zapiks**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3:album**: mp3.zing.vn albums
|
||||
- **zingmp3:song**: mp3.zing.vn songs
|
||||
- **ZippCast**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
|
@ -39,6 +39,8 @@ from youtube_dl.utils import (
|
||||
is_html,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
mimetype2ext,
|
||||
month_by_name,
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
@ -290,6 +292,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
|
||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
|
||||
|
||||
def test_unified_timestamps(self):
|
||||
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||
@ -310,6 +313,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@ -625,6 +629,22 @@ class TestUtil(unittest.TestCase):
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_mimetype2ext(self):
|
||||
self.assertEqual(mimetype2ext(None), None)
|
||||
self.assertEqual(mimetype2ext('video/x-flv'), 'flv')
|
||||
self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8')
|
||||
self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
|
||||
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
|
||||
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
|
||||
|
||||
def test_month_by_name(self):
|
||||
self.assertEqual(month_by_name(None), None)
|
||||
self.assertEqual(month_by_name('December', 'en'), 12)
|
||||
self.assertEqual(month_by_name('décembre', 'fr'), 12)
|
||||
self.assertEqual(month_by_name('December'), 12)
|
||||
self.assertEqual(month_by_name('décembre'), None)
|
||||
self.assertEqual(month_by_name('Unknown', 'unknown'), None)
|
||||
|
||||
def test_parse_codecs(self):
|
||||
self.assertEqual(parse_codecs(''), {})
|
||||
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||
@ -712,6 +732,9 @@ class TestUtil(unittest.TestCase):
|
||||
inp = '''{"foo":101}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"foo":101}''')
|
||||
|
||||
inp = '''{"duration": "00:01:07"}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
@ -817,7 +840,10 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_filesize('2 MiB'), 2097152)
|
||||
self.assertEqual(parse_filesize('5 GB'), 5000000000)
|
||||
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
|
||||
self.assertEqual(parse_filesize('1.2tb'), 1200000000000)
|
||||
self.assertEqual(parse_filesize('1,24 KB'), 1240)
|
||||
self.assertEqual(parse_filesize('1,24 kb'), 1240)
|
||||
self.assertEqual(parse_filesize('8.5 megabytes'), 8500000)
|
||||
|
||||
def test_parse_count(self):
|
||||
self.assertEqual(parse_count(None), None)
|
||||
|
@ -131,6 +131,9 @@ class YoutubeDL(object):
|
||||
username: Username for authentication purposes.
|
||||
password: Password for authentication purposes.
|
||||
videopassword: Password for accessing a video.
|
||||
ap_mso: Adobe Pass multiple-system operator identifier.
|
||||
ap_username: Multiple-system operator account username.
|
||||
ap_password: Multiple-system operator account password.
|
||||
usenetrc: Use netrc for authentication instead.
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
@ -1256,8 +1259,10 @@ class YoutubeDL(object):
|
||||
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
|
||||
if thumbnails:
|
||||
thumbnails.sort(key=lambda t: (
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
t.get('id'), t.get('url')))
|
||||
t.get('preference') if t.get('preference') is not None else -1,
|
||||
t.get('width') if t.get('width') is not None else -1,
|
||||
t.get('height') if t.get('height') is not None else -1,
|
||||
t.get('id') if t.get('id') is not None else '', t.get('url')))
|
||||
for i, t in enumerate(thumbnails):
|
||||
t['url'] = sanitize_url(t['url'])
|
||||
if t.get('width') and t.get('height'):
|
||||
@ -1299,7 +1304,7 @@ class YoutubeDL(object):
|
||||
for subtitle_format in subtitle:
|
||||
if subtitle_format.get('url'):
|
||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||
if 'ext' not in subtitle_format:
|
||||
if subtitle_format.get('ext') is None:
|
||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||
|
||||
if self.params.get('listsubtitles', False):
|
||||
@ -1354,7 +1359,7 @@ class YoutubeDL(object):
|
||||
note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
||||
)
|
||||
# Automatically determine file extension if missing
|
||||
if 'ext' not in format:
|
||||
if format.get('ext') is None:
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
# Automatically determine protocol if missing (useful for format
|
||||
# selection purposes)
|
||||
|
@ -34,12 +34,14 @@ from .utils import (
|
||||
setproctitle,
|
||||
std_headers,
|
||||
write_string,
|
||||
render_table,
|
||||
)
|
||||
from .update import update_self
|
||||
from .downloader import (
|
||||
FileDownloader,
|
||||
)
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
from .extractor.adobepass import MSO_INFO
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
|
||||
@ -118,18 +120,26 @@ def _real_main(argv=None):
|
||||
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||
write_string(desc + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
if opts.ap_list_mso:
|
||||
table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
|
||||
write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
# Conflicting, missing and erroneous options
|
||||
if opts.usenetrc and (opts.username is not None or opts.password is not None):
|
||||
parser.error('using .netrc conflicts with giving username/password')
|
||||
if opts.password is not None and opts.username is None:
|
||||
parser.error('account username missing\n')
|
||||
if opts.ap_password is not None and opts.ap_username is None:
|
||||
parser.error('TV Provider account username missing\n')
|
||||
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
|
||||
parser.error('using output template conflicts with using title, video ID or auto number')
|
||||
if opts.usetitle and opts.useid:
|
||||
parser.error('using title conflicts with using video ID')
|
||||
if opts.username is not None and opts.password is None:
|
||||
opts.password = compat_getpass('Type account password and press [Return]: ')
|
||||
if opts.ap_username is not None and opts.ap_password is None:
|
||||
opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
|
||||
if opts.ratelimit is not None:
|
||||
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
|
||||
if numeric_limit is None:
|
||||
@ -155,6 +165,8 @@ def _real_main(argv=None):
|
||||
parser.error('max sleep interval must be greater than or equal to min sleep interval')
|
||||
else:
|
||||
opts.max_sleep_interval = opts.sleep_interval
|
||||
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
||||
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
||||
|
||||
def parse_retries(retries):
|
||||
if retries in ('inf', 'infinite'):
|
||||
@ -254,8 +266,6 @@ def _real_main(argv=None):
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
})
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
if opts.embedthumbnail:
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
@ -264,6 +274,10 @@ def _real_main(argv=None):
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
# XAttrMetadataPP should be run after post-processors that may change file
|
||||
# contents
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
@ -271,12 +285,6 @@ def _real_main(argv=None):
|
||||
'key': 'ExecAfterDownload',
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
})
|
||||
if opts.xattr_set_filesize:
|
||||
try:
|
||||
import xattr
|
||||
xattr # Confuse flake8
|
||||
except ImportError:
|
||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||
external_downloader_args = None
|
||||
if opts.external_downloader_args:
|
||||
external_downloader_args = compat_shlex_split(opts.external_downloader_args)
|
||||
@ -293,6 +301,9 @@ def _real_main(argv=None):
|
||||
'password': opts.password,
|
||||
'twofactor': opts.twofactor,
|
||||
'videopassword': opts.videopassword,
|
||||
'ap_mso': opts.ap_mso,
|
||||
'ap_username': opts.ap_username,
|
||||
'ap_password': opts.ap_password,
|
||||
'quiet': (opts.quiet or any_getting or any_printing),
|
||||
'no_warnings': opts.no_warnings,
|
||||
'forceurl': opts.geturl,
|
||||
@ -318,6 +329,7 @@ def _real_main(argv=None):
|
||||
'nooverwrites': opts.nooverwrites,
|
||||
'retries': opts.retries,
|
||||
'fragment_retries': opts.fragment_retries,
|
||||
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'continuedl': opts.continue_dl,
|
||||
|
@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
@ -19,32 +18,32 @@ class DashSegmentsFD(FragmentFD):
|
||||
FD_NAME = 'dashsegments'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
base_url = info_dict['url']
|
||||
segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
|
||||
initialization_url = info_dict.get('initialization_url')
|
||||
segments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(segment_urls) + (1 if initialization_url else 0),
|
||||
'total_frags': len(segments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
def append_url_to_file(target_url, tmp_filename, segment_name):
|
||||
def process_segment(segment, tmp_filename, num):
|
||||
segment_url = segment['url']
|
||||
segment_name = 'Frag%d' % num
|
||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = num == 0 or not skip_unavailable_fragments
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
|
||||
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
@ -52,26 +51,27 @@ class DashSegmentsFD(FragmentFD):
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attemps
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# So, we will retry all fragments that fail with 404 HTTP error for now.
|
||||
if err.code != 404:
|
||||
raise
|
||||
# Retry fragment
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(segment_name, count, fragment_retries)
|
||||
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(segment_name)
|
||||
return True
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
return True
|
||||
|
||||
if initialization_url:
|
||||
append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
|
||||
for i, segment_url in enumerate(segment_urls):
|
||||
append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)
|
||||
for i, segment in enumerate(segments):
|
||||
if not process_segment(segment, ctx['tmpfilename'], i):
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@ -220,6 +220,12 @@ class FFmpegFD(ExternalFD):
|
||||
if proxy:
|
||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||
proxy = 'http://%s' % proxy
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
self.report_warning(
|
||||
'%s does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
|
||||
|
||||
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||
# We could switch to the following code if we are able to detect version properly
|
||||
|
@ -6,6 +6,7 @@ import time
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
)
|
||||
@ -22,13 +23,19 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
Available options:
|
||||
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH only)
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH
|
||||
and hlsnative only)
|
||||
skip_unavailable_fragments:
|
||||
Skip unavailable fragments (DASH and hlsnative only)
|
||||
"""
|
||||
|
||||
def report_retry_fragment(self, fragment_name, count, retries):
|
||||
def report_retry_fragment(self, err, fragment_name, count, retries):
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
|
||||
% (fragment_name, count, self.format_retries(retries)))
|
||||
'[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
|
||||
|
||||
def report_skip_fragment(self, fragment_name):
|
||||
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
self._prepare_frag_download(ctx)
|
||||
|
@ -13,6 +13,7 @@ from .fragment import FragmentFD
|
||||
from .external import FFmpegFD
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_error,
|
||||
compat_urlparse,
|
||||
compat_struct_pack,
|
||||
)
|
||||
@ -20,6 +21,7 @@ from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
parse_m3u8_attributes,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@ -29,7 +31,7 @@ class HlsFD(FragmentFD):
|
||||
FD_NAME = 'hlsnative'
|
||||
|
||||
@staticmethod
|
||||
def can_download(manifest):
|
||||
def can_download(manifest, info_dict):
|
||||
UNSUPPORTED_FEATURES = (
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
@ -51,6 +53,7 @@ class HlsFD(FragmentFD):
|
||||
)
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
|
||||
check_results.append(not info_dict.get('is_live'))
|
||||
return all(check_results)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
@ -60,7 +63,7 @@ class HlsFD(FragmentFD):
|
||||
|
||||
s = manifest.decode('utf-8', 'ignore')
|
||||
|
||||
if not self.can_download(s):
|
||||
if not self.can_download(s, info_dict):
|
||||
self.report_warning(
|
||||
'hlsnative has detected features it does not support, '
|
||||
'extraction will be delegated to ffmpeg')
|
||||
@ -82,6 +85,14 @@ class HlsFD(FragmentFD):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@ -94,13 +105,37 @@ class HlsFD(FragmentFD):
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
frag_name = 'Frag%d' % i
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
# First we try to retry then either skip or abort.
|
||||
# See https://github.com/rg3/youtube-dl/issues/10165,
|
||||
# https://github.com/rg3/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_name, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
self.report_skip_fragment(frag_name)
|
||||
continue
|
||||
self.report_error(
|
||||
'giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
frag_content = AES.new(
|
||||
@ -108,7 +143,7 @@ class HlsFD(FragmentFD):
|
||||
ctx['dest_stream'].write(frag_content)
|
||||
frags_filenames.append(frag_sanitized)
|
||||
# We only download the first fragment during the test
|
||||
if self.params.get('test', False):
|
||||
if test:
|
||||
break
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
@ -116,10 +151,12 @@ class HlsFD(FragmentFD):
|
||||
decrypt_info = parse_m3u8_attributes(line[11:])
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
if 'IV' in decrypt_info:
|
||||
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
|
||||
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
|
@ -13,6 +13,9 @@ from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
write_xattr,
|
||||
XAttrMetadataError,
|
||||
XAttrUnavailableError,
|
||||
)
|
||||
|
||||
|
||||
@ -179,9 +182,8 @@ class HttpFD(FileDownloader):
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
import xattr
|
||||
xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
|
||||
except(OSError, IOError, ImportError) as err:
|
||||
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
|
||||
try:
|
||||
|
@ -7,12 +7,13 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ABCIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au'
|
||||
_VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||
@ -93,3 +94,59 @@ class ABCIE(InfoExtractor):
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
|
||||
class ABCIViewIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au:iview'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
|
||||
'md5': '979d10b2939101f0d27a06b79edad536',
|
||||
'info_dict': {
|
||||
'id': 'FA1505V024S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 27 Ep 24',
|
||||
'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d',
|
||||
'upload_date': '20160820',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1471719600,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_params = self._parse_json(self._search_regex(
|
||||
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
|
||||
title = video_params['title']
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||
|
||||
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
src_vtt = stream.get('captions', {}).get('src-vtt')
|
||||
if src_vtt:
|
||||
subtitles['en'] = [{
|
||||
'url': src_vtt,
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
|
||||
'duration': int_or_none(video_params.get('eventDuration')),
|
||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||
'series': video_params.get('seriesTitle'),
|
||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)),
|
||||
'episode': self._html_search_meta('episode_title', webpage),
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ from ..compat import compat_urlparse
|
||||
|
||||
class AbcNewsVideoIE(AMPIE):
|
||||
IE_NAME = 'abcnews:video'
|
||||
_VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||
@ -49,7 +49,7 @@ class AbcNewsVideoIE(AMPIE):
|
||||
|
||||
class AbcNewsIE(InfoExtractor):
|
||||
IE_NAME = 'abcnews'
|
||||
_VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||
|
@ -1,13 +1,19 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class Abc7NewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
|
||||
class ABCOTVSIE(InfoExtractor):
|
||||
IE_NAME = 'abcotvs'
|
||||
IE_DESC = 'ABC Owned Television Stations'
|
||||
_VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
|
||||
@ -15,7 +21,7 @@ class Abc7NewsIE(InfoExtractor):
|
||||
'id': '472581',
|
||||
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
||||
'ext': 'mp4',
|
||||
'title': 'East Bay museum celebrates history of synthesized music',
|
||||
'title': 'East Bay museum celebrates vintage synthesizers',
|
||||
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1421123075,
|
||||
@ -41,7 +47,7 @@ class Abc7NewsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
m3u8 = self._html_search_meta(
|
||||
'contentURL', webpage, 'm3u8 url', fatal=True)
|
||||
'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0]
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
@ -66,3 +72,41 @@ class Abc7NewsIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ABCOTVSClipsIE(InfoExtractor):
|
||||
IE_NAME = 'abcotvs:clips'
|
||||
_VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://clips.abcotvs.com/kabc/video/214814',
|
||||
'info_dict': {
|
||||
'id': '214814',
|
||||
'ext': 'mp4',
|
||||
'title': 'SpaceX launch pad explosion destroys rocket, satellite',
|
||||
'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
|
||||
'upload_date': '20160901',
|
||||
'timestamp': 1472756695,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0]
|
||||
title = video_data['title']
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['videoURL'].split('?')[0], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('thumbnailURL'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('pubDate')),
|
||||
'formats': formats,
|
||||
}
|
1428
youtube_dl/extractor/adobepass.py
Normal file
1428
youtube_dl/extractor/adobepass.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -3,16 +3,14 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AdultSwimIE(InfoExtractor):
|
||||
class AdultSwimIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
||||
|
||||
_TESTS = [{
|
||||
@ -96,7 +94,8 @@ class AdultSwimIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -148,7 +147,10 @@ class AdultSwimIE(InfoExtractor):
|
||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||
video_info = bootstrapped_data['slugged_video']
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video')
|
||||
video_info = bootstrapped_data.get(
|
||||
'heroMetadata', {}).get('trailer', {}).get('video')
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
|
||||
if not video_info:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
|
||||
@ -162,70 +164,38 @@ class AdultSwimIE(InfoExtractor):
|
||||
elif video_info.get('videoPlaybackID'):
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.'
|
||||
if video_info.get('auth') is True else 'Unable to find stream or clips',
|
||||
expected=True)
|
||||
if video_info.get('auth') is True:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.', expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find stream or clips')
|
||||
|
||||
episode_id = video_info['id']
|
||||
episode_title = video_info['title']
|
||||
episode_description = video_info['description']
|
||||
episode_duration = video_info.get('duration')
|
||||
episode_description = video_info.get('description')
|
||||
episode_duration = int_or_none(video_info.get('duration'))
|
||||
view_count = int_or_none(video_info.get('views'))
|
||||
|
||||
entries = []
|
||||
for part_num, segment_id in enumerate(segment_ids):
|
||||
segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id
|
||||
|
||||
segement_info = self._extract_cvp_info(
|
||||
'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
|
||||
segment_id, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
|
||||
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
|
||||
},
|
||||
})
|
||||
segment_title = '%s - %s' % (show_title, episode_title)
|
||||
if len(segment_ids) > 1:
|
||||
segment_title += ' Part %d' % (part_num + 1)
|
||||
|
||||
idoc = self._download_xml(
|
||||
segment_url, segment_title,
|
||||
'Downloading segment information', 'Unable to download segment information')
|
||||
|
||||
segment_duration = float_or_none(
|
||||
xpath_text(idoc, './/trt', 'segment duration').strip())
|
||||
|
||||
formats = []
|
||||
file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
|
||||
|
||||
unique_urls = []
|
||||
unique_file_els = []
|
||||
for file_el in file_els:
|
||||
media_url = file_el.text
|
||||
if not media_url or determine_ext(media_url) == 'f4m':
|
||||
continue
|
||||
if file_el.text not in unique_urls:
|
||||
unique_urls.append(file_el.text)
|
||||
unique_file_els.append(file_el)
|
||||
|
||||
for file_el in unique_file_els:
|
||||
bitrate = file_el.attrib.get('bitrate')
|
||||
ftype = file_el.attrib.get('type')
|
||||
media_url = file_el.text
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, segment_title, 'mp4', preference=0,
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': '%s_%s' % (bitrate, ftype),
|
||||
'url': file_el.text.strip(),
|
||||
# The bitrate may not be a number (for example: 'iphone')
|
||||
'tbr': int(bitrate) if bitrate.isdigit() else None,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
segement_info.update({
|
||||
'id': segment_id,
|
||||
'title': segment_title,
|
||||
'formats': formats,
|
||||
'duration': segment_duration,
|
||||
'description': episode_description
|
||||
'description': episode_description,
|
||||
})
|
||||
entries.append(segement_info)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
@ -234,5 +204,6 @@ class AdultSwimIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
'title': '%s - %s' % (show_title, episode_title),
|
||||
'description': episode_description,
|
||||
'duration': episode_duration
|
||||
'duration': episode_duration,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
@ -109,7 +109,10 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating'])
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||
|
@ -1,64 +0,0 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class AftonbladetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
|
||||
'info_dict': {
|
||||
'id': '36015',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
|
||||
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
|
||||
'timestamp': 1394142732,
|
||||
'upload_date': '20140306',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# find internal video meta data
|
||||
meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
|
||||
player_config = self._parse_json(self._html_search_regex(
|
||||
r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
|
||||
internal_meta_id = player_config['aptomaVideoId']
|
||||
internal_meta_url = meta_url % internal_meta_id
|
||||
internal_meta_json = self._download_json(
|
||||
internal_meta_url, video_id, 'Downloading video meta data')
|
||||
|
||||
# find internal video formats
|
||||
format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
|
||||
internal_video_id = internal_meta_json['videoId']
|
||||
internal_formats_url = format_url % internal_video_id
|
||||
internal_formats_json = self._download_json(
|
||||
internal_formats_url, video_id, 'Downloading video formats')
|
||||
|
||||
formats = []
|
||||
for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
|
||||
p = fmt['paths'][0]
|
||||
formats.append({
|
||||
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
|
||||
'ext': 'mp4',
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
'tbr': int_or_none(fmt.get('bitrate')),
|
||||
'protocol': 'http',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': internal_meta_json['title'],
|
||||
'formats': formats,
|
||||
'thumbnail': internal_meta_json.get('imageUrl'),
|
||||
'description': internal_meta_json.get('shortPreamble'),
|
||||
'timestamp': int_or_none(internal_meta_json.get('timePublished')),
|
||||
'duration': int_or_none(internal_meta_json.get('duration')),
|
||||
'view_count': int_or_none(internal_meta_json.get('views')),
|
||||
}
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
|
92
youtube_dl/extractor/amcnetworks.py
Normal file
92
youtube_dl/extractor/amcnetworks.py
Normal file
@ -0,0 +1,92 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 's3MX01Nl4vPH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maron - Season 4 - Step 1',
|
||||
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
|
||||
'age_limit': 17,
|
||||
'upload_date': '20160505',
|
||||
'timestamp': 1462468831,
|
||||
'uploader': 'AMCN',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires TV provider accounts',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ifc.com/movies/chaos',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
title = theplatform_metadata['title']
|
||||
rating = theplatform_metadata['ratings'][0]['rating']
|
||||
auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
|
||||
if auth_required == 'true':
|
||||
requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
|
||||
resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
|
||||
media_url = update_url_query(media_url, query)
|
||||
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'age_limit': parse_age_limit(parse_age_limit(rating)),
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
series = theplatform_metadata.get(ns + '$show')
|
||||
season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle')
|
||||
episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
|
||||
if season_number:
|
||||
title = 'Season %d - %s' % (season_number, title)
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
})
|
||||
return info
|
@ -238,7 +238,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = '(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'md5': 'd216c3a86493f9322545e045ddc3eb35',
|
||||
|
@ -12,74 +12,51 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class DCNIE(InfoExtractor):
|
||||
class AWAANIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
if video_id and int(video_id) > 0:
|
||||
return self.url_result(
|
||||
'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo')
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
|
||||
elif season_id and int(season_id) > 0:
|
||||
return self.url_result(smuggle_url(
|
||||
'http://www.dcndigital.ae/program/season/%s' % season_id,
|
||||
{'show_id': show_id}), 'DCNSeason')
|
||||
'http://awaan.ae/program/season/%s' % season_id,
|
||||
{'show_id': show_id}), 'AWAANSeason')
|
||||
else:
|
||||
return self.url_result(
|
||||
'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason')
|
||||
'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
|
||||
|
||||
|
||||
class DCNBaseIE(InfoExtractor):
|
||||
def _extract_video_info(self, video_data, video_id, is_live):
|
||||
class AWAANBaseIE(InfoExtractor):
|
||||
def _parse_video_data(self, video_data, video_id, is_live):
|
||||
title = video_data.get('title_en') or video_data['title_ar']
|
||||
img = video_data.get('img')
|
||||
thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
|
||||
duration = int_or_none(video_data.get('duration'))
|
||||
description = video_data.get('description_en') or video_data.get('description_ar')
|
||||
timestamp = parse_iso8601(video_data.get('create_time'), ' ')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'description': video_data.get('description_en') or video_data.get('description_ar'),
|
||||
'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None,
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
|
||||
formats = []
|
||||
format_url_base = 'http' + self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url_base + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url_base + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
|
||||
class DCNVideoIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:video'
|
||||
class AWAANVideoIE(AWAANBaseIE):
|
||||
IE_NAME = 'awaan:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
|
||||
'md5': '5f61c33bfc7794315c671a62d43116aa',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '17375',
|
||||
@ -89,10 +66,7 @@ class DCNVideoIE(DCNBaseIE):
|
||||
'duration': 2041,
|
||||
'timestamp': 1227504126,
|
||||
'upload_date': '20081124',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'uploader_id': '71',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
|
||||
@ -102,54 +76,69 @@ class DCNVideoIE(DCNBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = sanitized_Request(
|
||||
video_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
video_data = self._download_json(request, video_id)
|
||||
info = self._extract_video_info(video_data, video_id, False)
|
||||
video_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(video_data, video_id, False)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
|
||||
compat_urllib_parse_urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
}), video_id)
|
||||
info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native')
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': 'MangomoloVideo',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class DCNLiveIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:live'
|
||||
class AWAANLiveIE(AWAANBaseIE):
|
||||
IE_NAME = 'awaan:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://awaan.ae/live/6/dubai-tv',
|
||||
'info_dict': {
|
||||
'id': '6',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'upload_date': '20150107',
|
||||
'timestamp': 1420588800,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
request = sanitized_Request(
|
||||
channel_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
channel_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(channel_data, channel_id, True)
|
||||
|
||||
channel_data = self._download_json(request, channel_id)
|
||||
info = self._extract_video_info(channel_data, channel_id, True)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' +
|
||||
compat_urllib_parse_urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
}), channel_id)
|
||||
info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8')
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': 'MangomoloLive',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class DCNSeasonIE(InfoExtractor):
|
||||
IE_NAME = 'dcn:season'
|
||||
class AWAANSeasonIE(InfoExtractor):
|
||||
IE_NAME = 'awaan:season'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
|
||||
_TEST = {
|
||||
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
|
||||
@ -170,21 +159,17 @@ class DCNSeasonIE(InfoExtractor):
|
||||
data['season'] = season_id
|
||||
show_id = smuggled_data.get('show_id')
|
||||
if show_id is None:
|
||||
request = sanitized_Request(
|
||||
season = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
season = self._download_json(request, season_id)
|
||||
season_id, headers={'Origin': 'http://awaan.ae'})
|
||||
show_id = season['id']
|
||||
data['show_id'] = show_id
|
||||
request = sanitized_Request(
|
||||
show = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/show',
|
||||
urlencode_postdata(data),
|
||||
{
|
||||
'Origin': 'http://www.dcndigital.ae',
|
||||
show_id, data=urlencode_postdata(data), headers={
|
||||
'Origin': 'http://awaan.ae',
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
|
||||
show = self._download_json(request, show_id)
|
||||
if not season_id:
|
||||
season_id = show['default_season']
|
||||
for season in show['seasons']:
|
||||
@ -195,6 +180,6 @@ class DCNSeasonIE(InfoExtractor):
|
||||
for video in show['videos']:
|
||||
video_id = compat_str(video['id'])
|
||||
entries.append(self.url_result(
|
||||
'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id))
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
|
||||
|
||||
return self.playlist_result(entries, season_id, title)
|
@ -103,7 +103,7 @@ class AzubuIE(InfoExtractor):
|
||||
|
||||
|
||||
class AzubuLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||
|
@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'uploader_id': 'dotscale',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# with escaped quote in title
|
||||
'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
|
||||
'info_dict': {
|
||||
'title': '"Entropy" EP',
|
||||
'uploader_id': 'jstrecords',
|
||||
'id': 'entropy-ep',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
title = self._search_regex(
|
||||
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||
webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = title.replace(r'\"', '"')
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'uploader_id': uploader_id,
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@ -17,6 +18,7 @@ from ..utils import (
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
@ -1026,7 +1028,7 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
|
||||
class BBCCoUkArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
|
||||
IE_NAME = 'bbc.co.uk:article'
|
||||
IE_DESC = 'BBC articles'
|
||||
|
||||
@ -1056,19 +1058,35 @@ class BBCCoUkArticleIE(InfoExtractor):
|
||||
|
||||
|
||||
class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
def _entries(self, webpage, url, playlist_id):
|
||||
single_page = 'page' in compat_urlparse.parse_qs(
|
||||
compat_urlparse.urlparse(url).query)
|
||||
for page_num in itertools.count(2):
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
|
||||
yield self.url_result(
|
||||
self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
|
||||
if single_page:
|
||||
return
|
||||
next_page = self._search_regex(
|
||||
r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
|
||||
webpage, 'next page url', default=None, group='url')
|
||||
if not next_page:
|
||||
break
|
||||
webpage = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, next_page), playlist_id,
|
||||
'Downloading page %d' % page_num, page_num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
|
||||
|
||||
title, description = self._extract_title_and_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.playlist_result(
|
||||
self._entries(webpage, url, playlist_id),
|
||||
playlist_id, title, description)
|
||||
|
||||
|
||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
@ -1117,6 +1135,24 @@ class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# multipage playlist, explicit page
|
||||
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
|
||||
'info_dict': {
|
||||
'id': 'b00mfl7n',
|
||||
'title': 'Frozen Planet - Clips - BBC One',
|
||||
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
}, {
|
||||
# multipage playlist, all pages
|
||||
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
|
||||
'info_dict': {
|
||||
'id': 'b00mfl7n',
|
||||
'title': 'Frozen Planet - Clips - BBC One',
|
||||
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||
},
|
||||
'playlist_mincount': 142,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
|
||||
'only_matching': True,
|
||||
|
75
youtube_dl/extractor/bellmedia.py
Normal file
75
youtube_dl/extractor/bellmedia.py
Normal file
@ -0,0 +1,75 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BellMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:
|
||||
ctv|
|
||||
tsn|
|
||||
bnn|
|
||||
thecomedynetwork|
|
||||
discovery|
|
||||
discoveryvelocity|
|
||||
sciencechannel|
|
||||
investigationdiscovery|
|
||||
animalplanet|
|
||||
bravo|
|
||||
mtv|
|
||||
space
|
||||
)\.ca|
|
||||
much\.com
|
||||
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
'info_dict': {
|
||||
'id': '706966',
|
||||
'ext': 'mp4',
|
||||
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
|
||||
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
|
||||
'upload_date': '20150919',
|
||||
'timestamp': 1442624700,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}, {
|
||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DOMAINS = {
|
||||
'thecomedynetwork': 'comedy',
|
||||
'discoveryvelocity': 'discvel',
|
||||
'sciencechannel': 'discsci',
|
||||
'investigationdiscovery': 'invdisc',
|
||||
'animalplanet': 'aniplan',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
domain = domain.split('.')[0]
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
@ -2,7 +2,6 @@ from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class BetIE(MTVServicesInfoExtractor):
|
||||
@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor):
|
||||
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return compat_urllib_parse_urlencode({
|
||||
return {
|
||||
'uuid': uri,
|
||||
})
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
|
||||
|
@ -1,33 +1,27 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
_TEST = {
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'info_dict': {
|
||||
'id': '1554319',
|
||||
'id': '1074402',
|
||||
'ext': 'mp4',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
@ -38,128 +32,70 @@ class BiliBiliIE(InfoExtractor):
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
'info_dict': {
|
||||
'id': '1507019',
|
||||
'ext': 'mp4',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'timestamp': 1396530060,
|
||||
'upload_date': '20140403',
|
||||
'uploader': '枫叶逝去',
|
||||
'uploader_id': '520116',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||
'info_dict': {
|
||||
'id': '7802182',
|
||||
'ext': 'mp4',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
# Missing upload time
|
||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||
'info_dict': {
|
||||
'id': '2880301',
|
||||
'ext': 'mp4',
|
||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||
'uploader': '黑夜为猫',
|
||||
'uploader_id': '610729',
|
||||
},
|
||||
'params': {
|
||||
# Just to test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['upload time'],
|
||||
}]
|
||||
}
|
||||
|
||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||
# the Android client
|
||||
# TODO: find the sign algorithm used in the flash player
|
||||
_APP_KEY = '86385cdc024c0f6c'
|
||||
_APP_KEY = '6f90a59ac58a4123'
|
||||
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
params = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))
|
||||
cid = params['cid'][0]
|
||||
|
||||
info_xml_str = self._download_webpage(
|
||||
'http://interface.bilibili.com/v_cdn_play',
|
||||
cid, query={'appkey': self._APP_KEY, 'cid': cid},
|
||||
note='Downloading video info page')
|
||||
|
||||
err_msg = None
|
||||
durls = None
|
||||
info_xml = None
|
||||
try:
|
||||
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
|
||||
err_msg = (info_json or {}).get('error_text')
|
||||
if 'anime/v' not in url:
|
||||
cid = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
else:
|
||||
err_msg = xpath_text(info_xml, './message')
|
||||
js = self._download_json(
|
||||
'http://bangumi.bilibili.com/web_api/get_source', video_id,
|
||||
data=urlencode_postdata({'episode_id': video_id}),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
cid = js['result']['cid']
|
||||
|
||||
if info_xml is not None:
|
||||
durls = info_xml.findall('./durl')
|
||||
if not durls:
|
||||
if err_msg:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
|
||||
else:
|
||||
raise ExtractorError('No videos found!')
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page')
|
||||
|
||||
entries = []
|
||||
|
||||
for durl in durls:
|
||||
size = xpath_text(durl, ['./filesize', './size'])
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl.find('./url').text,
|
||||
'filesize': int_or_none(size),
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.findall('./backup_url/url'):
|
||||
for backup_url in durl.get('backup_url', []):
|
||||
formats.append({
|
||||
'url': backup_url.text,
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
|
||||
'duration': int_or_none(xpath_text(durl, './length'), 1000),
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
datetime_str = self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
||||
timestamp = None
|
||||
if datetime_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
|
||||
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': compat_str(cid),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
|
||||
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': float_or_none(video_info.get('timelength'), scale=1000),
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||
_VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
|
@ -1,31 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BravoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
|
||||
_TEST = {
|
||||
class BravoTVIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
|
||||
'md5': 'd60cdf68904e854fac669bd26cccf801',
|
||||
'md5': '9086d0b7ef0ea2aabc4781d75f4e5863',
|
||||
'info_dict': {
|
||||
'id': 'LitrBdX64qLn',
|
||||
'id': 'zHyk1_HU_mPy',
|
||||
'ext': 'mp4',
|
||||
'title': 'Last Chance Kitchen Returns',
|
||||
'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
|
||||
'timestamp': 1448926740,
|
||||
'upload_date': '20151130',
|
||||
'title': 'LCK Ep 12: Fishy Finale',
|
||||
'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.',
|
||||
'uploader': 'NBCU-BRAV',
|
||||
'upload_date': '20160302',
|
||||
'timestamp': 1456945320,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
|
||||
release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
|
||||
{'force_smil_url': True}), 'ThePlatform', release_pid)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
|
||||
display_id)
|
||||
info = {}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
account_pid, release_pid = [None] * 2
|
||||
tve = settings.get('sharedTVE')
|
||||
if tve:
|
||||
query['manifest'] = 'm3u'
|
||||
account_pid = 'HNK2IC'
|
||||
release_pid = tve['release_pid']
|
||||
if tve.get('entitlement') == 'auth':
|
||||
adobe_pass = settings.get('adobePass', {})
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId', 'bravo'),
|
||||
tve['title'], release_pid, tve.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
|
||||
else:
|
||||
shared_playlist = settings['shared_playlist']
|
||||
account_pid = shared_playlist['account_pid']
|
||||
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
|
||||
release_pid = metadata['release_pid']
|
||||
info.update({
|
||||
'title': metadata['title'],
|
||||
'description': metadata.get('description'),
|
||||
'season_number': int_or_none(metadata.get('season_num')),
|
||||
'episode_number': int_or_none(metadata.get('episode_num')),
|
||||
})
|
||||
query['switch'] = 'progressive'
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': release_pid,
|
||||
'url': smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid),
|
||||
query), {'force_smil_url': True}),
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
||||
|
@ -621,15 +621,21 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'url': text_track['src'],
|
||||
})
|
||||
|
||||
is_live = False
|
||||
duration = float_or_none(json_data.get('duration'), 1000)
|
||||
if duration and duration < 0:
|
||||
is_live = True
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': float_or_none(json_data.get('duration'), 1000),
|
||||
'duration': duration,
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
'uploader_id': account_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'tags': json_data.get('tags', []),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -8,15 +7,15 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
||||
'info_dict': {
|
||||
'id': 'studio-c-season-5-episode-5',
|
||||
'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
|
||||
'display_id': 'studio-c-season-5-episode-5',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
|
||||
'title': 'Season 5 Episode 5',
|
||||
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 1486.486,
|
||||
},
|
||||
@ -24,28 +23,71 @@ class BYUtvIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
episode_code = self._search_regex(
|
||||
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
|
||||
|
||||
ep = self._parse_json(
|
||||
episode_code, display_id, transform_source=lambda s:
|
||||
re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
|
||||
|
||||
if ep['providerType'] != 'Ooyala':
|
||||
raise ExtractorError('Unsupported provider %s' % ep['provider'])
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ep['providerId'],
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': ep['title'],
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
|
||||
|
||||
class BYUtvEventIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
|
||||
'info_dict': {
|
||||
'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Toledo vs. BYU (9/30/16)',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
episode_code = self._search_regex(
|
||||
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
|
||||
episode_json = re.sub(
|
||||
r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
|
||||
ep = json.loads(episode_json)
|
||||
|
||||
if ep['providerType'] == 'Ooyala':
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ep['providerId'],
|
||||
'id': video_id,
|
||||
'title': ep['title'],
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('Unsupported provider %s' % ep['provider'])
|
||||
ooyala_id = self._search_regex(
|
||||
r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'ooyala id', group='id')
|
||||
|
||||
title = self._search_regex(
|
||||
r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
|
||||
'title').strip()
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ooyala_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
}
|
||||
|
@ -112,7 +112,7 @@ class CamdemyIE(InfoExtractor):
|
||||
|
||||
|
||||
class CamdemyFolderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# links with trailing slash
|
||||
'url': 'http://www.camdemy.com/folder/450',
|
||||
|
@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor):
|
||||
(?:(?:www|m)\.)?canalplus\.fr|
|
||||
(?:www\.)?piwiplus\.fr|
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?c8\.fr|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
@ -35,6 +36,7 @@ class CanalplusIE(InfoExtractor):
|
||||
'canalplus': 'cplus',
|
||||
'piwiplus': 'teletoon',
|
||||
'd8': 'd8',
|
||||
'c8': 'd8',
|
||||
'd17': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
@ -1,11 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
'md5': 'ea838375a547ac787d4064d8c7860a6c',
|
||||
@ -38,22 +40,42 @@ class CanvasIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
|
||||
'display_id': 'herbekijk-sorry-voor-alles',
|
||||
'ext': 'mp4',
|
||||
'title': 'Herbekijk Sorry voor alles',
|
||||
'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 3788.06,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, display_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._search_regex(
|
||||
title = (self._search_regex(
|
||||
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage)).strip()
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
|
||||
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id)
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), display_id)
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
|
42
youtube_dl/extractor/cartoonnetwork.py
Normal file
42
youtube_dl/extractor/cartoonnetwork.py
Normal file
@ -0,0 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
|
||||
|
||||
class CartoonNetworkIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
|
||||
'info_dict': {
|
||||
'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starfire the Cat Lady',
|
||||
'description': 'Robin decides to become a cat so that Starfire will finally love him.',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups()
|
||||
query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
|
||||
return self._extract_cvp_info(
|
||||
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
|
||||
'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
|
||||
},
|
||||
}, {
|
||||
'url': url,
|
||||
'site_name': 'CartoonNetwork',
|
||||
'auth_required': self._search_regex(
|
||||
r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
|
||||
webpage, 'auth required', default='false') == 'true',
|
||||
})
|
@ -9,10 +9,19 @@ from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
xpath_with_ns,
|
||||
find_xpath_attr,
|
||||
parse_iso8601,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# with mediaId
|
||||
@ -114,6 +123,7 @@ class CBCIE(InfoExtractor):
|
||||
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:player'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
@ -167,3 +177,165 @@ class CBCPlayerIE(InfoExtractor):
|
||||
}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
|
||||
class CBCWatchBaseIE(InfoExtractor):
|
||||
_device_id = None
|
||||
_device_token = None
|
||||
_API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
|
||||
_NS_MAP = {
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
|
||||
}
|
||||
|
||||
def _call_api(self, path, video_id):
|
||||
url = path if path.startswith('http') else self._API_BASE_URL + path
|
||||
result = self._download_xml(url, video_id, headers={
|
||||
'X-Clearleap-DeviceId': self._device_id,
|
||||
'X-Clearleap-DeviceToken': self._device_token,
|
||||
})
|
||||
error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
|
||||
if error_message:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
|
||||
return result
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._device_id or not self._device_token:
|
||||
device = self._downloader.cache.load('cbcwatch', 'device') or {}
|
||||
self._device_id, self._device_token = device.get('id'), device.get('token')
|
||||
if not self._device_id or not self._device_token:
|
||||
result = self._download_xml(
|
||||
self._API_BASE_URL + 'device/register',
|
||||
None, data=b'<device><type>web</type></device>')
|
||||
self._device_id = xpath_text(result, 'deviceId', fatal=True)
|
||||
self._device_token = xpath_text(result, 'deviceToken', fatal=True)
|
||||
self._downloader.cache.store(
|
||||
'cbcwatch', 'device', {
|
||||
'id': self._device_id,
|
||||
'token': self._device_token,
|
||||
})
|
||||
|
||||
def _parse_rss_feed(self, rss):
|
||||
channel = xpath_element(rss, 'channel', fatal=True)
|
||||
|
||||
def _add_ns(path):
|
||||
return xpath_with_ns(path, self._NS_MAP)
|
||||
|
||||
entries = []
|
||||
for item in channel.findall('item'):
|
||||
guid = xpath_text(item, 'guid', fatal=True)
|
||||
title = xpath_text(item, 'title', fatal=True)
|
||||
|
||||
media_group = xpath_element(item, _add_ns('media:group'), fatal=True)
|
||||
content = xpath_element(media_group, _add_ns('media:content'), fatal=True)
|
||||
content_url = content.attrib['url']
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail in media_group.findall(_add_ns('media:thumbnail')):
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': thumbnail.get('profile'),
|
||||
'url': thumbnail_url,
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
|
||||
timestamp = None
|
||||
release_date = find_xpath_attr(
|
||||
item, _add_ns('media:credit'), 'role', 'releaseDate')
|
||||
if release_date is not None:
|
||||
timestamp = parse_iso8601(release_date.text)
|
||||
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': content_url,
|
||||
'id': guid,
|
||||
'title': title,
|
||||
'description': xpath_text(item, 'description'),
|
||||
'timestamp': timestamp,
|
||||
'duration': int_or_none(content.get('duration')),
|
||||
'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))),
|
||||
'episode': xpath_text(item, _add_ns('clearleap:episode')),
|
||||
'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))),
|
||||
'series': xpath_text(item, _add_ns('clearleap:series')),
|
||||
'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))),
|
||||
'thumbnails': thumbnails,
|
||||
'ie_key': 'CBCWatchVideo',
|
||||
})
|
||||
|
||||
return self.playlist_result(
|
||||
entries, xpath_text(channel, 'guid'),
|
||||
xpath_text(channel, 'title'),
|
||||
xpath_text(channel, 'description'))
|
||||
|
||||
|
||||
class CBCWatchVideoIE(CBCWatchBaseIE):
|
||||
IE_NAME = 'cbc.ca:watch:video'
|
||||
_VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
result = self._call_api(url, video_id)
|
||||
|
||||
m3u8_url = xpath_text(result, 'url', fatal=True)
|
||||
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
|
||||
if len(formats) < 2:
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
# Despite metadata in m3u8 all video+audio formats are
|
||||
# actually video-only (no audio)
|
||||
for f in formats:
|
||||
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = 'none'
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
rss = xpath_element(result, 'rss')
|
||||
if rss:
|
||||
info.update(self._parse_rss_feed(rss)['entries'][0])
|
||||
del info['url']
|
||||
del info['_type']
|
||||
del info['ie_key']
|
||||
return info
|
||||
|
||||
|
||||
class CBCWatchIE(CBCWatchBaseIE):
|
||||
IE_NAME = 'cbc.ca:watch'
|
||||
_VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
|
||||
'info_dict': {
|
||||
'id': '38e815a-009e3ab12e4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Customer (Dis)Service',
|
||||
'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
|
||||
'upload_date': '20160219',
|
||||
'timestamp': 1455840000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
|
||||
'info_dict': {
|
||||
'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
|
||||
'title': 'Arthur',
|
||||
'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
|
||||
},
|
||||
'playlist_mincount': 30,
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
rss = self._call_api('web/browse/' + video_id, video_id)
|
||||
return self._parse_rss_feed(rss)
|
||||
|
@ -4,6 +4,9 @@ from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@ -17,19 +20,6 @@ class CBSBaseIE(ThePlatformFeedIE):
|
||||
}]
|
||||
} if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info(
|
||||
'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: {
|
||||
'series': entry.get('cbs$SeriesTitle'),
|
||||
'season_number': int_or_none(entry.get('cbs$SeasonNumber')),
|
||||
'episode': entry.get('cbs$EpisodeTitle'),
|
||||
'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')),
|
||||
}, {
|
||||
'StreamPack': {
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||
@ -38,7 +28,6 @@ class CBSIE(CBSBaseIE):
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'info_dict': {
|
||||
'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
|
||||
'display_id': 'connect-chat-feat-garth-brooks',
|
||||
'ext': 'mp4',
|
||||
'title': 'Connect Chat feat. Garth Brooks',
|
||||
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||
@ -47,7 +36,10 @@ class CBSIE(CBSBaseIE):
|
||||
'upload_date': '20131127',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
|
||||
@ -56,8 +48,53 @@ class CBSIE(CBSBaseIE):
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
||||
|
||||
def _extract_video_info(self, content_id):
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||
tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
|
||||
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
||||
|
||||
asset_types = []
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
if not asset_type or asset_type in asset_types:
|
||||
continue
|
||||
asset_types.append(asset_type)
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': asset_type,
|
||||
}
|
||||
if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
|
||||
query['formats'] = 'MPEG4,M3U'
|
||||
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||
query['formats'] = 'MPEG4,FLV'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
info.update({
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': xpath_text(video_data, 'previewImageURL'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
return self._extract_video_info('byGuid=%s' % content_id, content_id)
|
||||
return self._extract_video_info(content_id)
|
||||
|
@ -41,13 +41,8 @@ class CBSLocalIE(AnvatoIE):
|
||||
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
|
||||
'info_dict': {
|
||||
'id': 'GxfCe0Zo7D-175909-5588',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 15, CIN 6',
|
||||
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
||||
'upload_date': '20160516',
|
||||
'timestamp': 1463433840,
|
||||
'duration': 49,
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
@ -60,12 +55,11 @@ class CBSLocalIE(AnvatoIE):
|
||||
|
||||
sendtonews_url = SendtoNewsIE._extract_url(webpage)
|
||||
if sendtonews_url:
|
||||
info_dict = {
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, sendtonews_url),
|
||||
}
|
||||
else:
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
return self.url_result(
|
||||
compat_urlparse.urljoin(url, sendtonews_url),
|
||||
ie=SendtoNewsIE.ie_key())
|
||||
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
|
||||
time_str = self._html_search_regex(
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||
|
@ -2,13 +2,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .cbs import CBSBaseIE
|
||||
from .cbs import CBSIE
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class CBSNewsIE(CBSBaseIE):
|
||||
class CBSNewsIE(CBSIE):
|
||||
IE_NAME = 'cbsnews'
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
@ -35,7 +36,8 @@ class CBSNewsIE(CBSBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
||||
'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '20140404',
|
||||
'timestamp': 1396650660,
|
||||
'uploader': 'CBSI-NEW',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 205,
|
||||
@ -63,51 +65,43 @@ class CBSNewsIE(CBSBaseIE):
|
||||
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
guid = item['mpxRefId']
|
||||
return self._extract_video_info('byGuid=%s' % guid, guid)
|
||||
return self._extract_video_info(guid)
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_NAME = 'cbsnews:livevideo'
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
|
||||
_TEST = {
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
'skip': 'Video gone, redirected to http://www.cbsnews.com/live/',
|
||||
}, {
|
||||
'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/',
|
||||
'info_dict': {
|
||||
'id': 'video-shows-intense-paragliding-accident',
|
||||
'ext': 'flv',
|
||||
'title': 'Video Shows Intense Paragliding Accident',
|
||||
},
|
||||
}]
|
||||
'skip': 'Video gone',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_info = self._download_json(
|
||||
'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={
|
||||
'device': 'desktop',
|
||||
'dvr_slug': display_id,
|
||||
})
|
||||
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
|
||||
|
||||
hdcore_sign = 'hdcore=3.3.1'
|
||||
f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
|
||||
if f4m_formats:
|
||||
for entry in f4m_formats:
|
||||
# URLs without the extra param induce an 404 error
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
self._sort_formats(f4m_formats)
|
||||
formats = self._extract_akamai_formats(video_info['url'], display_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'title': video_info['headline'],
|
||||
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
|
||||
'duration': parse_duration(video_info.get('segmentDur')),
|
||||
'formats': f4m_formats,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ from .cbs import CBSBaseIE
|
||||
|
||||
|
||||
class CBSSportsIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast',
|
||||
@ -23,6 +23,9 @@ class CBSSportsIE(CBSBaseIE):
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
||||
|
53
youtube_dl/extractor/cctv.py
Normal file
53
youtube_dl/extractor/cctv.py
Normal file
@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class CCTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:.+?\.)?
|
||||
(?:
|
||||
cctv\.(?:com|cn)|
|
||||
cntv\.cn
|
||||
)/
|
||||
(?:
|
||||
video/[^/]+/(?P<id>[0-9a-f]{32})|
|
||||
\d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+)
|
||||
)'''
|
||||
_TESTS = [{
|
||||
'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
|
||||
'md5': '819c7b49fc3927d529fb4cd555621823',
|
||||
'info_dict': {
|
||||
'id': '454368eb19ad44a1925bf1eb96140a61',
|
||||
'ext': 'mp4',
|
||||
'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})',
|
||||
webpage, 'video_id')
|
||||
api_data = self._download_json(
|
||||
'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id)
|
||||
m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': api_data['title'],
|
||||
'formats': self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False),
|
||||
'duration': float_or_none(api_data.get('video', {}).get('totalLength')),
|
||||
}
|
@ -17,7 +17,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||
'info_dict': {
|
||||
|
51
youtube_dl/extractor/charlierose.py
Normal file
51
youtube_dl/extractor/charlierose.py
Normal file
@ -0,0 +1,51 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class CharlieRoseIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://charlierose.com/videos/27996',
|
||||
'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
|
||||
'info_dict': {
|
||||
'id': '27996',
|
||||
'ext': 'mp4',
|
||||
'title': 'Remembering Zaha Hadid',
|
||||
'thumbnail': 're:^https?://.*\.jpg\?\d+',
|
||||
'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.',
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
'ext': 'vtt',
|
||||
}],
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://charlierose.com/videos/27996',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PLAYER_BASE = 'https://charlierose.com/video/player/%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - Charlie Rose')
|
||||
|
||||
info_dict = self._parse_html5_media_entries(
|
||||
self._PLAYER_BASE % video_id, webpage, video_id,
|
||||
m3u8_entry_protocol='m3u8_native')[0]
|
||||
|
||||
self._sort_formats(info_dict['formats'])
|
||||
self._remove_duplicate_formats(info_dict['formats'])
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
})
|
||||
|
||||
return info_dict
|
@ -65,7 +65,7 @@ class ChirbitIE(InfoExtractor):
|
||||
|
||||
class ChirbitProfileIE(InfoExtractor):
|
||||
IE_NAME = 'chirbit:profile'
|
||||
_VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://chirbit.com/ScarletBeauty',
|
||||
'info_dict': {
|
||||
|
@ -1,9 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
|
||||
config_json = self._search_regex(
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
|
||||
'configuration')
|
||||
config = json.loads(config_json)
|
||||
'configuration'), video_id)
|
||||
|
||||
video_info = config['videoInfo']
|
||||
sources = config['sources']
|
||||
|
@ -6,7 +6,7 @@ from ..utils import ExtractorError
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
IE_NAME = 'cmt.com'
|
||||
_VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
|
||||
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
||||
|
||||
_TESTS = [{
|
||||
|
@ -3,15 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
url_basename,
|
||||
)
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
|
||||
_TESTS = [{
|
||||
@ -25,6 +22,7 @@ class CNNIE(InfoExtractor):
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
@ -34,7 +32,8 @@ class CNNIE(InfoExtractor):
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||
'upload_date': '20130821',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
@ -44,80 +43,61 @@ class CNNIE(InfoExtractor):
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
'info_dict': {
|
||||
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||
'ext': 'mp4',
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_CONFIG = {
|
||||
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||
'edition': {
|
||||
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||
},
|
||||
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||
'money': {
|
||||
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
path = mobj.group('path')
|
||||
page_title = mobj.group('title')
|
||||
info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path
|
||||
info = self._download_xml(info_url, page_title)
|
||||
|
||||
formats = []
|
||||
rex = re.compile(r'''(?x)
|
||||
(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
(?:_(?P<bitrate>[0-9]+)k)?
|
||||
''')
|
||||
for f in info.findall('files/file'):
|
||||
video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip())
|
||||
fdct = {
|
||||
'format_id': f.attrib['bitrate'],
|
||||
'url': video_url,
|
||||
}
|
||||
|
||||
mf = rex.match(f.attrib['bitrate'])
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mf = rex.search(f.text)
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
|
||||
if mi:
|
||||
if mi.group(1) == 'audio':
|
||||
fdct['vcodec'] = 'none'
|
||||
fdct['ext'] = 'm4a'
|
||||
else:
|
||||
fdct['tbr'] = int(mi.group(1))
|
||||
|
||||
formats.append(fdct)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'height': int(t.attrib['height']),
|
||||
'width': int(t.attrib['width']),
|
||||
'url': t.text,
|
||||
} for t in info.findall('images/image')]
|
||||
|
||||
metas_el = info.find('metas')
|
||||
upload_date = (
|
||||
metas_el.attrib.get('version') if metas_el is not None else None)
|
||||
|
||||
duration_el = info.find('length')
|
||||
duration = parse_duration(duration_el.text)
|
||||
|
||||
return {
|
||||
'id': info.attrib['id'],
|
||||
'title': info.find('headline').text,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': info.find('description').text,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
@ -132,6 +112,7 @@ class CNNBlogsIE(InfoExtractor):
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
@ -146,7 +127,7 @@ class CNNBlogsIE(InfoExtractor):
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
||||
@ -154,9 +135,10 @@ class CNNArticleIE(InfoExtractor):
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
|
@ -87,6 +87,9 @@ class InfoExtractor(object):
|
||||
|
||||
Potential fields:
|
||||
* url Mandatory. The URL of the video file
|
||||
* manifest_url
|
||||
The URL of the manifest file in case of
|
||||
fragmented media (DASH, hls, hds)
|
||||
* ext Will be calculated from URL if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
@ -115,6 +118,11 @@ class InfoExtractor(object):
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||
"m3u8", "m3u8_native" or "http_dash_segments".
|
||||
* fragments A list of fragments of the fragmented media,
|
||||
with the following entries:
|
||||
* "url" (mandatory) - fragment's URL
|
||||
* "duration" (optional, int or float)
|
||||
* "filesize" (optional, int)
|
||||
* preference Order number of this format. If this field is
|
||||
present and not None, the formats get sorted
|
||||
by this field, regardless of all other values.
|
||||
@ -662,35 +670,48 @@ class InfoExtractor(object):
|
||||
else:
|
||||
return res
|
||||
|
||||
def _get_login_info(self):
|
||||
def _get_netrc_login_info(self, netrc_machine=None):
|
||||
username = None
|
||||
password = None
|
||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||
|
||||
if self._downloader.params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(netrc_machine)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError(
|
||||
'No authenticators for %s' % netrc_machine)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning(
|
||||
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||
|
||||
return username, password
|
||||
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
"""
|
||||
Get the login info as (username, password)
|
||||
It will look in the netrc file using the _NETRC_MACHINE value
|
||||
First look for the manually specified credentials using username_option
|
||||
and password_option as keys in params dictionary. If no such credentials
|
||||
available look in the netrc file using the netrc_machine or _NETRC_MACHINE
|
||||
value.
|
||||
If there's no info available, return (None, None)
|
||||
"""
|
||||
if self._downloader is None:
|
||||
return (None, None)
|
||||
|
||||
username = None
|
||||
password = None
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
# Attempt to use provided username and password or .netrc data
|
||||
if downloader_params.get('username') is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
|
||||
if downloader_params.get(username_option) is not None:
|
||||
username = downloader_params[username_option]
|
||||
password = downloader_params[password_option]
|
||||
else:
|
||||
username, password = self._get_netrc_login_info(netrc_machine)
|
||||
|
||||
return (username, password)
|
||||
return username, password
|
||||
|
||||
def _get_tfa_info(self, note='two-factor verification code'):
|
||||
"""
|
||||
@ -878,16 +899,16 @@ class InfoExtractor(object):
|
||||
def _hidden_inputs(html):
|
||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||
hidden_inputs = {}
|
||||
for input in re.findall(r'(?i)<input([^>]+)>', html):
|
||||
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
|
||||
for input in re.findall(r'(?i)(<input[^>]+>)', html):
|
||||
attrs = extract_attributes(input)
|
||||
if not input:
|
||||
continue
|
||||
name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input)
|
||||
if not name:
|
||||
if attrs.get('type') not in ('hidden', 'submit'):
|
||||
continue
|
||||
value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
|
||||
if not value:
|
||||
continue
|
||||
hidden_inputs[name.group('value')] = value.group('value')
|
||||
name = attrs.get('name') or attrs.get('id')
|
||||
value = attrs.get('value')
|
||||
if name and value is not None:
|
||||
hidden_inputs[name] = value
|
||||
return hidden_inputs
|
||||
|
||||
def _form_hidden_inputs(self, form_id, html):
|
||||
@ -1129,6 +1150,7 @@ class InfoExtractor(object):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': manifest_url,
|
||||
'manifest_url': manifest_url,
|
||||
'ext': 'flv' if bootstrap_info is not None else None,
|
||||
'tbr': tbr,
|
||||
'width': width,
|
||||
@ -1153,13 +1175,6 @@ class InfoExtractor(object):
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False):
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
@ -1170,6 +1185,13 @@ class InfoExtractor(object):
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
# We should try extracting formats only from master playlists [1], i.e.
|
||||
# playlists that describe available qualities. On the other hand media
|
||||
# playlists [2] should be returned as is since they contain just the media
|
||||
@ -1191,35 +1213,54 @@ class InfoExtractor(object):
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}]
|
||||
last_info = None
|
||||
last_media = None
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_info = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
last_media = parse_m3u8_attributes(line)
|
||||
media = parse_m3u8_attributes(line)
|
||||
media_type = media.get('TYPE')
|
||||
if media_type in ('VIDEO', 'AUDIO'):
|
||||
media_url = media.get('URI')
|
||||
if media_url:
|
||||
format_id = []
|
||||
for v in (media.get('GROUP-ID'), media.get('NAME')):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(media_url),
|
||||
'language': media.get('LANGUAGE'),
|
||||
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
})
|
||||
else:
|
||||
# When there is no URI in EXT-X-MEDIA let this tag's
|
||||
# data be used by regular URI lines below
|
||||
last_media = media
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
if last_info is None:
|
||||
formats.append({'url': format_url(line)})
|
||||
continue
|
||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||
tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME') or last_media_name
|
||||
stream_name = last_info.get('NAME') or last_media.get('NAME')
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
if not live:
|
||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||
manifest_url = format_url(line.strip())
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(line.strip()),
|
||||
'url': manifest_url,
|
||||
'manifest_url': manifest_url,
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||
@ -1242,11 +1283,9 @@ class InfoExtractor(object):
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
@ -1493,9 +1532,10 @@ class InfoExtractor(object):
|
||||
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
@ -1516,42 +1556,52 @@ class InfoExtractor(object):
|
||||
|
||||
def extract_multisegment_info(element, ms_parent_info):
|
||||
ms_info = ms_parent_info.copy()
|
||||
|
||||
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
|
||||
# common attributes and elements. We will only extract relevant
|
||||
# for us.
|
||||
def extract_common(source):
|
||||
segment_timeline = source.find(_add_ns('SegmentTimeline'))
|
||||
if segment_timeline is not None:
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
start_number = source.get('startNumber')
|
||||
if start_number:
|
||||
ms_info['start_number'] = int(start_number)
|
||||
timescale = source.get('timescale')
|
||||
if timescale:
|
||||
ms_info['timescale'] = int(timescale)
|
||||
segment_duration = source.get('duration')
|
||||
if segment_duration:
|
||||
ms_info['segment_duration'] = int(segment_duration)
|
||||
|
||||
def extract_Initialization(source):
|
||||
initialization = source.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
|
||||
segment_list = element.find(_add_ns('SegmentList'))
|
||||
if segment_list is not None:
|
||||
extract_common(segment_list)
|
||||
extract_Initialization(segment_list)
|
||||
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
||||
if segment_urls_e:
|
||||
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
||||
initialization = segment_list.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
else:
|
||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||
if segment_template is not None:
|
||||
start_number = segment_template.get('startNumber')
|
||||
if start_number:
|
||||
ms_info['start_number'] = int(start_number)
|
||||
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
|
||||
if segment_timeline is not None:
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
else:
|
||||
timescale = segment_template.get('timescale')
|
||||
if timescale:
|
||||
ms_info['timescale'] = int(timescale)
|
||||
segment_duration = segment_template.get('duration')
|
||||
if segment_duration:
|
||||
ms_info['segment_duration'] = int(segment_duration)
|
||||
extract_common(segment_template)
|
||||
media_template = segment_template.get('media')
|
||||
if media_template:
|
||||
ms_info['media_template'] = media_template
|
||||
@ -1559,11 +1609,14 @@ class InfoExtractor(object):
|
||||
if initialization:
|
||||
ms_info['initialization_url'] = initialization
|
||||
else:
|
||||
initialization = segment_template.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
extract_Initialization(segment_template)
|
||||
return ms_info
|
||||
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
formats = []
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
@ -1606,6 +1659,7 @@ class InfoExtractor(object):
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
'manifest_url': mpd_url,
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
@ -1620,9 +1674,7 @@ class InfoExtractor(object):
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
@ -1631,46 +1683,79 @@ class InfoExtractor(object):
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template:
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
representation_ms_info['fragments'] = [{
|
||||
'url': media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
representation_ms_info['segment_urls'] = []
|
||||
# $Number*$ or $Time$ in media template with S list available
|
||||
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||
representation_ms_info['fragments'] = []
|
||||
segment_time = 0
|
||||
segment_d = None
|
||||
segment_number = representation_ms_info['start_number']
|
||||
|
||||
def add_segment_url():
|
||||
representation_ms_info['segment_urls'].append(
|
||||
media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
)
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
'url': segment_url,
|
||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||
})
|
||||
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
segment_d = s['d']
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += s['d']
|
||||
segment_time += segment_d
|
||||
add_segment_url()
|
||||
segment_time += s['d']
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
segment_number += 1
|
||||
segment_time += segment_d
|
||||
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||
# No media template
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
s_num = 0
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
s = representation_ms_info['s'][s_num]
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
fragments.append({
|
||||
'url': segment_url,
|
||||
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
||||
})
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# NB: MPD manifest may contain direct URLs to unfragmented media.
|
||||
# No fragments key is present in this case.
|
||||
if 'fragments' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
'fragments': [],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||
f.update({
|
||||
'initialization_url': initialization_url,
|
||||
})
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({'url': initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
for fragment in f['fragments']:
|
||||
fragment['url'] = combine_url(base_url, fragment['url'])
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
@ -1685,7 +1770,7 @@ class InfoExtractor(object):
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage):
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
|
||||
@ -1700,6 +1785,21 @@ class InfoExtractor(object):
|
||||
return f
|
||||
return {}
|
||||
|
||||
def _media_formats(src, cur_media_type):
|
||||
full_url = absolute_url(src)
|
||||
if determine_ext(full_url) == 'm3u8':
|
||||
is_plain_url = False
|
||||
formats = self._extract_m3u8_formats(
|
||||
full_url, video_id, ext='mp4',
|
||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
|
||||
else:
|
||||
is_plain_url = True
|
||||
formats = [{
|
||||
'url': full_url,
|
||||
'vcodec': 'none' if cur_media_type == 'audio' else None,
|
||||
}]
|
||||
return is_plain_url, formats
|
||||
|
||||
entries = []
|
||||
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||
media_info = {
|
||||
@ -1709,10 +1809,8 @@ class InfoExtractor(object):
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = media_attributes.get('src')
|
||||
if src:
|
||||
media_info['formats'].append({
|
||||
'url': absolute_url(src),
|
||||
'vcodec': 'none' if media_type == 'audio' else None,
|
||||
})
|
||||
_, formats = _media_formats(src, media_type)
|
||||
media_info['formats'].extend(formats)
|
||||
media_info['thumbnail'] = media_attributes.get('poster')
|
||||
if media_content:
|
||||
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||
@ -1720,16 +1818,17 @@ class InfoExtractor(object):
|
||||
src = source_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
f = parse_content_type(source_attributes.get('type'))
|
||||
f.update({
|
||||
'url': absolute_url(src),
|
||||
'vcodec': 'none' if media_type == 'audio' else None,
|
||||
})
|
||||
media_info['formats'].append(f)
|
||||
is_plain_url, formats = _media_formats(src, media_type)
|
||||
if is_plain_url:
|
||||
f = parse_content_type(source_attributes.get('type'))
|
||||
f.update(formats[0])
|
||||
media_info['formats'].append(f)
|
||||
else:
|
||||
media_info['formats'].extend(formats)
|
||||
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||
track_attributes = extract_attributes(track_tag)
|
||||
kind = track_attributes.get('kind')
|
||||
if not kind or kind == 'subtitles':
|
||||
if not kind or kind in ('subtitles', 'captions'):
|
||||
src = track_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
@ -1737,10 +1836,70 @@ class InfoExtractor(object):
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
if media_info['formats']:
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id):
|
||||
formats = []
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||
if 'hdcore=' not in f4m_url:
|
||||
f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
f4m_url, video_id, f4m_id='hds', fatal=False)
|
||||
for entry in f4m_formats:
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
formats.extend(f4m_formats)
|
||||
m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
return formats
|
||||
|
||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
||||
url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
|
||||
http_base_url = 'http' + url_base
|
||||
formats = []
|
||||
if 'm3u8' not in skip_protocols:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
http_base_url + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
if 'f4m' not in skip_protocols:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
http_base_url + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
if re.search(r'(?:/smil:|\.smil)', url_base):
|
||||
if 'dash' not in skip_protocols:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
http_base_url + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
if 'smil' not in skip_protocols:
|
||||
rtmp_formats = self._extract_smil_formats(
|
||||
http_base_url + '/jwplayer.smil',
|
||||
video_id, fatal=False)
|
||||
for rtmp_format in rtmp_formats:
|
||||
rtsp_format = rtmp_format.copy()
|
||||
rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
|
||||
del rtsp_format['play_path']
|
||||
del rtsp_format['ext']
|
||||
rtsp_format.update({
|
||||
'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
|
||||
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
|
||||
'protocol': 'rtsp',
|
||||
})
|
||||
formats.extend([rtmp_format, rtsp_format])
|
||||
else:
|
||||
for protocol in ('rtmp', 'rtsp'):
|
||||
if protocol not in skip_protocols:
|
||||
formats.append({
|
||||
'url': protocol + url_base,
|
||||
'format_id': protocol,
|
||||
'protocol': protocol,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
|
@ -1,5 +1,5 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import unicode_literals, division
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
@ -8,12 +8,22 @@ from ..utils import int_or_none
|
||||
class CrackleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crackle.com/the-art-of-more/2496419',
|
||||
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
||||
'info_dict': {
|
||||
'id': '2496419',
|
||||
'id': '2498934',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heavy Lies the Head',
|
||||
'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
|
||||
'title': 'Everybody Respects A Bloody Nose',
|
||||
'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 906,
|
||||
'series': 'Comedians In Cars Getting Coffee',
|
||||
'season_number': 8,
|
||||
'episode_number': 4,
|
||||
'subtitles': {
|
||||
'en-US': [{
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@ -21,12 +31,8 @@ class CrackleIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
|
||||
_SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
|
||||
_UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
|
||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'c544.flv': {
|
||||
'width': 544,
|
||||
@ -48,16 +54,21 @@ class CrackleIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config_doc = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
|
||||
video_id, 'Downloading config')
|
||||
|
||||
item = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||
video_id).find('i')
|
||||
title = item.attrib['t']
|
||||
|
||||
thumbnail = None
|
||||
subtitles = {}
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||
thumbnail = None
|
||||
path = item.attrib.get('p')
|
||||
if path:
|
||||
thumbnail = self._THUMBNAIL_TEMPLATE % path
|
||||
@ -76,7 +87,7 @@ class CrackleIE(InfoExtractor):
|
||||
if locale not in subtitles:
|
||||
subtitles[locale] = []
|
||||
subtitles[locale] = [{
|
||||
'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
|
||||
'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v),
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
@ -85,7 +96,7 @@ class CrackleIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': item.attrib.get('d'),
|
||||
'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
|
||||
'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
|
||||
'series': item.attrib.get('sn'),
|
||||
'season_number': int_or_none(item.attrib.get('se')),
|
||||
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||
|
@ -1,13 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CriterionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+'
|
||||
_VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
|
||||
_TEST = {
|
||||
'url': 'http://www.criterion.com/films/184-le-samourai',
|
||||
'md5': 'bc51beba55685509883a9a7830919ec3',
|
||||
@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Samouraï',
|
||||
'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
final_url = self._search_regex(
|
||||
r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
|
||||
r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._search_regex(
|
||||
r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
|
||||
r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
|
||||
webpage, 'thumbnail url')
|
||||
|
||||
return {
|
||||
|
@ -34,22 +34,58 @@ from ..aes import (
|
||||
|
||||
|
||||
class CrunchyrollBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.crunchyroll.com/login'
|
||||
_LOGIN_FORM = 'login_form'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://www.crunchyroll.com/?a=formhandler'
|
||||
data = urlencode_postdata({
|
||||
'formname': 'RpcApiUser_Login',
|
||||
'name': username,
|
||||
'password': password,
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
def is_logged(webpage):
|
||||
return '<title>Redirecting' in webpage
|
||||
|
||||
# Already logged in
|
||||
if is_logged(login_page):
|
||||
return
|
||||
|
||||
login_form_str = self._search_regex(
|
||||
r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
|
||||
login_page, 'login form', group='form')
|
||||
|
||||
post_url = extract_attributes(login_form_str).get('action')
|
||||
if not post_url:
|
||||
post_url = self._LOGIN_URL
|
||||
elif not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
|
||||
|
||||
login_form.update({
|
||||
'login_form[name]': username,
|
||||
'login_form[password]': password,
|
||||
})
|
||||
login_request = sanitized_Request(login_url, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in', 'Wrong login info',
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
# Successful login
|
||||
if is_logged(response):
|
||||
return
|
||||
|
||||
error = self._html_search_regex(
|
||||
'(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
@ -1,30 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
'info_dict': {
|
||||
'id': '706966',
|
||||
'ext': 'mp4',
|
||||
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
|
||||
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
|
||||
'upload_date': '20150919',
|
||||
'timestamp': 1442624700,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:ctv_web:%s' % video_id,
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
@ -1,9 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
HEADRequest,
|
||||
)
|
||||
|
||||
|
||||
class CultureUnpluggedIE(InfoExtractor):
|
||||
@ -32,6 +36,9 @@ class CultureUnpluggedIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
# request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request
|
||||
self._request_webpage(HEADRequest(
|
||||
'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
|
||||
movie_data = self._download_json(
|
||||
'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id)
|
||||
|
||||
|
120
youtube_dl/extractor/curiositystream.py
Normal file
120
youtube_dl/extractor/curiositystream.py
Normal file
@ -0,0 +1,120 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CuriosityStreamBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'curiositystream'
|
||||
_auth_token = None
|
||||
_API_BASE_URL = 'https://api.curiositystream.com/v1/'
|
||||
|
||||
def _handle_errors(self, result):
|
||||
error = result.get('error', {}).get('message')
|
||||
if error:
|
||||
if isinstance(error, dict):
|
||||
error = ', '.join(error.values())
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id):
|
||||
headers = {}
|
||||
if self._auth_token:
|
||||
headers['X-Auth-Token'] = self._auth_token
|
||||
result = self._download_json(
|
||||
self._API_BASE_URL + path, video_id, headers=headers)
|
||||
self._handle_errors(result)
|
||||
return result['data']
|
||||
|
||||
def _real_initialize(self):
|
||||
(email, password) = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
result = self._download_json(
|
||||
self._API_BASE_URL + 'login', None, data=urlencode_postdata({
|
||||
'email': email,
|
||||
'password': password,
|
||||
}))
|
||||
self._handle_errors(result)
|
||||
self._auth_token = result['message']['auth_token']
|
||||
|
||||
def _extract_media_info(self, media):
|
||||
video_id = compat_str(media['id'])
|
||||
limelight_media_id = media['limelight_media_id']
|
||||
title = media['title']
|
||||
|
||||
subtitles = {}
|
||||
for closed_caption in media.get('closed_captions', []):
|
||||
sub_url = closed_caption.get('file')
|
||||
if not sub_url:
|
||||
continue
|
||||
lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': 'limelight:media:' + limelight_media_id,
|
||||
'title': title,
|
||||
'description': media.get('description'),
|
||||
'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'tags': media.get('tags'),
|
||||
'subtitles': subtitles,
|
||||
'ie_key': 'LimelightMedia',
|
||||
}
|
||||
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream'
|
||||
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/video/2',
|
||||
'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'ext': 'mp4',
|
||||
'title': 'How Did You Develop The Internet?',
|
||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||
'timestamp': 1448388615,
|
||||
'upload_date': '20151124',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._call_api('media/' + video_id, video_id)
|
||||
return self._extract_media_info(media)
|
||||
|
||||
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||
IE_NAME = 'curiositystream:collection'
|
||||
_VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://app.curiositystream.com/collection/2',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'title': 'Curious Minds: The Internet',
|
||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||
},
|
||||
'playlist_mincount': 17,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
collection_id = self._match_id(url)
|
||||
collection = self._call_api(
|
||||
'collections/' + collection_id, collection_id)
|
||||
entries = []
|
||||
for media in collection.get('media', []):
|
||||
entries.append(self._extract_media_info(media))
|
||||
return self.playlist_result(
|
||||
entries, collection_id,
|
||||
collection.get('title'), collection.get('description'))
|
@ -394,7 +394,7 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
|
||||
|
||||
class DailymotionCloudIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/'
|
||||
_VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/'
|
||||
_VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
|
||||
_VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
|
||||
|
||||
|
@ -38,6 +38,12 @@ class DBTVIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
|
@ -1,61 +1,54 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||
_TEST = {
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'md5': '174dd4a8a6225cf5655952f969cfbe24',
|
||||
'info_dict': {
|
||||
'id': '1324',
|
||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade'
|
||||
'ext': 'mp4',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||
'description': 'Kurzfilm',
|
||||
'upload_date': '20110407',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
|
||||
version_json = self._download_json(
|
||||
base_url + 'version.json',
|
||||
video_id, note='Determining file version')
|
||||
version = version_json['version_name']
|
||||
info_json = self._download_json(
|
||||
'{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
|
||||
video_id, note='Fetching object ID')
|
||||
object_id = compat_str(info_json['object_id'])
|
||||
meta_json = self._download_json(
|
||||
'{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
|
||||
video_id, note='Downloading metadata')
|
||||
uuid = meta_json['uuid']
|
||||
title = meta_json['title']
|
||||
wide = meta_json['is_wide']
|
||||
if wide:
|
||||
ratio = '16x9'
|
||||
else:
|
||||
ratio = '4x3'
|
||||
play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
object_id = self._html_search_meta('DC.identifier', webpage)
|
||||
|
||||
servers_json = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/',
|
||||
'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
|
||||
video_id, note='Downloading server list')
|
||||
url = servers_json[0]['endpoint']
|
||||
server = servers_json[0]['server']
|
||||
m3u8_path = self._search_regex(
|
||||
r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_meta('DC.description', webpage)
|
||||
upload_date = unified_strdate(
|
||||
self._html_search_meta('DC.date.created', webpage))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'id': object_id,
|
||||
'title': title,
|
||||
'format': 'rtmp',
|
||||
'url': url,
|
||||
'play_path': play_path,
|
||||
'rtmp_real_time': True,
|
||||
'ext': 'flv',
|
||||
'display_id': video_id
|
||||
'formats': formats,
|
||||
'display_id': video_id,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class DemocracynowIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?democracynow.org/(?P<id>[^\?]*)'
|
||||
_VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)'
|
||||
IE_NAME = 'democracynow'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.democracynow.org/shows/2015/7/3',
|
||||
|
@ -7,11 +7,22 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class DiscoveryGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoverygo\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc|
|
||||
velocitychannel
|
||||
)go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'''
|
||||
_TEST = {
|
||||
'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
|
||||
'info_dict': {
|
||||
@ -43,7 +54,14 @@ class DiscoveryGoIE(InfoExtractor):
|
||||
|
||||
title = video['name']
|
||||
|
||||
stream = video['stream']
|
||||
stream = video.get('stream')
|
||||
if not stream:
|
||||
if video.get('authenticated') is True:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.', expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find stream')
|
||||
STREAM_URL_SUFFIX = 'streamUrl'
|
||||
formats = []
|
||||
for stream_kind in ('', 'hds'):
|
||||
|
@ -10,18 +10,18 @@ from ..utils import (
|
||||
class DotsubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||
'md5': '0914d4d69605090f623b7ac329fea66e',
|
||||
'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
|
||||
'md5': '21c7ff600f545358134fea762a6d42b6',
|
||||
'info_dict': {
|
||||
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||
'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09',
|
||||
'ext': 'flv',
|
||||
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
|
||||
'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
|
||||
'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
||||
'duration': 3169,
|
||||
'uploader': '4v4l0n42',
|
||||
'timestamp': 1292248482.625,
|
||||
'upload_date': '20101213',
|
||||
'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever',
|
||||
'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6',
|
||||
'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p',
|
||||
'duration': 198,
|
||||
'uploader': 'liuxt',
|
||||
'timestamp': 1385778501.104,
|
||||
'upload_date': '20131130',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
@ -3,9 +3,17 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (ExtractorError, unescapeHTML)
|
||||
from ..compat import (compat_str, compat_basestring)
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
@ -21,7 +29,6 @@ class DouyuTVIE(InfoExtractor):
|
||||
'description': 're:.*m7show@163\.com.*',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@ -37,7 +44,6 @@ class DouyuTVIE(InfoExtractor):
|
||||
'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'douyu小漠',
|
||||
'uploader_id': '3769985',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@ -54,7 +60,6 @@ class DouyuTVIE(InfoExtractor):
|
||||
'description': 're:.*m7show@163\.com.*',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@ -65,6 +70,10 @@ class DouyuTVIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
|
||||
# is encrypted originally, but ffdec can dump memory to get the decrypted one.
|
||||
_API_KEY = 'A12Svb&%1UUmf@hC'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@ -75,74 +84,56 @@ class DouyuTVIE(InfoExtractor):
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||
|
||||
config = None
|
||||
# Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache"
|
||||
# Retry with different parameters - same parameters cause same errors
|
||||
for i in range(5):
|
||||
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
|
||||
room_id, int(time.time()))
|
||||
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
|
||||
room = self._download_json(
|
||||
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
|
||||
note='Downloading room info')['data']
|
||||
|
||||
config_page = self._download_webpage(
|
||||
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
|
||||
video_id)
|
||||
try:
|
||||
config = self._parse_json(config_page, video_id, fatal=False)
|
||||
except ExtractorError:
|
||||
# Wait some time before retrying to get a different time() value
|
||||
self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. '
|
||||
'Waiting for %(timeout)s seconds before retrying')
|
||||
continue
|
||||
else:
|
||||
break
|
||||
if config is None:
|
||||
raise ExtractorError('Unable to fetch API result')
|
||||
|
||||
data = config['data']
|
||||
|
||||
error_code = config.get('error', 0)
|
||||
if error_code is not 0:
|
||||
error_desc = 'Server reported error %i' % error_code
|
||||
if isinstance(data, (compat_str, compat_basestring)):
|
||||
error_desc += ': ' + data
|
||||
raise ExtractorError(error_desc, expected=True)
|
||||
|
||||
show_status = data.get('show_status')
|
||||
# 1 = live, 2 = offline
|
||||
if show_status == '2':
|
||||
if room.get('show_status') == '2':
|
||||
raise ExtractorError('Live stream is offline', expected=True)
|
||||
|
||||
tt = compat_str(int(time.time() / 60))
|
||||
did = uuid.uuid4().hex.upper()
|
||||
|
||||
sign_content = ''.join((room_id, did, self._API_KEY, tt))
|
||||
sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest()
|
||||
|
||||
flv_data = compat_urllib_parse_urlencode({
|
||||
'cdn': 'ws',
|
||||
'rate': '0',
|
||||
'tt': tt,
|
||||
'did': did,
|
||||
'sign': sign,
|
||||
})
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id,
|
||||
data=flv_data, note='Downloading video info',
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
error_code = video_info.get('error', 0)
|
||||
if error_code is not 0:
|
||||
raise ExtractorError(
|
||||
'Live stream is offline', expected=True)
|
||||
'%s reported error %i' % (self.IE_NAME, error_code),
|
||||
expected=True)
|
||||
|
||||
base_url = data['rtmp_url']
|
||||
live_path = data['rtmp_live']
|
||||
base_url = video_info['data']['rtmp_url']
|
||||
live_path = video_info['data']['rtmp_live']
|
||||
|
||||
title = self._live_title(unescapeHTML(data['room_name']))
|
||||
description = data.get('show_details')
|
||||
thumbnail = data.get('room_src')
|
||||
video_url = '%s/%s' % (base_url, live_path)
|
||||
|
||||
uploader = data.get('nickname')
|
||||
uploader_id = data.get('owner_uid')
|
||||
|
||||
multi_formats = data.get('rtmp_multi_bitrate')
|
||||
if not isinstance(multi_formats, dict):
|
||||
multi_formats = {}
|
||||
multi_formats['live'] = live_path
|
||||
|
||||
formats = [{
|
||||
'url': '%s/%s' % (base_url, format_path),
|
||||
'format_id': format_id,
|
||||
'preference': 1 if format_id == 'live' else 0,
|
||||
} for format_id, format_path in multi_formats.items()]
|
||||
self._sort_formats(formats)
|
||||
title = self._live_title(unescapeHTML(room['room_name']))
|
||||
description = room.get('notice')
|
||||
thumbnail = room.get('room_src')
|
||||
uploader = room.get('nickname')
|
||||
|
||||
return {
|
||||
'id': room_id,
|
||||
'display_id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
@ -4,26 +4,45 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
|
||||
'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
'info_dict': {
|
||||
'id': 'panisk-paske-5',
|
||||
'id': 'klassen-darlig-taber-10',
|
||||
'ext': 'mp4',
|
||||
'title': 'Panisk Påske (5)',
|
||||
'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
|
||||
'timestamp': 1426984612,
|
||||
'upload_date': '20150322',
|
||||
'duration': 1455,
|
||||
'title': 'Klassen - Dårlig taber (10)',
|
||||
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
|
||||
'timestamp': 1471991907,
|
||||
'upload_date': '20160823',
|
||||
'duration': 606.84,
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
||||
'md5': '2c37175c718155930f939ef59952474a',
|
||||
'info_dict': {
|
||||
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
|
||||
'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
|
||||
'timestamp': 1472800279,
|
||||
'upload_date': '20160902',
|
||||
'duration': 131.4,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -35,7 +54,8 @@ class DRTVIE(InfoExtractor):
|
||||
'Video %s is not available' % video_id, expected=True)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||
webpage, 'video id')
|
||||
|
||||
programcard = self._download_json(
|
||||
@ -43,9 +63,12 @@ class DRTVIE(InfoExtractor):
|
||||
video_id, 'Downloading video JSON')
|
||||
data = programcard['Data'][0]
|
||||
|
||||
title = data['Title']
|
||||
description = data['Description']
|
||||
timestamp = parse_iso8601(data['CreatedTime'])
|
||||
title = remove_end(self._og_search_title(
|
||||
webpage, default=None), ' | TV | DR') or data['Title']
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or data.get('Description')
|
||||
|
||||
timestamp = parse_iso8601(data.get('CreatedTime'))
|
||||
|
||||
thumbnail = None
|
||||
duration = None
|
||||
@ -56,16 +79,18 @@ class DRTVIE(InfoExtractor):
|
||||
subtitles = {}
|
||||
|
||||
for asset in data['Assets']:
|
||||
if asset['Kind'] == 'Image':
|
||||
thumbnail = asset['Uri']
|
||||
elif asset['Kind'] == 'VideoResource':
|
||||
duration = asset['DurationInMilliseconds'] / 1000.0
|
||||
restricted_to_denmark = asset['RestrictedToDenmark']
|
||||
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
|
||||
for link in asset['Links']:
|
||||
uri = link['Uri']
|
||||
target = link['Target']
|
||||
format_id = target
|
||||
if asset.get('Kind') == 'Image':
|
||||
thumbnail = asset.get('Uri')
|
||||
elif asset.get('Kind') == 'VideoResource':
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
|
||||
for link in asset.get('Links', []):
|
||||
uri = link.get('Uri')
|
||||
if not uri:
|
||||
continue
|
||||
target = link.get('Target')
|
||||
format_id = target or ''
|
||||
preference = None
|
||||
if spoken_subtitles:
|
||||
preference = -1
|
||||
@ -76,8 +101,8 @@ class DRTVIE(InfoExtractor):
|
||||
video_id, preference, f4m_id=format_id))
|
||||
elif target == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
uri, video_id, 'mp4', preference=preference,
|
||||
m3u8_id=format_id))
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=preference, m3u8_id=format_id))
|
||||
else:
|
||||
bitrate = link.get('Bitrate')
|
||||
if bitrate:
|
||||
@ -85,7 +110,7 @@ class DRTVIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': uri,
|
||||
'format_id': format_id,
|
||||
'tbr': bitrate,
|
||||
'tbr': int_or_none(bitrate),
|
||||
'ext': link.get('FileFormat'),
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
@ -94,12 +119,18 @@ class DRTVIE(InfoExtractor):
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
lang = subs['Language']
|
||||
subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
|
||||
if not subs.get('Uri'):
|
||||
continue
|
||||
lang = subs.get('Language') or 'da'
|
||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||
'url': subs['Uri'],
|
||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||
})
|
||||
|
||||
if not formats and restricted_to_denmark:
|
||||
raise ExtractorError(
|
||||
'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)
|
||||
self.raise_geo_restricted(
|
||||
'Unfortunately, DR is not allowed to show this program outside Denmark.',
|
||||
expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -52,11 +52,24 @@ class EaglePlatformIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
# Regular iframe embedding
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
# Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
|
||||
mobj = re.search(
|
||||
r'''(?xs)
|
||||
<script[^>]+
|
||||
src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
|
||||
.+?
|
||||
<div[^>]+
|
||||
class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
|
||||
data-id=["\'](?P<id>\d+)
|
||||
''', webpage)
|
||||
if mobj is not None:
|
||||
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
|
||||
|
||||
@staticmethod
|
||||
def _handle_error(response):
|
||||
|
@ -14,7 +14,7 @@ class EinthusanIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
||||
'md5': 'af244f4458cd667205e513d75da5b8b1',
|
||||
'md5': 'd71379996ff5b7f217eca034c34e3461',
|
||||
'info_dict': {
|
||||
'id': '2447',
|
||||
'ext': 'mp4',
|
||||
@ -25,13 +25,13 @@ class EinthusanIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
||||
'md5': 'ef63c7a803e22315880ed182c10d1c5c',
|
||||
'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213',
|
||||
'info_dict': {
|
||||
'id': '1671',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soodhu Kavvuum',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
|
||||
'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
|
||||
}
|
||||
},
|
||||
]
|
||||
@ -50,9 +50,11 @@ class EinthusanIE(InfoExtractor):
|
||||
video_id = self._search_regex(
|
||||
r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
|
||||
|
||||
video_url = self._download_webpage(
|
||||
m3u8_url = self._download_webpage(
|
||||
'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
|
||||
% video_id, video_id)
|
||||
% video_id, video_id, headers={'Referer': url})
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._html_search_regex(
|
||||
@ -64,7 +66,7 @@ class EinthusanIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class EngadgetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
|
@ -5,7 +5,7 @@ from ..utils import remove_end
|
||||
|
||||
|
||||
class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||
@ -47,6 +47,9 @@ class ESPNIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip?id=10365079',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,58 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ExfmIE(InfoExtractor):
|
||||
IE_NAME = 'exfm'
|
||||
IE_DESC = 'ex.fm'
|
||||
_VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
|
||||
_SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://ex.fm/song/eh359',
|
||||
'md5': 'e45513df5631e6d760970b14cc0c11e7',
|
||||
'info_dict': {
|
||||
'id': '44216187',
|
||||
'ext': 'mp3',
|
||||
'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive',
|
||||
'uploader': 'deadjournalist',
|
||||
'upload_date': '20120424',
|
||||
'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
|
||||
},
|
||||
'note': 'Soundcloud song',
|
||||
'skip': 'The site is down too often',
|
||||
},
|
||||
{
|
||||
'url': 'http://ex.fm/song/wddt8',
|
||||
'md5': '966bd70741ac5b8570d8e45bfaed3643',
|
||||
'info_dict': {
|
||||
'id': 'wddt8',
|
||||
'ext': 'mp3',
|
||||
'title': 'Safe and Sound',
|
||||
'uploader': 'Capital Cities',
|
||||
},
|
||||
'skip': 'The site is down too often',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
song_id = mobj.group('id')
|
||||
info_url = 'http://ex.fm/api/v3/song/%s' % song_id
|
||||
info = self._download_json(info_url, song_id)['song']
|
||||
song_url = info['url']
|
||||
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
||||
self.to_screen('Soundcloud song detected')
|
||||
return self.url_result(song_url.replace('/stream', ''), 'Soundcloud')
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song_url,
|
||||
'ext': 'mp3',
|
||||
'title': info['title'],
|
||||
'thumbnail': info['image']['large'],
|
||||
'uploader': info['artist'],
|
||||
'view_count': info['loved_count'],
|
||||
}
|
@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ExpoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
|
||||
_VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
|
||||
'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
|
||||
|
@ -1,12 +1,18 @@
|
||||
# flake8: noqa
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .abc import ABCIE
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .abc import (
|
||||
ABCIE,
|
||||
ABCIViewIE,
|
||||
)
|
||||
from .abcnews import (
|
||||
AbcNewsIE,
|
||||
AbcNewsVideoIE,
|
||||
)
|
||||
from .abcotvs import (
|
||||
ABCOTVSIE,
|
||||
ABCOTVSClipsIE,
|
||||
)
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .acast import (
|
||||
ACastIE,
|
||||
@ -25,10 +31,10 @@ from .aenetworks import (
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
@ -67,6 +73,12 @@ from .atttechchannel import ATTTechChannelIE
|
||||
from .audimedia import AudiMediaIE
|
||||
from .audioboom import AudioBoomIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .awaan import (
|
||||
AWAANIE,
|
||||
AWAANVideoIE,
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
@ -80,6 +92,7 @@ from .bbc import (
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bellmedia import BellMediaIE
|
||||
from .beatportpro import BeatportProIE
|
||||
from .bet import BetIE
|
||||
from .bigflix import BigflixIE
|
||||
@ -103,7 +116,10 @@ from .brightcove import (
|
||||
BrightcoveNewIE,
|
||||
)
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .byutv import (
|
||||
BYUtvIE,
|
||||
BYUtvEventIE,
|
||||
)
|
||||
from .c56 import C56IE
|
||||
from .camdemy import (
|
||||
CamdemyIE,
|
||||
@ -117,9 +133,12 @@ from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
)
|
||||
from .cartoonnetwork import CartoonNetworkIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
CBCWatchVideoIE,
|
||||
CBCWatchIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
@ -130,9 +149,11 @@ from .cbsnews import (
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .charlierose import CharlieRoseIE
|
||||
from .chaturbate import ChaturbateIE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .chirbit import (
|
||||
@ -177,9 +198,12 @@ from .crunchyroll import (
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
CuriosityStreamIE,
|
||||
CuriosityStreamCollectionIE,
|
||||
)
|
||||
from .cwtv import CWTVIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .dailymotion import (
|
||||
@ -195,12 +219,6 @@ from .daum import (
|
||||
DaumUserIE,
|
||||
)
|
||||
from .dbtv import DBTVIE
|
||||
from .dcn import (
|
||||
DCNIE,
|
||||
DCNVideoIE,
|
||||
DCNLiveIE,
|
||||
DCNSeasonIE,
|
||||
)
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .democracynow import DemocracynowIE
|
||||
@ -249,13 +267,18 @@ from .espn import ESPNIE
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import FacebookIE
|
||||
from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPluginsVideoIE,
|
||||
)
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .fc2 import (
|
||||
FC2IE,
|
||||
FC2EmbedIE,
|
||||
)
|
||||
from .fczenit import FczenitIE
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
@ -270,7 +293,11 @@ from .formula1 import Formula1IE
|
||||
from .fourtube import FourTubeIE
|
||||
from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
FoxNewsArticleIE,
|
||||
FoxNewsInsiderIE,
|
||||
)
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
@ -287,6 +314,7 @@ from .freevideo import FreeVideoIE
|
||||
from .funimation import FunimationIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gameone import (
|
||||
GameOneIE,
|
||||
@ -306,6 +334,7 @@ from .globo import (
|
||||
GloboIE,
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .go import GoIE
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .golem import GolemIE
|
||||
@ -322,7 +351,10 @@ from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVIE
|
||||
from .hgtv import (
|
||||
HGTVIE,
|
||||
HGTVComShowIE,
|
||||
)
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
@ -366,6 +398,7 @@ from .ivi import (
|
||||
IviCompilationIE
|
||||
)
|
||||
from .ivideon import IvideonIE
|
||||
from .iwara import IwaraIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
@ -378,6 +411,7 @@ from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
@ -396,6 +430,7 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lci import LCIIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
LcpIE,
|
||||
@ -439,6 +474,10 @@ from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .meta import METAIE
|
||||
@ -446,6 +485,7 @@ from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .mgtv import MGTVIE
|
||||
from .miaopai import MiaoPaiIE
|
||||
from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyIE,
|
||||
MicrosoftVirtualAcademyCourseIE,
|
||||
@ -474,9 +514,11 @@ from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movingimage import MovingImageIE
|
||||
from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVVideoIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVDEIE,
|
||||
)
|
||||
@ -499,6 +541,7 @@ from .nbc import (
|
||||
CSNNEIE,
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
)
|
||||
@ -530,6 +573,7 @@ from .nextmedia import (
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhk import NhkVodIE
|
||||
from .nhl import (
|
||||
NHLVideocenterIE,
|
||||
NHLNewsIE,
|
||||
@ -541,7 +585,10 @@ from .nick import (
|
||||
NickDeIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninecninemedia import (
|
||||
NineCNineMediaStackIE,
|
||||
NineCNineMediaIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
@ -567,13 +614,14 @@ from .nowtv import (
|
||||
)
|
||||
from .noz import NozIE
|
||||
from .npo import (
|
||||
AndereTijdenIE,
|
||||
NPOIE,
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
SchoolTVIE,
|
||||
VPROIE,
|
||||
WNLIE
|
||||
WNLIE,
|
||||
)
|
||||
from .npr import NprIE
|
||||
from .nrk import (
|
||||
@ -623,7 +671,6 @@ from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
@ -635,8 +682,12 @@ from .pluralsight import (
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .pokemon import PokemonIE
|
||||
from .polskieradio import PolskieRadioIE
|
||||
from .polskieradio import (
|
||||
PolskieRadioIE,
|
||||
PolskieRadioCategoryIE,
|
||||
)
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
@ -688,6 +739,7 @@ from .revision3 import (
|
||||
)
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .roosterteeth import RoosterTeethIE
|
||||
@ -790,7 +842,6 @@ from .srgssr import (
|
||||
SRGSSRPlayIE,
|
||||
)
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
@ -810,6 +861,7 @@ from .tagesschau import (
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tass import TassIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
@ -824,10 +876,12 @@ from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .telequebec import TeleQuebecIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .tfo import TFOIE
|
||||
from .theintercept import TheInterceptIE
|
||||
from .theplatform import (
|
||||
ThePlatformIE,
|
||||
@ -852,16 +906,11 @@ from .tnaflix import (
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
THVideoPlaylistIE
|
||||
)
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trollvids import TrollvidsIE
|
||||
from .trutube import TruTubeIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
from .tudou import (
|
||||
@ -891,12 +940,16 @@ from .tvc import (
|
||||
)
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvnoe import TVNoeIE
|
||||
from .tvp import (
|
||||
TVPEmbedIE,
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
)
|
||||
from .tvplay import TVPlayIE
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
)
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
@ -926,8 +979,13 @@ from .udn import UDNEmbedIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .uol import UOLIE
|
||||
from .uplynk import (
|
||||
UplynkIE,
|
||||
UplynkPreplayIE,
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usanetwork import USANetworkIE
|
||||
from .usatoday import USATodayIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .ustudio import (
|
||||
@ -954,6 +1012,7 @@ from .vice import (
|
||||
ViceIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viceland import VicelandIE
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
@ -1014,6 +1073,7 @@ from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
@ -1100,8 +1160,4 @@ from .youtube import (
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
ZingMp3AlbumIE,
|
||||
)
|
||||
from .zippcast import ZippCastIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
|
@ -1,20 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
from ..utils import str_to_int
|
||||
from .keezmovies import KeezMoviesIE
|
||||
|
||||
|
||||
class ExtremeTubeIE(InfoExtractor):
|
||||
class ExtremeTubeIE(KeezMoviesIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
|
||||
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
|
||||
'info_dict': {
|
||||
'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'ext': 'mp4',
|
||||
@ -35,58 +29,22 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, info = self._extract_info(url)
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
if not info['title']:
|
||||
info['title'] = self._search_regex(
|
||||
r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
uploader = self._html_search_regex(
|
||||
r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
flash_vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
for quality_key, video_url in flash_vars.items():
|
||||
height = int_or_none(self._search_regex(
|
||||
r'quality_(\d+)[pP]$', quality_key, 'height', default=None))
|
||||
if not height:
|
||||
continue
|
||||
f = {
|
||||
'url': video_url,
|
||||
}
|
||||
mobj = re.search(
|
||||
r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||
if mobj:
|
||||
height = int(mobj.group('height'))
|
||||
bitrate = int(mobj.group('bitrate'))
|
||||
f.update({
|
||||
'format_id': '%dp-%dk' % (height, bitrate),
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'format_id': '%dp' % height,
|
||||
'height': height,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
info.update({
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
||||
return info
|
||||
|
@ -15,6 +15,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
limit_length,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
|
||||
'uploader': 'Tennis on Facebook',
|
||||
'upload_date': '20140908',
|
||||
'timestamp': 1410199200,
|
||||
}
|
||||
}, {
|
||||
'note': 'Video without discernible title',
|
||||
@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||
'md5': '54706e4db4f5ad58fbad82dde1f1213f',
|
||||
'md5': 'b2c28d528273b323abe5c6ab59f0f030',
|
||||
'info_dict': {
|
||||
'id': '957955867617029',
|
||||
'ext': 'mp4',
|
||||
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
|
||||
'uploader': 'Demy de Zeeuw',
|
||||
'upload_date': '20160110',
|
||||
'timestamp': 1452431627,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
@ -306,12 +313,16 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_title:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
@ -340,3 +351,32 @@ class FacebookIE(InfoExtractor):
|
||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||
video_id, fatal_if_no_video=True)
|
||||
return info_dict
|
||||
|
||||
|
||||
class FacebookPluginsVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
|
||||
'md5': '5954e92cdfe51fe5782ae9bda7058a07',
|
||||
'info_dict': {
|
||||
'id': '10154383743583686',
|
||||
'ext': 'mp4',
|
||||
'title': 'What to do during the haze?',
|
||||
'uploader': 'Gov.sg',
|
||||
'upload_date': '20160826',
|
||||
'timestamp': 1472184808,
|
||||
},
|
||||
'add_ie': [FacebookIE.ie_key()],
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(
|
||||
compat_urllib_parse_unquote(self._match_id(url)),
|
||||
FacebookIE.ie_key())
|
||||
|
@ -1,10 +1,12 @@
|
||||
#! -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
@ -16,7 +18,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
@ -75,12 +77,17 @@ class FC2IE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
self._login()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
self._login()
|
||||
webpage = None
|
||||
if not url.startswith('fc2:'):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
self._login()
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
title = 'FC2 video %s' % video_id
|
||||
thumbnail = None
|
||||
if webpage is not None:
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
|
||||
|
||||
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
||||
@ -113,3 +120,41 @@ class FC2IE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class FC2EmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
|
||||
IE_NAME = 'fc2:embed'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
|
||||
'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
|
||||
'info_dict': {
|
||||
'id': '201403223kCqB3Ez',
|
||||
'ext': 'flv',
|
||||
'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
query = compat_parse_qs(mobj.group('query'))
|
||||
|
||||
video_id = query['i'][-1]
|
||||
title = query.get('tl', ['FC2 video %s' % video_id])[0]
|
||||
|
||||
sj = query.get('sj', [None])[0]
|
||||
thumbnail = None
|
||||
if sj:
|
||||
# See thumbnailImagePath() in ServerConst.as of flv2.swf
|
||||
thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
|
||||
sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': FC2IE.ie_key(),
|
||||
'url': 'fc2:%s' % video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@ -2,44 +2,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_xpath
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
xpath_attr,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
class FirstTVIE(InfoExtractor):
|
||||
IE_NAME = '1tv'
|
||||
IE_DESC = 'Первый канал'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# single format via video_materials.json API
|
||||
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
|
||||
'md5': '82a2777648acae812d58b3f5bd42882b',
|
||||
# single format
|
||||
'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
|
||||
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
|
||||
'info_dict': {
|
||||
'id': '35930',
|
||||
'id': '40049',
|
||||
'ext': 'mp4',
|
||||
'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
|
||||
'description': 'md5:357933adeede13b202c7c21f91b871b2',
|
||||
'description': 'md5:36a39c1d19618fec57d12efe212a8370',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'upload_date': '20150212',
|
||||
'duration': 2694,
|
||||
},
|
||||
}, {
|
||||
# multiple formats via video_materials.json API
|
||||
'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641',
|
||||
# multiple formats
|
||||
'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
|
||||
'info_dict': {
|
||||
'id': '113641',
|
||||
'id': '364746',
|
||||
'ext': 'mp4',
|
||||
'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
|
||||
'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2',
|
||||
'description': 'md5:a242eea0031fd180a4497d52640a9572',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'upload_date': '20160407',
|
||||
'duration': 179,
|
||||
@ -48,84 +44,47 @@ class FirstTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API
|
||||
'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038',
|
||||
'md5': '519d306c5b5669761fd8906c39dbee23',
|
||||
'info_dict': {
|
||||
'id': '47038',
|
||||
'ext': 'mp4',
|
||||
'title': '"Побег". Второй сезон. 3 серия',
|
||||
'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'upload_date': '20120516',
|
||||
'duration': 3080,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.1tv.ru/videoarchive/9967',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
# Videos with multiple formats only available via this API
|
||||
video = self._download_json(
|
||||
'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id,
|
||||
video_id, fatal=False)
|
||||
|
||||
description, thumbnail, upload_date, duration = [None] * 4
|
||||
|
||||
if video:
|
||||
item = video[0]
|
||||
title = item['title']
|
||||
quality = qualities(('ld', 'sd', 'hd', ))
|
||||
formats = [{
|
||||
'url': f['src'],
|
||||
'format_id': f.get('name'),
|
||||
'quality': quality(f.get('name')),
|
||||
} for f in item['mbr'] if f.get('src')]
|
||||
thumbnail = item.get('poster')
|
||||
else:
|
||||
# Some videos are not available via video_materials.json
|
||||
video = self._download_xml(
|
||||
'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id,
|
||||
video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
}
|
||||
|
||||
item = xpath_element(video, './channel/item', fatal=True)
|
||||
title = xpath_text(item, './title', fatal=True)
|
||||
formats = [{
|
||||
'url': content.attrib['url'],
|
||||
} for content in item.findall(
|
||||
compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')]
|
||||
thumbnail = xpath_attr(
|
||||
item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
playlist_url = compat_urlparse.urljoin(url, self._search_regex(
|
||||
r'data-playlist-url="([^"]+)', webpage, 'playlist url'))
|
||||
|
||||
item = self._download_json(playlist_url, display_id)[0]
|
||||
video_id = item['id']
|
||||
quality = qualities(('ld', 'sd', 'hd', ))
|
||||
formats = []
|
||||
for f in item.get('mbr', []):
|
||||
src = f.get('src')
|
||||
if not src:
|
||||
continue
|
||||
fname = f.get('name')
|
||||
formats.append({
|
||||
'url': src,
|
||||
'format_id': fname,
|
||||
'quality': quality(fname),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False)
|
||||
if webpage:
|
||||
title = self._html_search_regex(
|
||||
(r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||
r"'title'\s*:\s*'([^']+)'"),
|
||||
webpage, 'title', default=None) or title
|
||||
description = self._html_search_regex(
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||
webpage, 'description', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = thumbnail or self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'video:duration', webpage, 'video duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
|
||||
title = self._html_search_regex(
|
||||
(r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||
r"'title'\s*:\s*'([^']+)'"),
|
||||
webpage, 'title', default=None) or item['title']
|
||||
description = self._html_search_regex(
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||
webpage, 'description', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'video:duration', webpage, 'video duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
|
@ -11,9 +11,13 @@ class Formula1IE(InfoExtractor):
|
||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||
'info_dict': {
|
||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
|
@ -1,14 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FOXIE(InfoExtractor):
|
||||
class FOXIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||
@ -30,14 +30,26 @@ class FOXIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
release_url = self._parse_json(self._search_regex(
|
||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||
video_id)['release_url']
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), video_id)
|
||||
fox_pdk_player = settings['fox_pdk_player']
|
||||
release_url = fox_pdk_player['release_url']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'switch': 'http'
|
||||
}
|
||||
if fox_pdk_player.get('access') == 'locked':
|
||||
ap_p = settings['foxAdobePassProvider']
|
||||
rating = ap_p.get('videoRating')
|
||||
if rating == 'n/a':
|
||||
rating = None
|
||||
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(update_url_query(
|
||||
release_url, {'switch': 'http'}), {'force_smil_url': True}),
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
@ -1,18 +1,24 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class FoxgayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
|
||||
_TEST = {
|
||||
'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
|
||||
'md5': '80d72beab5d04e1655a56ad37afe6841',
|
||||
'md5': '344558ccfea74d33b7adbce22e577f54',
|
||||
'info_dict': {
|
||||
'id': '2582',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
|
||||
'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
|
||||
'title': 'Fuck Turkish-style',
|
||||
'description': 'md5:6ae2d9486921891efe89231ace13ffdf',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
},
|
||||
@ -22,27 +28,35 @@ class FoxgayIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(?P<title>.*?)</title>',
|
||||
webpage, 'title', fatal=False)
|
||||
description = self._html_search_regex(
|
||||
r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
|
||||
webpage, 'description', fatal=False)
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com')
|
||||
description = get_element_by_id('inf_tit', webpage)
|
||||
|
||||
# The default user-agent with foxgay cookies leads to pages without videos
|
||||
self._downloader.cookiejar.clear('.foxgay.com')
|
||||
# Find the URL for the iFrame which contains the actual video.
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
|
||||
'video frame', group='url')
|
||||
iframe = self._download_webpage(
|
||||
self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'),
|
||||
video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
|
||||
thumb_url = self._html_search_regex(
|
||||
r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)
|
||||
iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'},
|
||||
note='Downloading video frame')
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id)
|
||||
|
||||
formats = [{
|
||||
'url': source,
|
||||
'height': resolution,
|
||||
} for source, resolution in zip(
|
||||
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumb_url,
|
||||
'thumbnail': video_data.get('act_vid', {}).get('thumb'),
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FoxNewsIE(AMPIE):
|
||||
IE_NAME = 'foxnews'
|
||||
IE_DESC = 'Fox News and Fox Business Video'
|
||||
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
|
||||
@ -49,6 +51,11 @@ class FoxNewsIE(AMPIE):
|
||||
'url': 'http://video.foxbusiness.com/v/4442309889001',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
|
||||
'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -58,3 +65,76 @@ class FoxNewsIE(AMPIE):
|
||||
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
|
||||
info['id'] = video_id
|
||||
return info
|
||||
|
||||
|
||||
class FoxNewsArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:article'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
|
||||
'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
|
||||
'info_dict': {
|
||||
'id': '5116295019001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trump and Clinton asked to defend positions on Iraq War',
|
||||
'description': 'Veterans react on \'The Kelly File\'',
|
||||
'timestamp': 1473299755,
|
||||
'upload_date': '20160908',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
|
||||
webpage, 'video ID', group='id')
|
||||
return self.url_result(
|
||||
'http://video.foxnews.com/v/' + video_id,
|
||||
FoxNewsIE.ie_key())
|
||||
|
||||
|
||||
class FoxNewsInsiderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:insider'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'md5': 'a10c755e582d28120c62749b4feb4c0c',
|
||||
'info_dict': {
|
||||
'id': '5099377331001',
|
||||
'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'ext': 'mp4',
|
||||
'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
|
||||
'description': 'Is campus censorship getting out of control?',
|
||||
'timestamp': 1472168725,
|
||||
'upload_date': '20160825',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FoxNewsIE.ie_key()],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': FoxNewsIE.ie_key(),
|
||||
'url': embed_url,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
@ -2,21 +2,21 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import month_by_name
|
||||
|
||||
|
||||
class FranceInterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
|
||||
'md5': '4764932e466e6f6c79c317d2e74f6884',
|
||||
'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
||||
'md5': '9e54d7bdb6fdc02a841007f8a975c094',
|
||||
'info_dict': {
|
||||
'id': '793962',
|
||||
'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
||||
'ext': 'mp3',
|
||||
'title': 'L’Histoire dans les jeux vidéo',
|
||||
'description': 'md5:7e93ddb4451e7530022792240a3049c7',
|
||||
'timestamp': 1387369800,
|
||||
'upload_date': '20131218',
|
||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
||||
'upload_date': '20160907',
|
||||
},
|
||||
}
|
||||
|
||||
@ -25,23 +25,30 @@ class FranceInterIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
path = self._search_regex(
|
||||
r'<a id="player".+?href="([^"]+)"', webpage, 'video url')
|
||||
video_url = 'http://www.franceinter.fr/' + path
|
||||
video_url = self._search_regex(
|
||||
r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'video url', group='url')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<span class="title-diffusion">(.+?)</span>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<span class="description">(.*?)</span>',
|
||||
webpage, 'description', fatal=False)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'data-date="(\d+)"', webpage, 'upload date', fatal=False))
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
upload_date_str = self._search_regex(
|
||||
r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
webpage, 'upload date', fatal=False)
|
||||
if upload_date_str:
|
||||
upload_date_list = upload_date_str.split()
|
||||
upload_date_list.reverse()
|
||||
upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
|
||||
upload_date_list[2] = '%02d' % int(upload_date_list[2])
|
||||
upload_date = ''.join(upload_date_list)
|
||||
else:
|
||||
upload_date = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'upload_date': upload_date,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
'vcodec': 'none',
|
||||
|
@ -8,7 +8,7 @@ from .common import InfoExtractor
|
||||
|
||||
class FreespeechIE(InfoExtractor):
|
||||
IE_NAME = 'freespeech.org'
|
||||
_VALID_URL = r'https://www\.freespeech\.org/video/(?P<title>.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)'
|
||||
_TEST = {
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
|
||||
|
70
youtube_dl/extractor/fxnetworks.py
Normal file
70
youtube_dl/extractor/fxnetworks.py
Normal file
@ -0,0 +1,70 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
extract_attributes,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class FXNetworksIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.fxnetworks.com/video/719841347694',
|
||||
'md5': '1447d4722e42ebca19e5232ab93abb22',
|
||||
'info_dict': {
|
||||
'id': '719841347694',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vanpage',
|
||||
'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
|
||||
'age_limit': 14,
|
||||
'uploader': 'NEWA-FNG-FX',
|
||||
'upload_date': '20160706',
|
||||
'timestamp': 1467844741,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.simpsonsworld.com/video/716094019682',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if 'The content you are trying to access is not available in your region.' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<a.+?rel="http://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
|
||||
player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
|
||||
release_url = video_data['rel']
|
||||
title = video_data['data-title']
|
||||
rating = video_data.get('data-rating')
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
if player_type == 'movies':
|
||||
query.update({
|
||||
'manifest': 'm3u',
|
||||
})
|
||||
else:
|
||||
query.update({
|
||||
'switch': 'http',
|
||||
})
|
||||
if video_data.get('data-req-auth') == '1':
|
||||
resource = self._get_mvpd_resource(
|
||||
video_data['data-channel'], title,
|
||||
video_data.get('data-guid'), rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
'thumbnail': video_data.get('data-large-thumb'),
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
@ -1,19 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class GameStarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
|
||||
'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
|
||||
@ -21,8 +17,9 @@ class GameStarIE(InfoExtractor):
|
||||
'id': '76110',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
|
||||
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
|
||||
'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
|
||||
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1406542020,
|
||||
'upload_date': '20140728',
|
||||
'duration': 17
|
||||
}
|
||||
@ -32,41 +29,27 @@ class GameStarIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
og_title = self._og_search_title(webpage)
|
||||
title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
|
||||
|
||||
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
|
||||
|
||||
description = self._og_search_description(webpage).strip()
|
||||
|
||||
thumbnail = self._proto_relative_url(
|
||||
self._og_search_thumbnail(webpage), scheme='http:')
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+) ',
|
||||
webpage, 'upload_date', fatal=False))
|
||||
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r' Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
|
||||
fatal=False))
|
||||
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r' Zuschauer: ([0-9\.]+) ', webpage,
|
||||
'view_count', fatal=False))
|
||||
# TODO: there are multiple ld+json objects in the webpage,
|
||||
# while _search_json_ld finds only the first one
|
||||
json_ld = self._parse_json(self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>',
|
||||
webpage, 'JSON-LD', group='json_ld'), video_id)
|
||||
info_dict = self._json_ld(json_ld, video_id)
|
||||
info_dict['title'] = remove_end(info_dict['title'], ' - GameStar')
|
||||
|
||||
view_count = json_ld.get('interactionCount')
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
|
||||
r'([0-9]+) Kommentare</span>', webpage, 'comment_count',
|
||||
fatal=False))
|
||||
|
||||
return {
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@ -72,6 +72,8 @@ from .kaltura import KalturaIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .facebook import FacebookIE
|
||||
from .soundcloud import SoundcloudIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .dbtv import DBTVIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -102,7 +104,8 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': [
|
||||
'URL could be a direct video link, returning it as such.'
|
||||
]
|
||||
],
|
||||
'skip': 'URL invalid',
|
||||
},
|
||||
# Direct download with broken HEAD
|
||||
{
|
||||
@ -266,7 +269,8 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# m3u8 served with Content-Type: text/plain
|
||||
{
|
||||
@ -281,7 +285,8 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
@ -366,6 +371,7 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
'skip': 'video gone',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.championat.com/video/football/v/87/87499.html',
|
||||
@ -419,6 +425,7 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'movie expired',
|
||||
},
|
||||
# embed.ly video
|
||||
{
|
||||
@ -446,6 +453,8 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
},
|
||||
# HEAD requests lead to endless 301, while GET is OK
|
||||
'expected_warnings': ['301'],
|
||||
},
|
||||
# RUTV embed
|
||||
{
|
||||
@ -520,6 +529,9 @@ class GenericIE(InfoExtractor):
|
||||
'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
# This forum does not allow <iframe> syntaxes anymore
|
||||
# Now HTML tags are displayed as-is
|
||||
'skip': 'No videos on this page',
|
||||
},
|
||||
# Embedded TED video
|
||||
{
|
||||
@ -568,7 +580,8 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Requires rtmpdump'
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# francetv embed
|
||||
{
|
||||
@ -1356,6 +1369,11 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
},
|
||||
{
|
||||
# generic vimeo embed that requires original URL passed as Referer
|
||||
'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
@ -1373,6 +1391,27 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': [ArkenaIE.ie_key()],
|
||||
},
|
||||
{
|
||||
'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
|
||||
'info_dict': {
|
||||
'id': '1c7141f46c',
|
||||
'ext': 'mp4',
|
||||
'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [Vbox7IE.ie_key()],
|
||||
},
|
||||
{
|
||||
# DBTV embeds
|
||||
'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
|
||||
'info_dict': {
|
||||
'id': '43254897',
|
||||
'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -1618,7 +1657,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc, video_id, mpd_base_url=url.rpartition('/')[0])
|
||||
doc, video_id,
|
||||
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||
mpd_url=url)
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||
@ -2209,11 +2250,40 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for VODPlatform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
|
||||
self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
|
||||
|
||||
# Look for Mangomolo embeds
|
||||
mobj = re.search(
|
||||
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
|
||||
(?:
|
||||
video\?.*?\bid=(?P<video_id>\d+)|
|
||||
index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
|
||||
).+?)\1''', webpage)
|
||||
if mobj is not None:
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
video_id = mobj.group('video_id')
|
||||
if video_id:
|
||||
info.update({
|
||||
'ie_key': 'MangomoloVideo',
|
||||
'id': video_id,
|
||||
})
|
||||
else:
|
||||
info.update({
|
||||
'ie_key': 'MangomoloLive',
|
||||
'id': mobj.group('channel_id'),
|
||||
})
|
||||
return info
|
||||
|
||||
# Look for Instagram embeds
|
||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||
@ -2239,6 +2309,16 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
# Look for VBOX7 embeds
|
||||
vbox7_url = Vbox7IE._extract_url(webpage)
|
||||
if vbox7_url:
|
||||
return self.url_result(vbox7_url, Vbox7IE.ie_key())
|
||||
|
||||
# Look for DBTV embeds
|
||||
dbtv_urls = DBTVIE._extract_urls(webpage)
|
||||
if dbtv_urls:
|
||||
return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
@ -2252,12 +2332,23 @@ class GenericIE(InfoExtractor):
|
||||
info_dict.update(json_ld)
|
||||
return info_dict
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
for entry in entries:
|
||||
entry.update({
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
})
|
||||
self._sort_formats(entry['formats'])
|
||||
return self.playlist_result(entries)
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
vext = determine_ext(vpath)
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
||||
|
||||
def filter_video(urls):
|
||||
return list(filter(check_video, urls))
|
||||
@ -2307,9 +2398,6 @@ class GenericIE(InfoExtractor):
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
||||
if not found:
|
||||
# HTML5 video
|
||||
found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||
if not found:
|
||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||
found = re.search(
|
||||
|
@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class GlideIE(InfoExtractor):
|
||||
@ -14,10 +13,8 @@ class GlideIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
|
||||
'ext': 'mp4',
|
||||
'title': 'Damon Timm\'s Glide message',
|
||||
'title': "Damon's Glide message",
|
||||
'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
|
||||
'uploader': 'Damon Timm',
|
||||
'upload_date': '20140919',
|
||||
}
|
||||
}
|
||||
|
||||
@ -27,7 +24,8 @@ class GlideIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'title')
|
||||
r'<title>(.+?)</title>', webpage,
|
||||
'title', default=None) or self._og_search_title(webpage)
|
||||
video_url = self._proto_relative_url(self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'video URL', default=None,
|
||||
@ -36,18 +34,10 @@ class GlideIE(InfoExtractor):
|
||||
r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'thumbnail url', default=None,
|
||||
group='url')) or self._og_search_thumbnail(webpage)
|
||||
uploader = self._search_regex(
|
||||
r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<div[^>]+class="info-date"[^>]*>([^<]+)',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -14,12 +15,13 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GloboIE(InfoExtractor):
|
||||
_VALID_URL = '(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
||||
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
||||
|
||||
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
|
||||
@ -63,6 +65,9 @@ class GloboIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'globo:3607726',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
class MD5(object):
|
||||
@ -396,33 +401,41 @@ class GloboIE(InfoExtractor):
|
||||
|
||||
|
||||
class GloboArticleIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id=["\'](\d{7,})',
|
||||
r'\bdata-player-videosids=["\'](\d{7,})',
|
||||
r'\bvideosIDs\s*:\s*["\'](\d{7,})',
|
||||
r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
|
||||
r'\bdata-id=["\'](\d{7,})',
|
||||
r'<div[^>]+\bid=["\'](\d{7,})',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
|
||||
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
|
||||
'info_dict': {
|
||||
'id': '3652183',
|
||||
'ext': 'mp4',
|
||||
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
|
||||
'duration': 110.711,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': '196',
|
||||
}
|
||||
'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
|
||||
'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
|
||||
'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
|
||||
'info_dict': {
|
||||
'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
|
||||
'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
|
||||
'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@ -432,5 +445,12 @@ class GloboArticleIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
|
||||
return self.url_result('globo:%s' % video_id, 'Globo')
|
||||
video_ids = []
|
||||
for video_regex in self._VIDEOID_REGEXES:
|
||||
video_ids.extend(re.findall(video_regex, webpage))
|
||||
entries = [
|
||||
self.url_result('globo:%s' % video_id, GloboIE.ie_key())
|
||||
for video_id in orderedSet(video_ids)]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
return self.playlist_result(entries, display_id, title, description)
|
||||
|
122
youtube_dl/extractor/go.py
Normal file
122
youtube_dl/extractor/go.py
Normal file
@ -0,0 +1,122 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_age_limit,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class GoIE(InfoExtractor):
|
||||
_BRANDS = {
|
||||
'abc': '001',
|
||||
'freeform': '002',
|
||||
'watchdisneychannel': '004',
|
||||
'watchdisneyjunior': '008',
|
||||
'watchdisneyxd': '009',
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys())
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
||||
'info_dict': {
|
||||
'id': '0_g86w5onx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sneak Peek: Language Arts',
|
||||
'description': 'md5:7dcdab3b2d17e5217c953256af964e9c',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id')
|
||||
brand = self._BRANDS[sub_domain]
|
||||
video_data = self._download_json(
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
|
||||
video_id)['video'][0]
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
for asset in video_data.get('assets', {}).get('asset', []):
|
||||
asset_url = asset.get('value')
|
||||
if not asset_url:
|
||||
continue
|
||||
format_id = asset.get('format')
|
||||
ext = determine_ext(asset_url)
|
||||
if ext == 'm3u8':
|
||||
video_type = video_data.get('type')
|
||||
if video_type == 'lf':
|
||||
entitlement = self._download_json(
|
||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||
video_id, data=urlencode_postdata({
|
||||
'video_id': video_data['id'],
|
||||
'video_type': video_type,
|
||||
'brand': brand,
|
||||
'device': '001',
|
||||
}))
|
||||
errors = entitlement.get('errors', {}).get('errors', [])
|
||||
if errors:
|
||||
error_message = ', '.join([error['message'] for error in errors])
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': asset_url,
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for cc in video_data.get('closedcaption', {}).get('src', []):
|
||||
cc_url = cc.get('value')
|
||||
if not cc_url:
|
||||
continue
|
||||
ext = determine_ext(cc_url)
|
||||
if ext == 'xml':
|
||||
ext = 'ttml'
|
||||
subtitles.setdefault(cc.get('lang'), []).append({
|
||||
'url': cc_url,
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []):
|
||||
thumbnail_url = thumbnail.get('value')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('longdescription') or video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000),
|
||||
'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')),
|
||||
'episode_number': int_or_none(video_data.get('episodenumber')),
|
||||
'series': video_data.get('show', {}).get('title'),
|
||||
'season_number': int_or_none(video_data.get('season', {}).get('num')),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@ -10,7 +10,7 @@ from ..utils import unified_strdate
|
||||
|
||||
class GooglePlusIE(InfoExtractor):
|
||||
IE_DESC = 'Google Plus'
|
||||
_VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
|
||||
IE_NAME = 'plus.google'
|
||||
_TEST = {
|
||||
'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
|
||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GoshgayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
|
||||
_VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
|
||||
'md5': '4b6db9a0a333142eb9f15913142b0ed1',
|
||||
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class HarkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+'
|
||||
_VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+'
|
||||
_TEST = {
|
||||
'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
|
||||
'md5': '6783a58491b47b92c7c1af5a77d4cbee',
|
||||
|
@ -46,3 +46,34 @@ class HGTVIE(InfoExtractor):
|
||||
'episode_number': int_or_none(embed_vars.get('episode')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
|
||||
class HGTVComShowIE(InfoExtractor):
|
||||
IE_NAME = 'hgtv.com:show'
|
||||
_VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos',
|
||||
'info_dict': {
|
||||
'id': 'flip-or-flop-full-episodes-videos',
|
||||
'title': 'Flip or Flop Full Episodes',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)data-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
|
||||
webpage, 'video config'),
|
||||
display_id)['channels'][0]
|
||||
|
||||
entries = [
|
||||
self.url_result(video['releaseUrl'])
|
||||
for video in config['videos'] if video.get('releaseUrl')]
|
||||
|
||||
return self.playlist_result(
|
||||
entries, display_id, config.get('title'), config.get('description'))
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class HotNewHipHopIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
|
||||
'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
|
||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
mimetype2ext,
|
||||
qualities,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@ -19,7 +20,7 @@ class ImdbIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2524815897',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
|
||||
'title': 'Ice Age: Continental Drift Trailer (No. 2)',
|
||||
'description': 'md5:9061c2219254e5d14e03c25c98e96a81',
|
||||
}
|
||||
}, {
|
||||
@ -83,17 +84,17 @@ class ImdbIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'title': remove_end(self._og_search_title(webpage), ' - IMDb'),
|
||||
'formats': formats,
|
||||
'description': descr,
|
||||
'thumbnail': format_info['slate'],
|
||||
'thumbnail': format_info.get('slate'),
|
||||
}
|
||||
|
||||
|
||||
class ImdbListIE(InfoExtractor):
|
||||
IE_NAME = 'imdb:list'
|
||||
IE_DESC = 'Internet Movie Database lists'
|
||||
_VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
|
||||
_VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
|
||||
_TEST = {
|
||||
'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
|
||||
'info_dict': {
|
||||
|
@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor):
|
||||
'uploader': 'Naomi Leonor Phan-Quang',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
},
|
||||
}, {
|
||||
# missing description
|
||||
@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor):
|
||||
'uploader': 'Britney Spears',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -82,7 +84,7 @@ class InstagramIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
(video_url, description, thumbnail, timestamp, uploader,
|
||||
uploader_id, like_count, comment_count) = [None] * 8
|
||||
uploader_id, like_count, comment_count, height, width) = [None] * 10
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@ -94,6 +96,8 @@ class InstagramIE(InfoExtractor):
|
||||
shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = media.get('caption')
|
||||
thumbnail = media.get('display_src')
|
||||
timestamp = int_or_none(media.get('date'))
|
||||
@ -101,10 +105,24 @@ class InstagramIE(InfoExtractor):
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
like_count = int_or_none(media.get('likes', {}).get('count'))
|
||||
comment_count = int_or_none(media.get('comments', {}).get('count'))
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
|
||||
if not video_url:
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}]
|
||||
|
||||
if not uploader_id:
|
||||
uploader_id = self._search_regex(
|
||||
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
|
||||
@ -121,7 +139,7 @@ class InstagramIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by %s' % uploader_id,
|
||||
'description': description,
|
||||
@ -131,6 +149,7 @@ class InstagramIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'comments': comments,
|
||||
}
|
||||
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user