Compare commits
561 Commits
2016.07.07
...
2016.09.04
Author | SHA1 | Date | |
---|---|---|---|
|
4809490108 | ||
|
8112bfeaba | ||
|
d9606d9b6c | ||
|
433af6ad30 | ||
|
feaa5ad787 | ||
|
100bd86a68 | ||
|
0def758782 | ||
|
919cf1a62f | ||
|
b29cd56591 | ||
|
622638512b | ||
|
37c7490ac6 | ||
|
091624f9da | ||
|
7e5dc339de | ||
|
4a69fa04e0 | ||
|
2e99cd30c3 | ||
|
25afc2a783 | ||
|
9603b66012 | ||
|
45aab4d30b | ||
|
ed2bfe93aa | ||
|
cdc783510b | ||
|
cf0efe9636 | ||
|
dedb177029 | ||
|
86c3bbbced | ||
|
4b3a607658 | ||
|
3a7d35b982 | ||
|
6496ccb413 | ||
|
3fcce30289 | ||
|
c2b2c7e138 | ||
|
dacb3a864a | ||
|
6066d03db0 | ||
|
6562d34a8c | ||
|
5e9e3d0f6b | ||
|
349fc5c705 | ||
|
2c3e0af93e | ||
|
6150502e47 | ||
|
b207d5ebd4 | ||
|
4191779dcd | ||
|
f97ec8bcb9 | ||
|
8276d3b87a | ||
|
af95ee94b4 | ||
|
8fb6af6bba | ||
|
f6af0f888b | ||
|
e816c9d158 | ||
|
9250181f37 | ||
|
f096ec2625 | ||
|
4c8ab6fd71 | ||
|
05d4612947 | ||
|
746a695b36 | ||
|
165c54e97d | ||
|
2896dd73bc | ||
|
f8fd510eb4 | ||
|
7a3e849f6e | ||
|
196c6ba067 | ||
|
165620e320 | ||
|
4fd350611c | ||
|
263fef43de | ||
|
a249ab83cb | ||
|
f7043ef39c | ||
|
64fc49aba0 | ||
|
245023a861 | ||
|
3c77a54d5d | ||
|
da30a20a4d | ||
|
1fe48afea5 | ||
|
42e05be867 | ||
|
fe45b0e060 | ||
|
a06e1498aa | ||
|
5a80e7b43a | ||
|
3fb2a23029 | ||
|
cd10b3ea63 | ||
|
547993dcd0 | ||
|
6c9b71bc08 | ||
|
93b8404599 | ||
|
9ba1e1dcc0 | ||
|
b8079a40bc | ||
|
5bc8a73af6 | ||
|
b3eaeded12 | ||
|
ec65b391cb | ||
|
2982514072 | ||
|
98908bcf7c | ||
|
04b32c8f96 | ||
|
40eec6b15c | ||
|
39efc6e3e0 | ||
|
1198fe14a1 | ||
|
71e90766b5 | ||
|
d7aae610f6 | ||
|
92c27a0dbf | ||
|
d181cff685 | ||
|
3b4b82d4ce | ||
|
545ef4f531 | ||
|
906b87cf5f | ||
|
b281aad2dc | ||
|
6b18a24e6e | ||
|
c9de980106 | ||
|
f9b373afda | ||
|
298a120ab7 | ||
|
e3faecde30 | ||
|
a0f071a50d | ||
|
20bad91d76 | ||
|
b54a2da433 | ||
|
dc2c37f316 | ||
|
c1f62dd338 | ||
|
5a3efcd27c | ||
|
4c8f9c2577 | ||
|
f26a298247 | ||
|
ea01cdbf61 | ||
|
6a76b53355 | ||
|
d37708fc86 | ||
|
5c13c28566 | ||
|
f70e9229e6 | ||
|
30afe4aeb2 | ||
|
75fa990dc6 | ||
|
f39ffc5877 | ||
|
07ea9c9b05 | ||
|
073ac1225f | ||
|
0c6422cdd6 | ||
|
08773689f3 | ||
|
0c75abbb7b | ||
|
97653f81b2 | ||
|
d38b27dd9b | ||
|
6d94cbd2f4 | ||
|
30317f4887 | ||
|
8c3e35dd44 | ||
|
c86f51ee38 | ||
|
6e52bbb413 | ||
|
05bddcc512 | ||
|
1212e9972f | ||
|
ccb6570e9e | ||
|
18b6216150 | ||
|
fb009b7f53 | ||
|
3083e4dc07 | ||
|
7367bdef23 | ||
|
ad31642584 | ||
|
c7c43a93ba | ||
|
96229e5f95 | ||
|
55d119e2a1 | ||
|
6d2679ee26 | ||
|
afbab5688e | ||
|
3d897cc791 | ||
|
cf143c4d97 | ||
|
ad120ae1c5 | ||
|
d0fa172e5f | ||
|
f97f9f71e5 | ||
|
526656726b | ||
|
9b8c554ea7 | ||
|
d13bfc07b7 | ||
|
efe470e261 | ||
|
e3f6b56909 | ||
|
b1e676fde8 | ||
|
92d4cfa358 | ||
|
3d47ee0a9e | ||
|
d164a0d41b | ||
|
db29af6d36 | ||
|
2c6acdfd2d | ||
|
fddaa76a59 | ||
|
a809446750 | ||
|
d8f30a7e66 | ||
|
5b1d85754e | ||
|
e25586e471 | ||
|
292a2301bf | ||
|
dabe15701b | ||
|
4245f55880 | ||
|
5b9d187cc6 | ||
|
39e1c4f08c | ||
|
19f35402c5 | ||
|
70852b47ca | ||
|
a9a3b4a081 | ||
|
ecc90093f9 | ||
|
520251c093 | ||
|
55af45fcab | ||
|
b82232036a | ||
|
e4659b4547 | ||
|
9e5751b9fe | ||
|
bd1bcd3ea0 | ||
|
93a63b36f1 | ||
|
8b2dc4c328 | ||
|
850837b67a | ||
|
13585d7682 | ||
|
fd3ec986a4 | ||
|
b0d578ff7b | ||
|
b0c8f2e9c8 | ||
|
51815886a9 | ||
|
08a42f9c74 | ||
|
e15ad9ef09 | ||
|
4e9fee1015 | ||
|
7273e5849b | ||
|
b505e98784 | ||
|
92cd9fd565 | ||
|
b3d7dce429 | ||
|
a44694ab4e | ||
|
ab19b46b88 | ||
|
8804f10e6b | ||
|
6be17c0870 | ||
|
8652770bd2 | ||
|
2a1321a272 | ||
|
9c0fa60bf3 | ||
|
502d87c546 | ||
|
b35b0d73d8 | ||
|
6e7e4a6edf | ||
|
53fef319f1 | ||
|
2cabee2a7d | ||
|
11f502fac1 | ||
|
98affc1a48 | ||
|
70a2829fee | ||
|
837e56c8ee | ||
|
b5ddee8c77 | ||
|
fb64adcbd3 | ||
|
4f640f2890 | ||
|
254e64a20a | ||
|
818ac213eb | ||
|
cbef4d5c9f | ||
|
bf90c46790 | ||
|
69eb4d699f | ||
|
6d8ec8c3b7 | ||
|
760845ce99 | ||
|
5c2d087221 | ||
|
b6c4e36728 | ||
|
1a57b8c18c | ||
|
24eb13b1c6 | ||
|
525e0316c0 | ||
|
7e60ce9cf7 | ||
|
e811bcf8f8 | ||
|
6103f59095 | ||
|
9fa5789279 | ||
|
d2ac04674d | ||
|
1fd6e30988 | ||
|
884cdb6cd9 | ||
|
9771b1f901 | ||
|
2118fdd1a9 | ||
|
320d597c21 | ||
|
aaf44a2f47 | ||
|
fafabc0712 | ||
|
409760a932 | ||
|
097eba019d | ||
|
73a85620ee | ||
|
a560f28c98 | ||
|
5ec5461e1a | ||
|
542130a5d9 | ||
|
82997dad57 | ||
|
647a7bf5e8 | ||
|
77afa008dd | ||
|
db535435b3 | ||
|
c2a453b461 | ||
|
cd29eaab95 | ||
|
52aa7e7476 | ||
|
e97c55ee6a | ||
|
acfccacad5 | ||
|
5f2c2b7936 | ||
|
cb55908e51 | ||
|
e581224843 | ||
|
f50365e91c | ||
|
c366f8d30a | ||
|
6a26c5f9d5 | ||
|
bd6fb007de | ||
|
b69b2ff736 | ||
|
794e5dcd7e | ||
|
f0d3669437 | ||
|
98e698f1ff | ||
|
3cddb8d6a7 | ||
|
990d533ee4 | ||
|
b0081562d2 | ||
|
fff37cfd4f | ||
|
a3be69b7f0 | ||
|
0fd1b1624c | ||
|
367976d49f | ||
|
0aef0771f8 | ||
|
0c070681c5 | ||
|
30b25d382d | ||
|
e5f878c205 | ||
|
e2e84aed7e | ||
|
b1927f4e8a | ||
|
3b9323d96e | ||
|
7f832413d6 | ||
|
7f2ed47595 | ||
|
c3fa77bdef | ||
|
57ce8a6d08 | ||
|
69d8eeeec5 | ||
|
81c13222c6 | ||
|
b1ce2ba197 | ||
|
5c8411e968 | ||
|
cc9c8ce5df | ||
|
20ef4123b9 | ||
|
4e62d26aa2 | ||
|
b657816684 | ||
|
9778b3e7ee | ||
|
25dd58ca6a | ||
|
5e42f8a0ad | ||
|
1ad6b891b2 | ||
|
7aa589a5e1 | ||
|
065bc35489 | ||
|
3a380766d1 | ||
|
affaea0688 | ||
|
77426a087b | ||
|
8991844ea2 | ||
|
082395d0a0 | ||
|
e8ed7354e6 | ||
|
1e7f602e2a | ||
|
522f6c066d | ||
|
321b5e082a | ||
|
3711fa1eb2 | ||
|
395c74615c | ||
|
3dc240e8c6 | ||
|
a41a6c5094 | ||
|
d71207121d | ||
|
b1c6f21c74 | ||
|
412abb8760 | ||
|
f17d5f6d14 | ||
|
6bb801cfaf | ||
|
de02d1f4e9 | ||
|
e1f93a0a76 | ||
|
d21a661bb4 | ||
|
b2bd968f4b | ||
|
4a01befb34 | ||
|
845dfcdc40 | ||
|
d92cb46305 | ||
|
a8795327ca | ||
|
d34995a9e3 | ||
|
958849275f | ||
|
998f094452 | ||
|
aaa42cf0cf | ||
|
9fb64c04cd | ||
|
f9622868e7 | ||
|
37768f9242 | ||
|
a1aadd09a4 | ||
|
b47a75017b | ||
|
e37b54b140 | ||
|
c1decda58c | ||
|
d3f8e038fe | ||
|
ad152e2d95 | ||
|
b0af12154e | ||
|
d16b3c6677 | ||
|
c57244cdb1 | ||
|
a7e5f27412 | ||
|
089a40955c | ||
|
d73ebac100 | ||
|
e563c0d73b | ||
|
491c42e690 | ||
|
7f2339c617 | ||
|
8122e79fef | ||
|
fe3ad1d456 | ||
|
038a5e1a65 | ||
|
84bc23b41b | ||
|
46933a15d6 | ||
|
3859ebeee6 | ||
|
d50aca41f8 | ||
|
0ca057b965 | ||
|
5ca968d0a6 | ||
|
f0d31c624e | ||
|
08c655906c | ||
|
5a993e1692 | ||
|
a7d2953073 | ||
|
fdd0b8f8e0 | ||
|
f65dc41b72 | ||
|
962250f7ea | ||
|
7dc2a74e0a | ||
|
b02b960c6b | ||
|
4f427c4be8 | ||
|
8a00ea567b | ||
|
8895be01fc | ||
|
52e7fcfeb7 | ||
|
2396062c74 | ||
|
14704aeff6 | ||
|
3c2c3af059 | ||
|
1891ea2d76 | ||
|
1094074c04 | ||
|
217d5ae013 | ||
|
8b40854529 | ||
|
6bb0fbf9fb | ||
|
8d3b226b83 | ||
|
42b7a5afe0 | ||
|
899d2bea63 | ||
|
9cb0e65d7e | ||
|
b070564efb | ||
|
ce28252c48 | ||
|
3aa9a73554 | ||
|
6a9b3b61ea | ||
|
45408eb075 | ||
|
eafc66855d | ||
|
e03d3e6453 | ||
|
a70e45f80a | ||
|
697655a7c0 | ||
|
e382b953f0 | ||
|
116e7e0d04 | ||
|
cf03e34ad3 | ||
|
2903137292 | ||
|
9361f2169c | ||
|
35aa6c538f | ||
|
fa9f1d16b8 | ||
|
485fedf6fd | ||
|
da0baba5c8 | ||
|
bb9f3bfedf | ||
|
dbc0b39b91 | ||
|
481c5c5137 | ||
|
0cacae2807 | ||
|
d9d56deadf | ||
|
74ba450a81 | ||
|
db19df6ca0 | ||
|
fbdf8d15d1 | ||
|
94aae01548 | ||
|
39eef54cf0 | ||
|
05c8268c81 | ||
|
289a16b4f3 | ||
|
7935926baa | ||
|
dcbb07c35a | ||
|
40090e8d51 | ||
|
3e050d51d4 | ||
|
ced70c8640 | ||
|
9a700deea4 | ||
|
dc35ba0eba | ||
|
88bd486b9a | ||
|
7f8b92e3cf | ||
|
35f6e0ff36 | ||
|
326fa4e6e5 | ||
|
c74299a72c | ||
|
10a1bb3a78 | ||
|
4d3e543c73 | ||
|
05d1e7aaa9 | ||
|
a3aa814b77 | ||
|
5c32a77cad | ||
|
14a28e705b | ||
|
cc99d4f826 | ||
|
712c7530ff | ||
|
0a147785e8 | ||
|
59eaf69e33 | ||
|
e8be2943a7 | ||
|
8fdc538b46 | ||
|
9513c1eb17 | ||
|
ae6fff4e64 | ||
|
5a65668e25 | ||
|
f75e6890db | ||
|
d9cb92c840 | ||
|
94c04a3c79 | ||
|
f094834857 | ||
|
111de00289 | ||
|
b4a131e1a5 | ||
|
f1991ce928 | ||
|
6548030a17 | ||
|
3a8947650b | ||
|
1979969f91 | ||
|
0673741af3 | ||
|
c8e170b209 | ||
|
bbe1f3634a | ||
|
4671dd41b2 | ||
|
f164b97123 | ||
|
5275efe30d | ||
|
b13647cf3c | ||
|
add7d2a0e2 | ||
|
e298d3a08c | ||
|
fd8c8c7dcd | ||
|
9158af16cc | ||
|
c6668e4ad1 | ||
|
84e8cca48b | ||
|
790b06b7d4 | ||
|
740d7c49c2 | ||
|
4e51ec5f57 | ||
|
05087d1b4c | ||
|
a66a73ee90 | ||
|
8188b923db | ||
|
d993a1354d | ||
|
e8882e7043 | ||
|
1056821799 | ||
|
890e6d3309 | ||
|
246080d378 | ||
|
b1ea680270 | ||
|
45550d1039 | ||
|
7cdfc4c90f | ||
|
af21f56f98 | ||
|
1a8f0773b6 | ||
|
59cc5bd8bf | ||
|
49bc16b95e | ||
|
a2f9ca1e67 | ||
|
371ddb14fe | ||
|
998895dffa | ||
|
aadd3ce21f | ||
|
ae7b846203 | ||
|
21ba7d0981 | ||
|
691fbe7f98 | ||
|
2e221ca3a8 | ||
|
317f7ab634 | ||
|
23495d6a39 | ||
|
224db034ab | ||
|
ad27649be3 | ||
|
84571be645 | ||
|
7b0d333a7e | ||
|
342f0c3682 | ||
|
38e0f16a94 | ||
|
e910fe2fe4 | ||
|
233b58dec7 | ||
|
c39b2ed990 | ||
|
35ec86689c | ||
|
c485959034 | ||
|
a0560d8ab8 | ||
|
0385aa6199 | ||
|
00f4764cb7 | ||
|
51c2cd0b83 | ||
|
5f5a9d6158 | ||
|
2d19fb5072 | ||
|
9d865a1af6 | ||
|
41aa44259d | ||
|
381ff44756 | ||
|
7f29cf545a | ||
|
7d1219f3e0 | ||
|
f1b4af7d79 | ||
|
8a8590a617 | ||
|
4a7a5e41f7 | ||
|
2a49d01600 | ||
|
b99af8a51c | ||
|
8e7020daef | ||
|
a26bcc61c1 | ||
|
5c4dcf8172 | ||
|
e9fb6a4bbe | ||
|
e2dbcaa1bf | ||
|
ae01850165 | ||
|
c3baaedfc8 | ||
|
0b68de3cc1 | ||
|
39e9d524e5 | ||
|
865b087224 | ||
|
3121b25639 | ||
|
0286b85c79 | ||
|
ab52bb5137 | ||
|
61a98b8623 | ||
|
6daf34a045 | ||
|
c03adf90bd | ||
|
0ece114b7b | ||
|
5b6a74856b | ||
|
ce43100a01 | ||
|
8cc9b4016d | ||
|
31eeab9f41 | ||
|
9558dcec9c | ||
|
6e6b70d65f | ||
|
d417fd88d0 | ||
|
9e4f5dc1e9 | ||
|
1251565ee0 | ||
|
1f7258a367 | ||
|
0af985069b | ||
|
0de168f7ed | ||
|
95b31e266b | ||
|
6b3a3098b5 | ||
|
2de624fdd5 | ||
|
3fee7f636c | ||
|
89e2fff2b7 | ||
|
cedc70b292 | ||
|
07d7689f2e | ||
|
ae8cb5328d | ||
|
2e32ac0b9a | ||
|
672f01c370 | ||
|
e2d616dd30 | ||
|
0ab7f4fe2b | ||
|
29c4a07776 | ||
|
826e911e41 | ||
|
30d22dae8e | ||
|
ec3518725b | ||
|
5f87d845eb | ||
|
571808a7aa | ||
|
dfe5fa49ae | ||
|
01a0c511eb | ||
|
b3d30315ce | ||
|
882af14d7d | ||
|
47335a0efa | ||
|
59bbe4911a | ||
|
4f3c5e0627 | ||
|
ccff2c404d |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.07**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04.1**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.07.07
|
||||
[debug] youtube-dl version 2016.09.04.1
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -7,9 +7,6 @@ python:
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
sudo: false
|
||||
install:
|
||||
- bash ./devscripts/install_srelay.sh
|
||||
- export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
|
6
AUTHORS
6
AUTHORS
@@ -177,3 +177,9 @@ Roman Tsiupa
|
||||
Artur Krysiak
|
||||
Jakub Adam Wieczorek
|
||||
Aleksandar Topuzović
|
||||
Nehal Patel
|
||||
Rob van Bekkum
|
||||
Petr Zvoníček
|
||||
Pratyush Singh
|
||||
Aleksander Nitecki
|
||||
Sebastian Blunt
|
||||
|
@@ -46,7 +46,7 @@ Make sure that someone has not already opened the issue you're trying to open. S
|
||||
|
||||
### Why are existing options not enough?
|
||||
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
|
||||
### Is there enough context in your bug report?
|
||||
|
||||
|
587
ChangeLog
Normal file
587
ChangeLog
Normal file
@@ -0,0 +1,587 @@
|
||||
version 2016.09.04.1
|
||||
|
||||
Core
|
||||
* In DASH downloader if the first segment fails, abort the whole download
|
||||
process to prevent throttling (#10497)
|
||||
+ Add support for --skip-unavailable-fragments and --fragment-retries in
|
||||
hlsnative downloader (#10165, #10448).
|
||||
+ Add support for --skip-unavailable-fragments in DASH downloader
|
||||
+ Introduce --skip-unavailable-fragments option for fragment based downloaders
|
||||
that allows skipping fragments unavailable due to an HTTP error
|
||||
* Fix extraction of video/audio entries with src attribute in
|
||||
_parse_html5_media_entries (#10540)
|
||||
|
||||
Extractors
|
||||
* [theplatform] Relax URL regular expression (#10546)
|
||||
* [youtube:playlist] Extend URL regular expression
|
||||
* [rottentomatoes] Delegate extraction to internetvideoarchive extractor
|
||||
* [internetvideoarchive] Extract all formats
|
||||
* [pornvoisines] Fix extraction (#10469)
|
||||
* [rottentomatoes] Fix extraction (#10467)
|
||||
* [espn] Extend URL regular expression (#10549)
|
||||
* [vimple] Extend URL regular expression (#10547)
|
||||
* [youtube:watchlater] Fix extraction (#10544)
|
||||
* [youjizz] Fix extraction (#10437)
|
||||
+ [foxnews] Add support for FoxNews Insider (#10445)
|
||||
+ [fc2] Recognize Flash player URLs (#10512)
|
||||
|
||||
|
||||
version 2016.09.03
|
||||
|
||||
Core
|
||||
* Restore usage of NAME attribute from EXT-X-MEDIA tag for format codes in
|
||||
_extract_m3u8_formats (#10522)
|
||||
* Handle semicolon in mimetype2ext
|
||||
|
||||
Extractors
|
||||
+ [youtube] Add support for rental videos' previews (#10532)
|
||||
* [youtube:playlist] Fallback to video extraction for video/playlist URLs when
|
||||
no playlist is actually served (#10537)
|
||||
+ [drtv] Add support for dr.dk/nyheder (#10536)
|
||||
+ [facebook:plugins:video] Add extractor (#10530)
|
||||
+ [go] Add extractor for *.go.com sites
|
||||
* [adobepass] Check for authz_token expiration (#10527)
|
||||
* [nytimes] improve extraction
|
||||
* [thestar] Fix extraction (#10465)
|
||||
* [glide] Fix extraction (#10478)
|
||||
- [exfm] Remove extractor (#10482)
|
||||
* [youporn] Fix categories and tags extraction (#10521)
|
||||
+ [curiositystream] Add extractor for app.curiositystream.com
|
||||
- [thvideo] Remove extractor (#10464)
|
||||
* [movingimage] Fix for the new site name (#10466)
|
||||
+ [cbs] Add support for once formats (#10515)
|
||||
* [limelight] Skip ism and duplicate manifests
|
||||
+ [porncom] Extract categories and tags (#10510)
|
||||
+ [facebook] Extract timestamp (#10508)
|
||||
+ [yahoo] Extract more formats
|
||||
|
||||
|
||||
version 2016.08.31
|
||||
|
||||
Extractors
|
||||
* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505)
|
||||
* [bandcamp:album] Fix title extraction (#10455)
|
||||
* [pyvideo] Fix extraction (#10468)
|
||||
+ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016)
|
||||
* [9c9media] Extract more metadata
|
||||
* [9c9media] Fix multiple stacks extraction (#10016)
|
||||
* [adultswim] Improve video info extraction (#10492)
|
||||
* [vodplatform] Improve embed regular expression
|
||||
- [played] Remove extractor (#10470)
|
||||
+ [tbs] Add extractor for tbs.com and tntdrama.com (#10222)
|
||||
+ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110)
|
||||
* [adultswim] Rework in terms of turner extractor
|
||||
* [cnn] Rework in terms of turner extractor
|
||||
* [nba] Rework in terms of turner extractor
|
||||
+ [turner] Add base extractor for Turner Broadcasting System based sites
|
||||
* [bilibili] Fix extraction (#10375)
|
||||
* [openload] Fix extraction (#10408)
|
||||
|
||||
|
||||
version 2016.08.28
|
||||
|
||||
Core
|
||||
+ Add warning message that ffmpeg doesn't support SOCKS
|
||||
* Improve thumbnail sorting
|
||||
+ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats
|
||||
* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative
|
||||
+ Add ac-3 to the list of audio codecs in parse_codecs
|
||||
|
||||
Extractors
|
||||
* [periscope:user] Fix extraction (#10453)
|
||||
* [douyutv] Fix extraction (#10153, #10318, #10444)
|
||||
+ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424)
|
||||
- [trutube] Remove extractor (#10438)
|
||||
+ [usanetwork] Add extractor for usanetwork.com
|
||||
* [crackle] Fix extraction (#10333)
|
||||
* [spankbang] Fix description and uploader extraction (#10339)
|
||||
* [discoverygo] Detect cable provider restricted videos (#10425)
|
||||
+ [cbc] Add support for watch.cbc.ca
|
||||
* [kickstarter] Silence the warning for og:description (#10415)
|
||||
* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
|
||||
|
||||
|
||||
version 2016.08.24.1
|
||||
|
||||
Extractors
|
||||
+ [pluralsight] Add support for subtitles (#9681)
|
||||
|
||||
|
||||
version 2016.08.24
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix authentication (#10392)
|
||||
* [openload] Fix extraction (#10408)
|
||||
+ [bravotv] Add support for Adobe Pass (#10407)
|
||||
* [bravotv] Fix clip info extraction (#10407)
|
||||
* [eagleplatform] Improve embedded videos detection (#10409)
|
||||
* [awaan] Fix extraction
|
||||
* [mtvservices:embedded] Update config URL
|
||||
+ [abc:iview] Add extractor (#6148)
|
||||
|
||||
|
||||
version 2016.08.22
|
||||
|
||||
Core
|
||||
* Improve formats and subtitles extension auto calculation
|
||||
+ Recognize full unit names in parse_filesize
|
||||
+ Add support for m3u8 manifests in HTML5 multimedia tags
|
||||
* Fix octal/hexadecimal number detection in js_to_json
|
||||
|
||||
Extractors
|
||||
+ [ivi] Add support for 720p and 1080p
|
||||
+ [charlierose] Add new extractor (#10382)
|
||||
* [1tv] Fix extraction (#9249)
|
||||
* [twitch] Renew authentication
|
||||
* [kaltura] Improve subtitles extension calculation
|
||||
+ [zingmp3] Add support for video clips
|
||||
* [zingmp3] Fix extraction (#10041)
|
||||
* [kaltura] Improve subtitles extraction (#10279)
|
||||
* [cultureunplugged] Fix extraction (#10330)
|
||||
+ [cnn] Add support for money.cnn.com (#2797)
|
||||
* [cbsnews] Fix extraction (#10362)
|
||||
* [cbs] Fix extraction (#10393)
|
||||
+ [litv] Support 'promo' URLs (#10385)
|
||||
* [snotr] Fix extraction (#10338)
|
||||
* [n-tv.de] Fix extraction (#10331)
|
||||
* [globo:article] Relax URL and video id regular expressions (#10379)
|
||||
|
||||
|
||||
version 2016.08.19
|
||||
|
||||
Core
|
||||
- Remove output template description from --help
|
||||
* Recognize lowercase units in parse_filesize
|
||||
|
||||
Extractors
|
||||
+ [porncom] Add extractor for porn.com (#2251, #10251)
|
||||
+ [generic] Add support for DBTV embeds
|
||||
* [vk:wallpost] Fix audio extraction for new site layout
|
||||
* [vk] Fix authentication
|
||||
+ [hgtvcom:show] Add extractor for hgtv.com shows (#10365)
|
||||
+ [discoverygo] Add support for other GO network sites
|
||||
|
||||
|
||||
version 2016.08.17
|
||||
|
||||
Core
|
||||
+ Add _get_netrc_login_info
|
||||
|
||||
Extractors
|
||||
* [mofosex] Extract all formats (#10335)
|
||||
+ [generic] Add support for vbox7 embeds
|
||||
+ [vbox7] Add support for embed URLs
|
||||
+ [viafree] Add extractor (#10358)
|
||||
+ [mtg] Add support for viafree URLs (#10358)
|
||||
* [theplatform] Extract all subtitles per language
|
||||
+ [xvideos] Fix HLS extraction (#10356)
|
||||
+ [amcnetworks] Add extractor
|
||||
+ [bbc:playlist] Add support for pagination (#10349)
|
||||
+ [fxnetworks] Add extractor (#9462)
|
||||
* [cbslocal] Fix extraction for SendtoNews-based videos
|
||||
* [sendtonews] Fix extraction
|
||||
* [jwplatform] Extract video id from JWPlayer data
|
||||
- [zippcast] Remove extractor (#10332)
|
||||
+ [viceland] Add extractor (#8799)
|
||||
+ [adobepass] Add base extractor for Adobe Pass Authentication
|
||||
* [life:embed] Improve extraction
|
||||
* [vgtv] Detect geo restricted videos (#10348)
|
||||
+ [uplynk] Add extractor
|
||||
* [xiami] Fix extraction (#10342)
|
||||
|
||||
|
||||
version 2016.08.13
|
||||
|
||||
Core
|
||||
* Show progress for curl external downloader
|
||||
* Forward more options to curl external downloader
|
||||
|
||||
Extractors
|
||||
* [pbs] Fix description extraction
|
||||
* [franceculture] Fix extraction (#10324)
|
||||
* [pornotube] Fix extraction (#10322)
|
||||
* [4tube] Fix metadata extraction (#10321)
|
||||
* [imgur] Fix width and height extraction (#10325)
|
||||
* [expotv] Improve extraction
|
||||
+ [vbox7] Fix extraction (#10309)
|
||||
- [tapely] Remove extractor (#10323)
|
||||
* [muenchentv] Fix extraction (#10313)
|
||||
+ [24video] Add support for .me and .xxx TLDs
|
||||
* [24video] Fix comment count extraction
|
||||
* [sunporno] Add support for embed URLs
|
||||
* [sunporno] Fix metadata extraction (#10316)
|
||||
+ [hgtv] Add extractor for hgtv.ca (#3999)
|
||||
- [pbs] Remove request to unavailable API
|
||||
+ [pbs] Add support for high quality HTTP formats
|
||||
+ [crunchyroll] Add support for HLS formats (#10301)
|
||||
|
||||
|
||||
version 2016.08.12
|
||||
|
||||
Core
|
||||
* Subtitles are now written as is. Newline conversions are disabled. (#10268)
|
||||
+ Recognize more formats in unified_timestamp
|
||||
|
||||
Extractors
|
||||
- [goldenmoustache] Remove extractor (#10298)
|
||||
* [drtuber] Improve title extraction
|
||||
* [drtuber] Make dislike count optional (#10297)
|
||||
* [chirbit] Fix extraction (#10296)
|
||||
* [francetvinfo] Relax URL regular expression
|
||||
* [rtlnl] Relax URL regular expression (#10282)
|
||||
* [formula1] Relax URL regular expression (#10283)
|
||||
* [wat] Improve extraction (#10281)
|
||||
* [ctsnews] Fix extraction
|
||||
|
||||
|
||||
version 2016.08.10
|
||||
|
||||
Core
|
||||
* Make --metadata-from-title non fatal when title does not match the pattern
|
||||
* Introduce options for randomized sleep before each download
|
||||
--min-sleep-interval and --max-sleep-interval (#9930)
|
||||
* Respect default in _search_json_ld
|
||||
|
||||
Extractors
|
||||
+ [uol] Add extractor for uol.com.br (#4263)
|
||||
* [rbmaradio] Fix extraction and extract all formats (#10242)
|
||||
+ [sonyliv] Add extractor for sonyliv.com (#10258)
|
||||
* [aparat] Fix extraction
|
||||
* [cwtv] Extract HTTP formats
|
||||
+ [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253)
|
||||
* [kuwo:singer] Fix extraction
|
||||
|
||||
|
||||
version 2016.08.07
|
||||
|
||||
Core
|
||||
+ Add support for TV Parental Guidelines ratings in parse_age_limit
|
||||
+ Add decode_png (#9706)
|
||||
+ Add support for partOfTVSeries in JSON-LD
|
||||
* Lower master M3U8 manifest preference for better format sorting
|
||||
|
||||
Extractors
|
||||
+ [discoverygo] Add extractor (#10245)
|
||||
* [flipagram] Make JSON-LD extraction non fatal
|
||||
* [generic] Make JSON-LD extraction non fatal
|
||||
+ [bbc] Add support for morph embeds (#10239)
|
||||
* [tnaflixnetworkbase] Improve title extraction
|
||||
* [tnaflix] Fix metadata extraction (#10249)
|
||||
* [fox] Fix theplatform release URL query
|
||||
* [openload] Fix extraction (#9706)
|
||||
* [bbc] Skip duplicate manifest URLs
|
||||
* [bbc] Improve format code
|
||||
+ [bbc] Add support for DASH and F4M
|
||||
* [bbc] Improve format sorting and listing
|
||||
* [bbc] Improve playlist extraction
|
||||
+ [pokemon] Add extractor (#10093)
|
||||
+ [condenast] Add fallback scenario for video info extraction
|
||||
|
||||
|
||||
version 2016.08.06
|
||||
|
||||
Core
|
||||
* Add support for JSON-LD root list entries (#10203)
|
||||
* Improve unified_timestamp
|
||||
* Lower preference of RTSP formats in generic sorting
|
||||
+ Add support for multiple properties in _og_search_property
|
||||
* Improve password hiding from verbose output
|
||||
|
||||
Extractors
|
||||
+ [adultswim] Add support for trailers (#10235)
|
||||
* [archiveorg] Improve extraction (#10219)
|
||||
+ [jwplatform] Add support for playlists
|
||||
+ [jwplatform] Add support for relative URLs
|
||||
* [jwplatform] Improve audio detection
|
||||
+ [tvplay] Capture and output native error message
|
||||
+ [tvplay] Extract series metadata
|
||||
+ [tvplay] Add support for subtitles (#10194)
|
||||
* [tvp] Improve extraction (#7799)
|
||||
* [cbslocal] Fix timestamp parsing (#10213)
|
||||
+ [naver] Add support for subtitles (#8096)
|
||||
* [naver] Improve extraction
|
||||
* [condenast] Improve extraction
|
||||
* [engadget] Relax URL regular expression
|
||||
* [5min] Fix extraction
|
||||
+ [nationalgeographic] Add support for Episode Guide
|
||||
+ [kaltura] Add support for subtitles
|
||||
* [kaltura] Optimize network requests
|
||||
+ [vodplatform] Add extractor for vod-platform.net
|
||||
- [gamekings] Remove extractor
|
||||
* [limelight] Extract HTTP formats
|
||||
* [ntvru] Fix extraction
|
||||
+ [comedycentral] Re-add :tds and :thedailyshow shortnames
|
||||
|
||||
|
||||
version 2016.08.01
|
||||
|
||||
Fixed/improved extractors
|
||||
- [yandexmusic:track] Adapt to changes in track location JSON (#10193)
|
||||
- [bloomberg] Support another form of player (#10187)
|
||||
- [limelight] Skip DRM protected videos
|
||||
- [safari] Relax regular expressions for URL matching (#10202)
|
||||
- [cwtv] Add support for cwtvpr.com (#10196)
|
||||
|
||||
|
||||
version 2016.07.30
|
||||
|
||||
Fixed/improved extractors
|
||||
- [twitch:clips] Sort formats
|
||||
- [tv2] Use m3u8_native
|
||||
- [tv2:article] Fix video detection (#10188)
|
||||
- rtve (#10076)
|
||||
- [dailymotion:playlist] Optimize download archive processing (#10180)
|
||||
|
||||
|
||||
version 2016.07.28
|
||||
|
||||
Fixed/improved extractors
|
||||
- shared (#10170)
|
||||
- soundcloud (#10179)
|
||||
- twitch (#9767)
|
||||
|
||||
|
||||
version 2016.07.26.2
|
||||
|
||||
Fixed/improved extractors
|
||||
- smotri
|
||||
- camdemy
|
||||
- mtv
|
||||
- comedycentral
|
||||
- cmt
|
||||
- cbc
|
||||
- mgtv
|
||||
- orf
|
||||
|
||||
|
||||
version 2016.07.24
|
||||
|
||||
New extractors
|
||||
- arkena (#8682)
|
||||
- lcp (#8682)
|
||||
|
||||
Fixed/improved extractors
|
||||
- facebook (#10151)
|
||||
- dailymail
|
||||
- telegraaf
|
||||
- dcn
|
||||
- onet
|
||||
- tvp
|
||||
|
||||
Miscellaneous
|
||||
- Support $Time$ in DASH manifests
|
||||
|
||||
|
||||
version 2016.07.22
|
||||
|
||||
New extractors
|
||||
- odatv (#9285)
|
||||
|
||||
Fixed/improved extractors
|
||||
- bbc
|
||||
- youjizz (#10131)
|
||||
- youtube (#10140)
|
||||
- pornhub (#10138)
|
||||
- eporner (#10139)
|
||||
|
||||
|
||||
version 2016.07.17
|
||||
|
||||
New extractors
|
||||
- nintendo (#9986)
|
||||
- streamable (#9122)
|
||||
|
||||
Fixed/improved extractors
|
||||
- ard (#10095)
|
||||
- mtv
|
||||
- comedycentral (#10101)
|
||||
- viki (#10098)
|
||||
- spike (#10106)
|
||||
|
||||
Miscellaneous
|
||||
- Improved twitter player detection (#10090)
|
||||
|
||||
|
||||
version 2016.07.16
|
||||
|
||||
New extractors
|
||||
- ninenow (#5181)
|
||||
|
||||
Fixed/improved extractors
|
||||
- rtve (#10076)
|
||||
- brightcove
|
||||
- 3qsdn
|
||||
- syfy (#9087, #3820, #2388)
|
||||
- youtube (#10083)
|
||||
|
||||
Miscellaneous
|
||||
- Fix subtitle embedding for video-only and audio-only files (#10081)
|
||||
|
||||
|
||||
version 2016.07.13
|
||||
|
||||
New extractors
|
||||
- rudo
|
||||
|
||||
Fixed/improved extractors
|
||||
- biobiochiletv
|
||||
- tvplay
|
||||
- dbtv
|
||||
- brightcove
|
||||
- tmz
|
||||
- youtube (#10059)
|
||||
- shahid (#10062)
|
||||
- vk
|
||||
- ellentv (#10067)
|
||||
|
||||
|
||||
version 2016.07.11
|
||||
|
||||
New extractors
|
||||
- roosterteeth (#9864)
|
||||
|
||||
Fixed/improved extractors
|
||||
- miomio (#9605)
|
||||
- vuclip
|
||||
- youtube
|
||||
- vidzi (#10058)
|
||||
|
||||
|
||||
version 2016.07.09.2
|
||||
|
||||
Fixed/improved extractors
|
||||
- vimeo (#1638)
|
||||
- facebook (#10048)
|
||||
- lynda (#10047)
|
||||
- animeondemand
|
||||
|
||||
Fixed/improved features
|
||||
- Embedding subtitles no longer throws an error with problematic inputs (#9063)
|
||||
|
||||
|
||||
version 2016.07.09.1
|
||||
|
||||
Fixed/improved extractors
|
||||
- youtube
|
||||
- ard
|
||||
- srmediatek (#9373)
|
||||
|
||||
|
||||
version 2016.07.09
|
||||
|
||||
New extractors
|
||||
- Flipagram (#9898)
|
||||
|
||||
Fixed/improved extractors
|
||||
- telecinco
|
||||
- toutv
|
||||
- radiocanada
|
||||
- tweakers (#9516)
|
||||
- lynda
|
||||
- nick (#7542)
|
||||
- polskieradio (#10028)
|
||||
- le
|
||||
- facebook (#9851)
|
||||
- mgtv
|
||||
- animeondemand (#10031)
|
||||
|
||||
Fixed/improved features
|
||||
- `--postprocessor-args` and `--downloader-args` now accept non-ASCII inputs
|
||||
on non-Windows systems
|
||||
|
||||
|
||||
version 2016.07.07
|
||||
|
||||
New extractors
|
||||
- kamcord (#10001)
|
||||
|
||||
Fixed/improved extractors
|
||||
- spiegel (#10018)
|
||||
- metacafe (#8539, #3253)
|
||||
- onet (#9950)
|
||||
- francetv (#9955)
|
||||
- brightcove (#9965)
|
||||
- daum (#9972)
|
||||
|
||||
|
||||
version 2016.07.06
|
||||
|
||||
Fixed/improved extractors
|
||||
- youtube (#10007, #10009)
|
||||
- xuite
|
||||
- stitcher
|
||||
- spiegel
|
||||
- slideshare
|
||||
- sandia
|
||||
- rtvnh
|
||||
- prosiebensat1
|
||||
- onionstudios
|
||||
|
||||
|
||||
version 2016.07.05
|
||||
|
||||
Fixed/improved extractors
|
||||
- brightcove
|
||||
- yahoo (#9995)
|
||||
- pornhub (#9997)
|
||||
- iqiyi
|
||||
- kaltura (#5557)
|
||||
- la7
|
||||
Changed features
|
||||
- Rename --cn-verification-proxy to --geo-verification-proxy
|
||||
Miscellaneous
|
||||
- Add script for displaying downloads statistics
|
||||
|
||||
|
||||
version 2016.07.03.1
|
||||
|
||||
Fixed/improved extractors
|
||||
- theplatform
|
||||
- aenetworks
|
||||
- nationalgeographic
|
||||
- hrti (#9482)
|
||||
- facebook (#5701)
|
||||
- buzzfeed (#5701)
|
||||
- rai (#8617, #9157, #9232, #8552, #8551)
|
||||
- nationalgeographic (#9991)
|
||||
- iqiyi
|
||||
|
||||
|
||||
version 2016.07.03
|
||||
|
||||
New extractors
|
||||
- hrti (#9482)
|
||||
|
||||
Fixed/improved extractors
|
||||
- vk (#9981)
|
||||
- facebook (#9938)
|
||||
- xtube (#9953, #9961)
|
||||
|
||||
|
||||
version 2016.07.02
|
||||
|
||||
New extractors
|
||||
- fusion (#9958)
|
||||
|
||||
Fixed/improved extractors
|
||||
- twitch (#9975)
|
||||
- vine (#9970)
|
||||
- periscope (#9967)
|
||||
- pornhub (#8696)
|
||||
|
||||
|
||||
version 2016.07.01
|
||||
|
||||
New extractors
|
||||
- 9c9media
|
||||
- ctvnews (#2156)
|
||||
- ctv (#4077)
|
||||
|
||||
Fixed/improved extractors
|
||||
- rds
|
||||
- meta (#8789)
|
||||
- pornhub (#9964)
|
||||
- sixplay (#2183)
|
||||
|
||||
New features
|
||||
- Accept quoted strings across multiple lines (#9940)
|
4
Makefile
4
Makefile
@@ -94,7 +94,7 @@ _EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'la
|
||||
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog
|
||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
--exclude '*.kate-swp' \
|
||||
@@ -107,7 +107,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude 'docs/_build' \
|
||||
-- \
|
||||
bin devscripts test youtube_dl docs \
|
||||
LICENSE README.md README.txt \
|
||||
ChangeLog LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
|
||||
youtube-dl.zsh youtube-dl.fish setup.py \
|
||||
youtube-dl
|
||||
|
88
README.md
88
README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
|
||||
sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
If you do not have curl, you can alternatively use a recent wget:
|
||||
@@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched (YouTube only)
|
||||
--no-color Do not emit color codes in output
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||
@@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite".
|
||||
--fragment-retries RETRIES Number of retries for a fragment (default
|
||||
is 10), or "infinite" (DASH only)
|
||||
is 10), or "infinite" (DASH and hlsnative
|
||||
only)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH and
|
||||
hlsnative only)
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
@@ -201,32 +206,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-a, --batch-file FILE File containing URLs to download ('-' for
|
||||
stdin)
|
||||
--id Use only video ID in file name
|
||||
-o, --output TEMPLATE Output filename template. Use %(title)s to
|
||||
get the title, %(uploader)s for the
|
||||
uploader name, %(uploader_id)s for the
|
||||
uploader nickname if different,
|
||||
%(autonumber)s to get an automatically
|
||||
incremented number, %(ext)s for the
|
||||
filename extension, %(format)s for the
|
||||
format description (like "22 - 1280x720" or
|
||||
"HD"), %(format_id)s for the unique id of
|
||||
the format (like YouTube's itags: "137"),
|
||||
%(upload_date)s for the upload date
|
||||
(YYYYMMDD), %(extractor)s for the provider
|
||||
(youtube, metacafe, etc), %(id)s for the
|
||||
video id, %(playlist_title)s,
|
||||
%(playlist_id)s, or %(playlist)s (=title if
|
||||
present, ID otherwise) for the playlist the
|
||||
video is in, %(playlist_index)s for the
|
||||
position in the playlist. %(height)s and
|
||||
%(width)s for the width and height of the
|
||||
video format. %(resolution)s for a textual
|
||||
description of the resolution of the video
|
||||
format. %% for a literal percent. Use - to
|
||||
output to stdout. Can also be used to
|
||||
download to a different directory, for
|
||||
example with -o '/my/downloads/%(uploader)s
|
||||
/%(title)s-%(id)s.%(ext)s' .
|
||||
-o, --output TEMPLATE Output filename template, see the "OUTPUT
|
||||
TEMPLATE" for all the info
|
||||
--autonumber-size NUMBER Specify the number of digits in
|
||||
%(autonumber)s when it is present in output
|
||||
filename template or --auto-number option
|
||||
@@ -330,7 +311,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
--sleep-interval SECONDS Number of seconds to sleep before each
|
||||
download.
|
||||
download when used alone or a lower bound
|
||||
of a range for randomized sleep before each
|
||||
download (minimum possible number of
|
||||
seconds to sleep) when used along with
|
||||
--max-sleep-interval.
|
||||
--max-sleep-interval SECONDS Upper bound of a range for randomized sleep
|
||||
before each download (maximum possible
|
||||
number of seconds to sleep). Must only be
|
||||
used along with --min-sleep-interval.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT Video format code, see the "FORMAT
|
||||
@@ -428,9 +417,18 @@ You can configure youtube-dl by placing any supported command line option to a c
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
# Lines starting with # are comments
|
||||
|
||||
# Always extract audio
|
||||
-x
|
||||
|
||||
# Do not copy the mtime
|
||||
--no-mtime
|
||||
|
||||
# Use this proxy
|
||||
--proxy 127.0.0.1:3128
|
||||
|
||||
# Save all videos under Movies directory in your home directory
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
```
|
||||
|
||||
@@ -660,7 +658,11 @@ $ youtube-dl -f 'best[filesize<50M]'
|
||||
|
||||
# Download best format available via direct link over HTTP/HTTPS protocol
|
||||
$ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
|
||||
|
||||
# Download the best video format and the best audio format without merging them
|
||||
$ youtube-dl -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s'
|
||||
```
|
||||
Note that in the last example, an output template is recommended as bestvideo and bestaudio may have the same file name.
|
||||
|
||||
|
||||
# VIDEO SELECTION
|
||||
@@ -741,7 +743,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
|
||||
|
||||
### I have downloaded a video but how can I play it?
|
||||
|
||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
|
||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
|
||||
|
||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
||||
|
||||
@@ -823,10 +825,32 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
|
||||
|
||||
### How do I pass cookies to youtube-dl?
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from a browser, use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around the [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) that some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||
|
||||
### How do I stream directly to media player?
|
||||
|
||||
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe the former to the latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
|
||||
|
||||
youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
|
||||
|
||||
### How do I download only new videos from a playlist?
|
||||
|
||||
Use the download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt`, which will record the identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file.
|
||||
|
||||
For example, at first,
|
||||
|
||||
youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
|
||||
|
||||
will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any:
|
||||
|
||||
youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
|
||||
|
||||
### Can you add support for this anime video site, or site which shows current movies for free?
|
||||
|
||||
As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion in youtube-dl.
|
||||
@@ -1195,7 +1219,7 @@ Make sure that someone has not already opened the issue you're trying to open. S
|
||||
|
||||
### Why are existing options not enough?
|
||||
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
|
||||
### Is there enough context in your bug report?
|
||||
|
||||
|
@@ -15,13 +15,9 @@ data = urllib.request.urlopen(URL).read()
|
||||
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
|
||||
template = tmplf.read()
|
||||
|
||||
md5sum = hashlib.md5(data).hexdigest()
|
||||
sha1sum = hashlib.sha1(data).hexdigest()
|
||||
sha256sum = hashlib.sha256(data).hexdigest()
|
||||
template = template.replace('@PROGRAM_VERSION@', version)
|
||||
template = template.replace('@PROGRAM_URL@', URL)
|
||||
template = template.replace('@PROGRAM_MD5SUM@', md5sum)
|
||||
template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
|
||||
template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
|
||||
template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
|
||||
template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
|
||||
|
@@ -1,8 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
mkdir -p tmp && cd tmp
|
||||
wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz
|
||||
tar zxvf srelay-0.4.8b6.tar.gz
|
||||
cd srelay-0.4.8b6
|
||||
./configure
|
||||
make
|
@@ -54,17 +54,21 @@ def filter_options(readme):
|
||||
|
||||
if in_options:
|
||||
if line.lstrip().startswith('-'):
|
||||
option, description = re.split(r'\s{2,}', line.lstrip())
|
||||
split_option = option.split(' ')
|
||||
split = re.split(r'\s{2,}', line.lstrip())
|
||||
# Description string may start with `-` as well. If there is
|
||||
# only one piece then it's a description bit not an option.
|
||||
if len(split) > 1:
|
||||
option, description = split
|
||||
split_option = option.split(' ')
|
||||
|
||||
if not split_option[-1].startswith('-'): # metavar
|
||||
option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
|
||||
if not split_option[-1].startswith('-'): # metavar
|
||||
option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
|
||||
|
||||
# Pandoc's definition_lists. See http://pandoc.org/README.html
|
||||
# for more information.
|
||||
ret += '\n%s\n: %s\n' % (option, description)
|
||||
else:
|
||||
ret += line.lstrip() + '\n'
|
||||
# Pandoc's definition_lists. See http://pandoc.org/README.html
|
||||
# for more information.
|
||||
ret += '\n%s\n: %s\n' % (option, description)
|
||||
continue
|
||||
ret += line.lstrip() + '\n'
|
||||
else:
|
||||
ret += line + '\n'
|
||||
|
||||
|
@@ -71,9 +71,12 @@ fi
|
||||
/bin/echo -e "\n### Changing version in version.py..."
|
||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||
|
||||
/bin/echo -e "\n### Changing version in ChangeLog..."
|
||||
sed -i "s/<unreleased>/$version/" ChangeLog
|
||||
|
||||
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
|
||||
make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
|
||||
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py
|
||||
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py ChangeLog
|
||||
git commit $gpg_sign_commits -m "release $version"
|
||||
|
||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -21,21 +22,26 @@ def format_size(bytes):
|
||||
|
||||
total_bytes = 0
|
||||
|
||||
releases = json.loads(compat_urllib_request.urlopen(
|
||||
'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8'))
|
||||
for page in itertools.count(1):
|
||||
releases = json.loads(compat_urllib_request.urlopen(
|
||||
'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
|
||||
).read().decode('utf-8'))
|
||||
|
||||
for release in releases:
|
||||
compat_print(release['name'])
|
||||
for asset in release['assets']:
|
||||
asset_name = asset['name']
|
||||
total_bytes += asset['download_count'] * asset['size']
|
||||
if all(not re.match(p, asset_name) for p in (
|
||||
r'^youtube-dl$',
|
||||
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
|
||||
r'^youtube-dl\.exe$')):
|
||||
continue
|
||||
compat_print(
|
||||
' %s size: %s downloads: %d'
|
||||
% (asset_name, format_size(asset['size']), asset['download_count']))
|
||||
if not releases:
|
||||
break
|
||||
|
||||
for release in releases:
|
||||
compat_print(release['name'])
|
||||
for asset in release['assets']:
|
||||
asset_name = asset['name']
|
||||
total_bytes += asset['download_count'] * asset['size']
|
||||
if all(not re.match(p, asset_name) for p in (
|
||||
r'^youtube-dl$',
|
||||
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
|
||||
r'^youtube-dl\.exe$')):
|
||||
continue
|
||||
compat_print(
|
||||
' %s size: %s downloads: %d'
|
||||
% (asset_name, format_size(asset['size']), asset['download_count']))
|
||||
|
||||
compat_print('total downloads traffic: %s' % format_size(total_bytes))
|
||||
|
@@ -13,8 +13,12 @@
|
||||
- **5min**
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
- **9c9media:stack**
|
||||
- **9gag**
|
||||
- **9now.com.au**
|
||||
- **abc.net.au**
|
||||
- **abc.net.au:iview**
|
||||
- **Abc7News**
|
||||
- **abcnews**
|
||||
- **abcnews:video**
|
||||
@@ -34,6 +38,7 @@
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **AMCNetworks**
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
@@ -45,6 +50,7 @@
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
@@ -63,6 +69,10 @@
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **auroravid**: AuroraVid
|
||||
- **AWAAN**
|
||||
- **awaan:live**
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
@@ -107,8 +117,11 @@
|
||||
- **Canvas**
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CBC**
|
||||
- **CBCPlayer**
|
||||
- **CartoonNetwork**
|
||||
- **cbc.ca**
|
||||
- **cbc.ca:player**
|
||||
- **cbc.ca:watch**
|
||||
- **cbc.ca:watch:video**
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
@@ -118,6 +131,7 @@
|
||||
- **CDA**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
- **CharlieRose**
|
||||
- **Chaturbate**
|
||||
- **Chilloutzone**
|
||||
- **chirbit**
|
||||
@@ -140,7 +154,8 @@
|
||||
- **CollegeRama**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **ComedyCentralShortname**
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
@@ -156,6 +171,8 @@
|
||||
- **CTVNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **CultureUnplugged**
|
||||
- **curiositystream**
|
||||
- **curiositystream:collection**
|
||||
- **CWTV**
|
||||
- **DailyMail**
|
||||
- **dailymotion**
|
||||
@@ -167,10 +184,6 @@
|
||||
- **daum.net:playlist**
|
||||
- **daum.net:user**
|
||||
- **DBTV**
|
||||
- **DCN**
|
||||
- **dcn:live**
|
||||
- **dcn:season**
|
||||
- **dcn:video**
|
||||
- **DctpTv**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
@@ -179,6 +192,7 @@
|
||||
- **DigitallySpeaking**
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
- **DiscoveryGo**
|
||||
- **Dotsub**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DPlay**
|
||||
@@ -211,29 +225,31 @@
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
- **FacebookPluginsVideo**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **fc2:embed**
|
||||
- **Fczenit**
|
||||
- **features.aol.com**
|
||||
- **fernsehkritik.tv**
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Flipagram**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
- **Foxgay**
|
||||
- **FoxNews**: Fox News and Fox Business Video
|
||||
- **foxnews:insider**
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-quoi**
|
||||
- **FranceCulture**
|
||||
- **FranceCultureEmission**
|
||||
- **FranceInter**
|
||||
- **francetv**: France 2, 3, 4, 5 and Ô
|
||||
- **francetvinfo.fr**
|
||||
@@ -243,8 +259,8 @@
|
||||
- **Funimation**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **FXNetworks**
|
||||
- **GameInformer**
|
||||
- **Gamekings**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
- **Gamersyde**
|
||||
@@ -259,9 +275,9 @@
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **Go**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **GoldenMoustache**
|
||||
- **Golem**
|
||||
- **GoogleDrive**
|
||||
- **Goshgay**
|
||||
@@ -274,6 +290,8 @@
|
||||
- **HellPorno**
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HGTV**
|
||||
- **hgtv.com:show**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
@@ -334,6 +352,8 @@
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.it**
|
||||
- **Laola1Tv**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
@@ -391,11 +411,12 @@
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **MovingImage**
|
||||
- **MPORA**
|
||||
- **MSN**
|
||||
- **mtg**: MTG services
|
||||
- **MTV**
|
||||
- **mtv.de**
|
||||
- **mtviggy.com**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
@@ -411,7 +432,8 @@
|
||||
- **MyVidster**
|
||||
- **n-tv.de**
|
||||
- **natgeo**
|
||||
- **natgeo:channel**
|
||||
- **natgeo:episodeguide**
|
||||
- **natgeo:video**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
@@ -435,9 +457,9 @@
|
||||
- **Newstube**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **nextmovie.com**
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **NhkVod**
|
||||
- **nhl.com**
|
||||
- **nhl.com:news**: NHL news
|
||||
- **nhl.com:videocenter**
|
||||
@@ -446,7 +468,7 @@
|
||||
- **nick.de**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **NineCNineMedia**
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **Noco**
|
||||
@@ -474,6 +496,7 @@
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **ocw.mit.edu**
|
||||
- **OdaTV**
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
@@ -502,7 +525,6 @@
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
@@ -512,7 +534,9 @@
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
- **PornCom**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
- **PornHubPlaylist**
|
||||
@@ -553,8 +577,10 @@
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RockstarGames**
|
||||
- **RoosterTeeth**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **Rozhlas**
|
||||
- **RTBF**
|
||||
- **rte**: Raidió Teilifís Éireann TV
|
||||
- **rte:radio**: Raidió Teilifís Éireann radio
|
||||
@@ -565,7 +591,9 @@
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **rtve.es:television**
|
||||
- **RTVNH**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
- **RulePorn**
|
||||
- **rutube**: Rutube videos
|
||||
@@ -610,6 +638,7 @@
|
||||
- **smotri:user**: Smotri.com user videos
|
||||
- **Snotr**
|
||||
- **Sohu**
|
||||
- **SonyLIV**
|
||||
- **soundcloud**
|
||||
- **soundcloud:playlist**
|
||||
- **soundcloud:search**: Soundcloud search
|
||||
@@ -636,10 +665,10 @@
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
- **SSA**
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **Streamable**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
@@ -651,8 +680,8 @@
|
||||
- **SztvHu**
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tapely**
|
||||
- **Tass**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
@@ -678,8 +707,6 @@
|
||||
- **TheStar**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **THVideo**
|
||||
- **THVideoPlaylist**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
@@ -687,13 +714,13 @@
|
||||
- **TNAFlix**
|
||||
- **TNAFlixNetworkEmbed**
|
||||
- **toggle**
|
||||
- **Tosh**: Tosh.0
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **trollvids**
|
||||
- **TruTube**
|
||||
- **Tube8**
|
||||
- **TubiTv**
|
||||
- **tudou**
|
||||
@@ -716,8 +743,8 @@
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
@@ -733,8 +760,12 @@
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Unistra**
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
- **uplynk:preplay**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USANetwork**
|
||||
- **USAToday**
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
@@ -750,7 +781,9 @@
|
||||
- **VevoPlaylist**
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **Viafree**
|
||||
- **Vice**
|
||||
- **Viceland**
|
||||
- **ViceShow**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
@@ -792,8 +825,10 @@
|
||||
- **vine:user**
|
||||
- **vk**: VK
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **Vodlocker**
|
||||
- **VODPlatform**
|
||||
- **VoiceRepublic**
|
||||
- **VoxMedia**
|
||||
- **Vporn**
|
||||
@@ -868,6 +903,4 @@
|
||||
- **Zapiks**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3:album**: mp3.zing.vn albums
|
||||
- **zingmp3:song**: mp3.zing.vn songs
|
||||
- **ZippCast**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
|
@@ -48,6 +48,9 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||
|
||||
def test_html_search_meta(self):
|
||||
ie = self.ie
|
||||
|
@@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||
|
||||
def test_audio_only_extractor_format_selection(self):
|
||||
# For extractors with incomplete formats (all formats are audio-only or
|
||||
# video-only) best and worst should fallback to corresponding best/worst
|
||||
# video-only or audio-only formats (as per
|
||||
# https://github.com/rg3/youtube-dl/pull/5556)
|
||||
formats = [
|
||||
{'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': 'best'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'high')
|
||||
|
||||
ydl = YDL({'format': 'worst'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'low')
|
||||
|
||||
def test_format_not_available(self):
|
||||
formats = [
|
||||
{'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
|
||||
{'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
# This must fail since complete video-audio format does not match filter
|
||||
# and extractor does not provide incomplete only formats (i.e. only
|
||||
# video-only or audio-only).
|
||||
ydl = YDL({'format': 'best[height>360]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
def test_invalid_format_specs(self):
|
||||
def assert_syntax_error(format_spec):
|
||||
ydl = YDL({'format': format_spec})
|
||||
|
@@ -101,8 +101,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ythistory', ['youtube:history'])
|
||||
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||
|
||||
def test_vimeo_matching(self):
|
||||
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
|
@@ -88,6 +88,7 @@ class TestCompat(unittest.TestCase):
|
||||
def test_compat_shlex_split(self):
|
||||
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
|
||||
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
|
||||
|
||||
def test_compat_etree_fromstring(self):
|
||||
xml = '''
|
||||
|
@@ -39,9 +39,11 @@ from youtube_dl.utils import (
|
||||
is_html,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
mimetype2ext,
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
parse_count,
|
||||
@@ -81,6 +83,7 @@ from youtube_dl.utils import (
|
||||
cli_option,
|
||||
cli_valueless_option,
|
||||
cli_bool_option,
|
||||
parse_codecs,
|
||||
)
|
||||
from youtube_dl.compat import (
|
||||
compat_chr,
|
||||
@@ -307,6 +310,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
|
||||
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@@ -430,6 +434,20 @@ class TestUtil(unittest.TestCase):
|
||||
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||
'trailer.mp4')
|
||||
|
||||
def test_parse_age_limit(self):
    """Check parse_age_limit on empty/invalid values, numeric ages and rating labels."""
    # (input, expected result) pairs, evaluated in order.
    expectations = (
        (None, None),
        (False, None),
        ('invalid', None),
        (0, 0),
        (18, 18),
        (21, 21),
        (22, None),
        ('18', 18),
        ('18+', 18),
        ('PG-13', 13),
        ('TV-14', 14),
        ('TV-MA', 17),
    )
    for raw_value, expected in expectations:
        self.assertEqual(parse_age_limit(raw_value), expected)
|
||||
|
||||
def test_parse_duration(self):
|
||||
self.assertEqual(parse_duration(None), None)
|
||||
self.assertEqual(parse_duration(False), None)
|
||||
@@ -608,6 +626,37 @@ class TestUtil(unittest.TestCase):
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_mimetype2ext(self):
    """Check mimetype2ext on None, plain MIME types and types carrying parameters."""
    # (MIME type, expected extension) pairs, evaluated in order.
    expectations = (
        (None, None),
        ('video/x-flv', 'flv'),
        ('application/x-mpegURL', 'm3u8'),
        ('text/vtt', 'vtt'),
        ('text/vtt;charset=utf-8', 'vtt'),
        ('text/html; charset=utf-8', 'html'),
    )
    for mime_type, expected_ext in expectations:
        self.assertEqual(mimetype2ext(mime_type), expected_ext)
|
||||
|
||||
def test_parse_codecs(self):
    """Check parse_codecs on empty, combined, single-codec and noisy codec strings."""
    # (codecs string, expected result) pairs, evaluated in order.
    expectations = (
        ('', {}),
        ('avc1.77.30, mp4a.40.2', {
            'vcodec': 'avc1.77.30',
            'acodec': 'mp4a.40.2',
        }),
        ('mp4a.40.2', {
            'vcodec': 'none',
            'acodec': 'mp4a.40.2',
        }),
        ('mp4a.40.5,avc1.42001e', {
            'vcodec': 'avc1.42001e',
            'acodec': 'mp4a.40.5',
        }),
        ('avc3.640028', {
            'vcodec': 'avc3.640028',
            'acodec': 'none',
        }),
        (', h264,,newcodec,aac', {
            'vcodec': 'h264',
            'acodec': 'aac',
        }),
    )
    for codecs_str, expected in expectations:
        self.assertEqual(parse_codecs(codecs_str), expected)
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||
@@ -672,6 +721,9 @@ class TestUtil(unittest.TestCase):
|
||||
inp = '''{"foo":101}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"foo":101}''')
|
||||
|
||||
inp = '''{"duration": "00:01:07"}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
@@ -777,7 +829,10 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_filesize('2 MiB'), 2097152)
|
||||
self.assertEqual(parse_filesize('5 GB'), 5000000000)
|
||||
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
|
||||
self.assertEqual(parse_filesize('1.2tb'), 1200000000000)
|
||||
self.assertEqual(parse_filesize('1,24 KB'), 1240)
|
||||
self.assertEqual(parse_filesize('1,24 kb'), 1240)
|
||||
self.assertEqual(parse_filesize('8.5 megabytes'), 8500000)
|
||||
|
||||
def test_parse_count(self):
|
||||
self.assertEqual(parse_count(None), None)
|
||||
@@ -928,6 +983,7 @@ The first line
|
||||
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||
self.assertEqual(cli_option({}, '--proxy', 'proxy'), [])
|
||||
self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10'])
|
||||
|
||||
def test_cli_valueless_option(self):
|
||||
self.assertEqual(cli_valueless_option(
|
||||
|
70
test/test_verbose_output.py
Normal file
70
test/test_verbose_output.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import unittest
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
|
||||
class TestVerboseOutput(unittest.TestCase):
    """Run youtube_dl/__main__.py with -v and credential options, then check
    that the option names appear on stderr while the credential values
    ('johnsmith', 'secret') do not."""

    def _verbose_stderr(self, credential_args):
        """Run youtube_dl/__main__.py with -v plus *credential_args* from
        rootDir and return the bytes captured from its stderr."""
        command = [sys.executable, 'youtube_dl/__main__.py', '-v']
        command.extend(credential_args)
        process = subprocess.Popen(
            command, cwd=rootDir,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, captured_err = process.communicate()
        return captured_err

    def _check_credentials_hidden(self, captured_err, user_flag, password_flag):
        """Assert both flags are present in *captured_err* and that neither
        credential value is."""
        self.assertTrue(user_flag in captured_err)
        self.assertTrue(b'johnsmith' not in captured_err)
        self.assertTrue(password_flag in captured_err)
        self.assertTrue(b'secret' not in captured_err)

    def test_private_info_arg(self):
        captured_err = self._verbose_stderr([
            '--username', 'johnsmith@gmail.com',
            '--password', 'secret',
        ])
        self._check_credentials_hidden(captured_err, b'--username', b'--password')

    def test_private_info_shortarg(self):
        captured_err = self._verbose_stderr([
            '-u', 'johnsmith@gmail.com',
            '-p', 'secret',
        ])
        self._check_credentials_hidden(captured_err, b'-u', b'-p')

    def test_private_info_eq(self):
        captured_err = self._verbose_stderr([
            '--username=johnsmith@gmail.com',
            '--password=secret',
        ])
        self._check_credentials_hidden(captured_err, b'--username', b'--password')

    def test_private_info_shortarg_eq(self):
        captured_err = self._verbose_stderr([
            '-u=johnsmith@gmail.com',
            '-p=secret',
        ])
        self._check_credentials_hidden(captured_err, b'-u', b'-p')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import collections
|
||||
import contextlib
|
||||
import copy
|
||||
import datetime
|
||||
import errno
|
||||
import fileinput
|
||||
@@ -248,7 +249,16 @@ class YoutubeDL(object):
|
||||
source_address: (Experimental) Client-side IP address to bind to.
|
||||
call_home: Boolean, true iff we are allowed to contact the
|
||||
youtube-dl servers for debugging.
|
||||
sleep_interval: Number of seconds to sleep before each download.
|
||||
sleep_interval: Number of seconds to sleep before each download when
|
||||
used alone or a lower bound of a range for randomized
|
||||
sleep before each download (minimum possible number
|
||||
of seconds to sleep) when used along with
|
||||
max_sleep_interval.
|
||||
max_sleep_interval:Upper bound of a range for randomized sleep before each
|
||||
download (maximum possible number of seconds to sleep).
|
||||
Must only be used along with sleep_interval.
|
||||
Actual sleep time will be a random float from range
|
||||
[sleep_interval; max_sleep_interval].
|
||||
listformats: Print an overview of available video formats and exit.
|
||||
list_thumbnails: Print a table of all thumbnails and exit.
|
||||
match_filter: A function that gets called with the info_dict of
|
||||
@@ -1051,9 +1061,9 @@ class YoutubeDL(object):
|
||||
if isinstance(selector, list):
|
||||
fs = [_build_selector_function(s) for s in selector]
|
||||
|
||||
def selector_function(formats):
|
||||
def selector_function(ctx):
|
||||
for f in fs:
|
||||
for format in f(formats):
|
||||
for format in f(ctx):
|
||||
yield format
|
||||
return selector_function
|
||||
elif selector.type == GROUP:
|
||||
@@ -1061,17 +1071,17 @@ class YoutubeDL(object):
|
||||
elif selector.type == PICKFIRST:
|
||||
fs = [_build_selector_function(s) for s in selector.selector]
|
||||
|
||||
def selector_function(formats):
|
||||
def selector_function(ctx):
|
||||
for f in fs:
|
||||
picked_formats = list(f(formats))
|
||||
picked_formats = list(f(ctx))
|
||||
if picked_formats:
|
||||
return picked_formats
|
||||
return []
|
||||
elif selector.type == SINGLE:
|
||||
format_spec = selector.selector
|
||||
|
||||
def selector_function(formats):
|
||||
formats = list(formats)
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if not formats:
|
||||
return
|
||||
if format_spec == 'all':
|
||||
@@ -1084,9 +1094,10 @@ class YoutubeDL(object):
|
||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||
if audiovideo_formats:
|
||||
yield audiovideo_formats[format_idx]
|
||||
# for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
|
||||
elif (all(f.get('acodec') != 'none' for f in formats) or
|
||||
all(f.get('vcodec') != 'none' for f in formats)):
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) we will fallback to best/worst
|
||||
# {video,audio}-only format
|
||||
elif ctx['incomplete_formats']:
|
||||
yield formats[format_idx]
|
||||
elif format_spec == 'bestaudio':
|
||||
audio_formats = [
|
||||
@@ -1160,17 +1171,18 @@ class YoutubeDL(object):
|
||||
}
|
||||
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
||||
|
||||
def selector_function(formats):
|
||||
formats = list(formats)
|
||||
for pair in itertools.product(video_selector(formats), audio_selector(formats)):
|
||||
def selector_function(ctx):
|
||||
for pair in itertools.product(
|
||||
video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
|
||||
yield _merge(pair)
|
||||
|
||||
filters = [self._build_format_filter(f) for f in selector.filters]
|
||||
|
||||
def final_selector(formats):
|
||||
def final_selector(ctx):
|
||||
ctx_copy = copy.deepcopy(ctx)
|
||||
for _filter in filters:
|
||||
formats = list(filter(_filter, formats))
|
||||
return selector_function(formats)
|
||||
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
||||
return selector_function(ctx_copy)
|
||||
return final_selector
|
||||
|
||||
stream = io.BytesIO(format_spec.encode('utf-8'))
|
||||
@@ -1244,8 +1256,10 @@ class YoutubeDL(object):
|
||||
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
|
||||
if thumbnails:
|
||||
thumbnails.sort(key=lambda t: (
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
t.get('id'), t.get('url')))
|
||||
t.get('preference') if t.get('preference') is not None else -1,
|
||||
t.get('width') if t.get('width') is not None else -1,
|
||||
t.get('height') if t.get('height') is not None else -1,
|
||||
t.get('id') if t.get('id') is not None else '', t.get('url')))
|
||||
for i, t in enumerate(thumbnails):
|
||||
t['url'] = sanitize_url(t['url'])
|
||||
if t.get('width') and t.get('height'):
|
||||
@@ -1287,7 +1301,7 @@ class YoutubeDL(object):
|
||||
for subtitle_format in subtitle:
|
||||
if subtitle_format.get('url'):
|
||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||
if 'ext' not in subtitle_format:
|
||||
if subtitle_format.get('ext') is None:
|
||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||
|
||||
if self.params.get('listsubtitles', False):
|
||||
@@ -1342,7 +1356,7 @@ class YoutubeDL(object):
|
||||
note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
||||
)
|
||||
# Automatically determine file extension if missing
|
||||
if 'ext' not in format:
|
||||
if format.get('ext') is None:
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
# Automatically determine protocol if missing (useful for format
|
||||
# selection purposes)
|
||||
@@ -1377,7 +1391,34 @@ class YoutubeDL(object):
|
||||
req_format_list.append('best')
|
||||
req_format = '/'.join(req_format_list)
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
formats_to_download = list(format_selector(formats))
|
||||
|
||||
# While in format selection we may need to have an access to the original
|
||||
# format set in order to calculate some metrics or do some processing.
|
||||
# For now we need to be able to guess whether original formats provided
|
||||
# by extractor are incomplete or not (i.e. whether extractor provides only
|
||||
# video-only or audio-only formats) for proper formats selection for
|
||||
# extractors with such incomplete formats (see
|
||||
# https://github.com/rg3/youtube-dl/pull/5556).
|
||||
# Since formats may be filtered during format selection and may not match
|
||||
# the original formats the results may be incorrect. Thus original formats
|
||||
# or pre-calculated metrics should be passed to format selection routines
|
||||
# as well.
|
||||
# We will pass a context object containing all necessary additional data
|
||||
# instead of just formats.
|
||||
# This fixes incorrect format selection issue (see
|
||||
# https://github.com/rg3/youtube-dl/issues/10083).
|
||||
incomplete_formats = (
|
||||
# All formats are video-only or
|
||||
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
|
||||
# all formats are audio-only
|
||||
all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
|
||||
|
||||
ctx = {
|
||||
'formats': formats,
|
||||
'incomplete_formats': incomplete_formats,
|
||||
}
|
||||
|
||||
formats_to_download = list(format_selector(ctx))
|
||||
if not formats_to_download:
|
||||
raise ExtractorError('requested format not available',
|
||||
expected=True)
|
||||
@@ -1564,7 +1605,9 @@ class YoutubeDL(object):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
|
||||
else:
|
||||
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
|
||||
# Use newline='' to prevent conversion of newline characters
|
||||
# See https://github.com/rg3/youtube-dl/issues/10268
|
||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||
subfile.write(sub_data)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||
|
@@ -145,6 +145,16 @@ def _real_main(argv=None):
|
||||
if numeric_limit is None:
|
||||
parser.error('invalid max_filesize specified')
|
||||
opts.max_filesize = numeric_limit
|
||||
if opts.sleep_interval is not None:
|
||||
if opts.sleep_interval < 0:
|
||||
parser.error('sleep interval must be positive or 0')
|
||||
if opts.max_sleep_interval is not None:
|
||||
if opts.max_sleep_interval < 0:
|
||||
parser.error('max sleep interval must be positive or 0')
|
||||
if opts.max_sleep_interval < opts.sleep_interval:
|
||||
parser.error('max sleep interval must be greater than or equal to min sleep interval')
|
||||
else:
|
||||
opts.max_sleep_interval = opts.sleep_interval
|
||||
|
||||
def parse_retries(retries):
|
||||
if retries in ('inf', 'infinite'):
|
||||
@@ -308,6 +318,7 @@ def _real_main(argv=None):
|
||||
'nooverwrites': opts.nooverwrites,
|
||||
'retries': opts.retries,
|
||||
'fragment_retries': opts.fragment_retries,
|
||||
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'continuedl': opts.continue_dl,
|
||||
@@ -370,6 +381,7 @@ def _real_main(argv=None):
|
||||
'source_address': opts.source_address,
|
||||
'call_home': opts.call_home,
|
||||
'sleep_interval': opts.sleep_interval,
|
||||
'max_sleep_interval': opts.max_sleep_interval,
|
||||
'external_downloader': opts.external_downloader,
|
||||
'list_thumbnails': opts.list_thumbnails,
|
||||
'playlist_items': opts.playlist_items,
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
@@ -2594,15 +2595,19 @@ except ImportError: # Python < 3.3
|
||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||
|
||||
|
||||
if sys.version_info >= (2, 7, 3):
|
||||
try:
|
||||
args = shlex.split('中文')
|
||||
assert (isinstance(args, list) and
|
||||
isinstance(args[0], compat_str) and
|
||||
args[0] == '中文')
|
||||
compat_shlex_split = shlex.split
|
||||
else:
|
||||
except (AssertionError, UnicodeEncodeError):
|
||||
# Working around shlex issue with unicode strings on some python 2
|
||||
# versions (see http://bugs.python.org/issue1548891)
|
||||
def compat_shlex_split(s, comments=False, posix=True):
|
||||
if isinstance(s, compat_str):
|
||||
s = s.encode('utf-8')
|
||||
return shlex.split(s, comments, posix)
|
||||
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
|
||||
|
||||
|
||||
def compat_ord(c):
|
||||
|
@@ -4,6 +4,7 @@ import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import random
|
||||
|
||||
from ..compat import compat_os_name
|
||||
from ..utils import (
|
||||
@@ -342,8 +343,11 @@ class FileDownloader(object):
|
||||
})
|
||||
return True
|
||||
|
||||
sleep_interval = self.params.get('sleep_interval')
|
||||
if sleep_interval:
|
||||
min_sleep_interval = self.params.get('sleep_interval')
|
||||
if min_sleep_interval:
|
||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||
print(min_sleep_interval, max_sleep_interval)
|
||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
||||
time.sleep(sleep_interval)
|
||||
|
||||
|
@@ -38,8 +38,10 @@ class DashSegmentsFD(FragmentFD):
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
def append_url_to_file(target_url, tmp_filename, segment_name):
|
||||
def process_segment(segment, tmp_filename, fatal):
|
||||
target_url, segment_name = segment
|
||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
@@ -52,26 +54,35 @@ class DashSegmentsFD(FragmentFD):
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attemps
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# So, we will retry all fragments that fail with 404 HTTP error for now.
|
||||
if err.code != 404:
|
||||
raise
|
||||
# Retry fragment
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(segment_name, count, fragment_retries)
|
||||
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(segment_name)
|
||||
return True
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
return True
|
||||
|
||||
if initialization_url:
|
||||
append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
|
||||
for i, segment_url in enumerate(segment_urls):
|
||||
append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)
|
||||
segments_to_download = [(initialization_url, 'Init')] if initialization_url else []
|
||||
segments_to_download.extend([
|
||||
(segment_url, 'Seg%d' % i)
|
||||
for i, segment_url in enumerate(segment_urls)])
|
||||
|
||||
for i, segment in enumerate(segments_to_download):
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
if not process_segment(segment, ctx['tmpfilename'], fatal):
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@@ -96,6 +96,12 @@ class CurlFD(ExternalFD):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
||||
cmd += self._valueless_option('--silent', 'noprogress')
|
||||
cmd += self._valueless_option('--verbose', 'verbose')
|
||||
cmd += self._option('--limit-rate', 'ratelimit')
|
||||
cmd += self._option('--retry', 'retries')
|
||||
cmd += self._option('--max-filesize', 'max_filesize')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._valueless_option('--insecure', 'nocheckcertificate')
|
||||
@@ -103,6 +109,16 @@ class CurlFD(ExternalFD):
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
    """Run the command produced by _make_cmd and return the process exit code."""
    encoded_cmd = list(map(encodeArgument, self._make_cmd(tmpfilename, info_dict)))
    self._debug_cmd(encoded_cmd)

    # curl writes the progress to stderr so don't capture it: the child
    # is started without any pipes.
    process = subprocess.Popen(encoded_cmd)
    process.communicate()
    return process.returncode
|
||||
|
||||
|
||||
class AxelFD(ExternalFD):
|
||||
AVAILABLE_OPT = '-V'
|
||||
@@ -204,6 +220,12 @@ class FFmpegFD(ExternalFD):
|
||||
if proxy:
|
||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||
proxy = 'http://%s' % proxy
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
self.report_warning(
|
||||
'%s does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
|
||||
|
||||
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||
# We could switch to the following code if we are able to detect version properly
|
||||
|
@@ -6,6 +6,7 @@ import time
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
)
|
||||
@@ -22,13 +23,19 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
Available options:
|
||||
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH only)
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH
|
||||
and hlsnative only)
|
||||
skip_unavailable_fragments:
|
||||
Skip unavailable fragments (DASH and hlsnative only)
|
||||
"""
|
||||
|
||||
def report_retry_fragment(self, fragment_name, count, retries):
|
||||
def report_retry_fragment(self, err, fragment_name, count, retries):
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
|
||||
% (fragment_name, count, self.format_retries(retries)))
|
||||
'[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
|
||||
|
||||
def report_skip_fragment(self, fragment_name):
    """Print a '[download] Skipping fragment ...' notice for *fragment_name*."""
    notice = '[download] Skipping fragment %s...' % fragment_name
    self.to_screen(notice)
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
self._prepare_frag_download(ctx)
|
||||
|
@@ -13,6 +13,7 @@ from .fragment import FragmentFD
|
||||
from .external import FFmpegFD
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_error,
|
||||
compat_urlparse,
|
||||
compat_struct_pack,
|
||||
)
|
||||
@@ -20,6 +21,7 @@ from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
parse_m3u8_attributes,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -82,6 +84,14 @@ class HlsFD(FragmentFD):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@@ -94,13 +104,37 @@ class HlsFD(FragmentFD):
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
frag_name = 'Frag%d' % i
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
# First we try to retry then either skip or abort.
|
||||
# See https://github.com/rg3/youtube-dl/issues/10165,
|
||||
# https://github.com/rg3/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_name, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
self.report_skip_fragment(frag_name)
|
||||
continue
|
||||
self.report_error(
|
||||
'giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
frag_content = AES.new(
|
||||
@@ -108,7 +142,7 @@ class HlsFD(FragmentFD):
|
||||
ctx['dest_stream'].write(frag_content)
|
||||
frags_filenames.append(frag_sanitized)
|
||||
# We only download the first fragment during the test
|
||||
if self.params.get('test', False):
|
||||
if test:
|
||||
break
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
@@ -116,10 +150,12 @@ class HlsFD(FragmentFD):
|
||||
decrypt_info = parse_m3u8_attributes(line[11:])
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
if 'IV' in decrypt_info:
|
||||
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
|
||||
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
|
@@ -7,6 +7,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -93,3 +94,57 @@ class ABCIE(InfoExtractor):
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
|
||||
class ABCIViewIE(InfoExtractor):
    """Extractor for program pages under iview.abc.net.au/programs/."""
    IE_NAME = 'abc.net.au:iview'
    _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
        'md5': '979d10b2939101f0d27a06b79edad536',
        'info_dict': {
            'id': 'FA1505V024S00',
            'ext': 'mp4',
            'title': 'Series 27 Ep 24',
            'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d',
            'upload_date': '20160820',
            'uploader_id': 'abc1',
            'timestamp': 1471719600,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the program page at *url*.

        Reads the ``videoParams`` JSON object embedded in the page, picks the
        playlist entry whose type is ``program`` and derives formats from its
        ``hds-unmetered`` URL.

        Raises ExtractorError when the playlist has no ``program`` entry.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        video_params = self._parse_json(self._search_regex(
            r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
        title = video_params['title']

        # Use a default so a playlist without a 'program' entry produces a
        # proper extractor error instead of leaking a bare StopIteration.
        stream = next(
            (s for s in video_params['playlist'] if s.get('type') == 'program'),
            None)
        if stream is None:
            raise ExtractorError('Unable to find program stream in playlist')

        formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
        self._sort_formats(formats)

        subtitles = {}
        # 'captions' may be present but null; fall back to an empty dict so
        # the lookup below cannot fail with AttributeError.
        src_vtt = (stream.get('captions') or {}).get('src-vtt')
        if src_vtt:
            subtitles['en'] = [{
                'url': src_vtt,
                'ext': 'vtt',
            }]

        return {
            'id': video_id,
            'title': title,
            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
            'duration': int_or_none(video_params.get('eventDuration')),
            'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
            'series': video_params.get('seriesTitle'),
            'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
            'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)),
            'episode': self._html_search_meta('episode_title', webpage),
            'uploader_id': video_params.get('channel'),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
137
youtube_dl/extractor/adobepass.py
Normal file
137
youtube_dl/extractor/adobepass.py
Normal file
@@ -0,0 +1,137 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class AdobePassIE(InfoExtractor):
    """Base extractor providing Adobe Pass (MVPD) authentication helpers.

    Subclasses build a resource descriptor with ``_get_mvpd_resource`` and
    exchange it for a short-lived media token with ``_extract_mvpd_auth``.
    """

    _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
    _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'

    @staticmethod
    def _get_mvpd_resource(provider_id, title, guid, rating):
        """Return the RSS resource document describing a protected video.

        The document is an ``<rss>`` wrapper around a ``<channel>`` whose
        title is ``provider_id`` and whose single ``<item>`` carries the
        video ``title``, ``guid`` and a ``media:rating`` element (scheme
        ``urn:v-chip``) holding ``rating``.
        """
        channel = etree.Element('channel')
        channel_title = etree.SubElement(channel, 'title')
        channel_title.text = provider_id
        item = etree.SubElement(channel, 'item')
        resource_title = etree.SubElement(item, 'title')
        resource_title.text = title
        resource_guid = etree.SubElement(item, 'guid')
        resource_guid.text = guid
        resource_rating = etree.SubElement(item, 'media:rating')
        resource_rating.attrib = {'scheme': 'urn:v-chip'}
        resource_rating.text = rating
        # The "media" prefix is declared on the hand-written <rss> wrapper,
        # so the channel subtree is serialized separately and spliced in.
        return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'

    def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
        """Return a media token for ``resource``, logging in when needed.

        Authentication and authorization tokens are cached per
        ``requestor_id`` in the downloader cache (section ``mvpd``) and are
        reused until they expire.  Returns an empty string when no netrc
        credentials are available for the provider.  Whenever the service
        answers with ``<pendingLogout``, the cache entry is cleared and the
        whole procedure is restarted.

        url          -- page URL, sent as ``redirect_url`` during login
        video_id     -- identifier used for download progress messages
        requestor_id -- Adobe Pass requestor id of the calling site
        resource     -- resource XML as built by ``_get_mvpd_resource``
        """
        def xml_text(xml_str, tag):
            # Text content of the first <tag>...</tag> pair in xml_str.
            return self._search_regex(
                '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)

        def is_expired(token, date_ele):
            # Drop the "_GMT"/" GMT" marker before handing the date to
            # unified_timestamp.  A date that cannot be parsed yields a
            # falsy result, i.e. the token is treated as still valid.
            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
            return token_expires and token_expires <= int(time.time())

        mvpd_headers = {
            'ap_42': 'anonymous',
            'ap_11': 'Linux i686',
            'ap_z': self._USER_AGENT,
            'User-Agent': self._USER_AGENT,
        }

        guid = xml_text(resource, 'guid')
        requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
        authn_token = requestor_info.get('authn_token')
        if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
            authn_token = None
        if not authn_token:
            # TODO add support for other TV Providers
            mso_id = 'DTV'
            username, password = self._get_netrc_login_info(mso_id)
            if not username or not password:
                return ''

            def post_form(form_page, note, data=None):
                # Submit the first form found in form_page.  When no explicit
                # data is given, the form's hidden inputs are posted back.
                # (None instead of a shared mutable {} default.)
                post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
                return self._download_webpage(
                    post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
                        'Content-Type': 'application/x-www-form-urlencoded',
                    })

            provider_redirect_page = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
                'Downloading Provider Redirect Page', query={
                    'noflash': 'true',
                    'mso_id': mso_id,
                    'requestor_id': requestor_id,
                    'no_iframe': 'false',
                    'domain_name': 'adobe.com',
                    'redirect_url': url,
                })
            provider_login_page = post_form(
                provider_redirect_page, 'Downloading Provider Login Page')
            mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
                'username': username,
                'password': password,
            })
            post_form(mvpd_confirm_page, 'Confirming Login')

            session = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
                'Retrieving Session', data=urlencode_postdata({
                    '_method': 'GET',
                    'requestor_id': requestor_id,
                }), headers=mvpd_headers)
            if '<pendingLogout' in session:
                # Drop everything cached for this requestor and start over.
                self._downloader.cache.store('mvpd', requestor_id, {})
                return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
            authn_token = unescapeHTML(xml_text(session, 'authnToken'))
            requestor_info['authn_token'] = authn_token
            self._downloader.cache.store('mvpd', requestor_id, requestor_info)

        # Authorization tokens are cached per resource guid.
        authz_token = requestor_info.get(guid)
        if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
            authz_token = None
        if not authz_token:
            authorize = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
                'Retrieving Authorization Token', data=urlencode_postdata({
                    'resource_id': resource,
                    'requestor_id': requestor_id,
                    'authentication_token': authn_token,
                    'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
                    'userMeta': '1',
                }), headers=mvpd_headers)
            if '<pendingLogout' in authorize:
                self._downloader.cache.store('mvpd', requestor_id, {})
                return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
            authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
            requestor_info[guid] = authz_token
            self._downloader.cache.store('mvpd', requestor_id, requestor_info)

        mvpd_headers.update({
            'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
            'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
        })

        short_authorize = self._download_webpage(
            self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
            video_id, 'Retrieving Media Token', data=urlencode_postdata({
                'authz_token': authz_token,
                'requestor_id': requestor_id,
                'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
                'hashed_guid': 'false',
            }), headers=mvpd_headers)
        if '<pendingLogout' in short_authorize:
            self._downloader.cache.store('mvpd', requestor_id, {})
            return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
        return short_authorize
|
@@ -3,16 +3,14 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AdultSwimIE(InfoExtractor):
|
||||
class AdultSwimIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -83,6 +81,21 @@ class AdultSwimIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# heroMetadata.trailer
|
||||
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
|
||||
'info_dict': {
|
||||
'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Decker - Inside Decker: A New Hero',
|
||||
'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
|
||||
'duration': 249.008,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -133,79 +146,56 @@ class AdultSwimIE(InfoExtractor):
|
||||
if video_info is None:
|
||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||
video_info = bootstrapped_data['slugged_video']
|
||||
else:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get(
|
||||
'heroMetadata', {}).get('trailer', {}).get('video')
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
|
||||
if not video_info:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
|
||||
show = bootstrapped_data['show']
|
||||
show_title = show['title']
|
||||
stream = video_info.get('stream')
|
||||
clips = [stream] if stream else video_info.get('clips')
|
||||
if not clips:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.'
|
||||
if video_info.get('auth') is True else 'Unable to find stream or clips',
|
||||
expected=True)
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in clips]
|
||||
if stream and stream.get('videoPlaybackID'):
|
||||
segment_ids = [stream['videoPlaybackID']]
|
||||
elif video_info.get('clips'):
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
elif video_info.get('videoPlaybackID'):
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
else:
|
||||
if video_info.get('auth') is True:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.', expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find stream or clips')
|
||||
|
||||
episode_id = video_info['id']
|
||||
episode_title = video_info['title']
|
||||
episode_description = video_info['description']
|
||||
episode_duration = video_info.get('duration')
|
||||
episode_description = video_info.get('description')
|
||||
episode_duration = int_or_none(video_info.get('duration'))
|
||||
view_count = int_or_none(video_info.get('views'))
|
||||
|
||||
entries = []
|
||||
for part_num, segment_id in enumerate(segment_ids):
|
||||
segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id
|
||||
|
||||
segement_info = self._extract_cvp_info(
|
||||
'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
|
||||
segment_id, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
|
||||
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
|
||||
},
|
||||
})
|
||||
segment_title = '%s - %s' % (show_title, episode_title)
|
||||
if len(segment_ids) > 1:
|
||||
segment_title += ' Part %d' % (part_num + 1)
|
||||
|
||||
idoc = self._download_xml(
|
||||
segment_url, segment_title,
|
||||
'Downloading segment information', 'Unable to download segment information')
|
||||
|
||||
segment_duration = float_or_none(
|
||||
xpath_text(idoc, './/trt', 'segment duration').strip())
|
||||
|
||||
formats = []
|
||||
file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
|
||||
|
||||
unique_urls = []
|
||||
unique_file_els = []
|
||||
for file_el in file_els:
|
||||
media_url = file_el.text
|
||||
if not media_url or determine_ext(media_url) == 'f4m':
|
||||
continue
|
||||
if file_el.text not in unique_urls:
|
||||
unique_urls.append(file_el.text)
|
||||
unique_file_els.append(file_el)
|
||||
|
||||
for file_el in unique_file_els:
|
||||
bitrate = file_el.attrib.get('bitrate')
|
||||
ftype = file_el.attrib.get('type')
|
||||
media_url = file_el.text
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, segment_title, 'mp4', preference=0,
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': '%s_%s' % (bitrate, ftype),
|
||||
'url': file_el.text.strip(),
|
||||
# The bitrate may not be a number (for example: 'iphone')
|
||||
'tbr': int(bitrate) if bitrate.isdigit() else None,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
segement_info.update({
|
||||
'id': segment_id,
|
||||
'title': segment_title,
|
||||
'formats': formats,
|
||||
'duration': segment_duration,
|
||||
'description': episode_description
|
||||
'description': episode_description,
|
||||
})
|
||||
entries.append(segement_info)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
@@ -214,5 +204,6 @@ class AdultSwimIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
'title': '%s - %s' % (show_title, episode_title),
|
||||
'description': episode_description,
|
||||
'duration': episode_duration
|
||||
'duration': episode_duration,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
@@ -109,7 +109,10 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating'])
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||
|
91
youtube_dl/extractor/amcnetworks.py
Normal file
91
youtube_dl/extractor/amcnetworks.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
    """Extractor for episode and movie pages on amc.com, bbcamerica.com,
    ifc.com and wetv.com, whose media is served through thePlatform."""

    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
        'md5': '',
        'info_dict': {
            'id': 's3MX01Nl4vPH',
            'ext': 'mp4',
            'title': 'Maron - Season 4 - Step 1',
            'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
            'age_limit': 17,
            'upload_date': '20160505',
            'timestamp': 1462468831,
            'uploader': 'AMCN',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
        'only_matching': True,
    }, {
        'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
        'only_matching': True,
    }, {
        'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
        'only_matching': True,
    }, {
        'url': 'http://www.ifc.com/movies/chaos',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at ``url``.

        The page supplies the thePlatform link URL and an "auth required"
        flag; metadata, formats and subtitles are then fetched from
        thePlatform.  When the page marks the video as requiring
        authentication, a token from ``_extract_mvpd_auth`` is added to
        the media URL query.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
        # Dots are escaped so that only the literal thePlatform host matches.
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)
        video_id = theplatform_metadata['pid']
        title = theplatform_metadata['title']
        rating = theplatform_metadata['ratings'][0]['rating']
        auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
        if auth_required == 'true':
            requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
            # NOTE(review): _get_mvpd_resource/_extract_mvpd_auth are not
            # defined in this class; presumably inherited via ThePlatformIE.
            resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
            query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
        media_url = update_url_query(media_url, query)
        formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
        self._sort_formats(formats)
        info.update({
            'id': video_id,
            'subtitles': subtitles,
            'formats': formats,
            'age_limit': parse_age_limit(rating),
        })
        # Series fields live under a namespace prefix; the first key of
        # '$xmlns' is used as that prefix.
        ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
        if ns_keys:
            ns = list(ns_keys)[0]
            series = theplatform_metadata.get(ns + '$show')
            season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
            episode = theplatform_metadata.get(ns + '$episodeTitle')
            episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
            # Final title has the shape "<series> - Season <n> - <title>",
            # with either prefix omitted when its value is missing.
            if season_number:
                title = 'Season %d - %s' % (season_number, title)
            if series:
                title = '%s - %s' % (series, title)
            info.update({
                'title': title,
                'series': series,
                'season_number': season_number,
                'episode': episode,
                'episode_number': episode_number,
            })
        return info
|
@@ -22,6 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
_TESTS = [{
|
||||
# jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
'info_dict': {
|
||||
'id': '161',
|
||||
@@ -30,17 +31,21 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
# Film wording is used instead of Episode
|
||||
# Film wording is used instead of Episode, ger/jap, Dub/OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Episodes without titles
|
||||
# Episodes without titles, jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -110,35 +115,12 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
def extract_info(html, video_id, num=None):
|
||||
title, description = [None] * 2
|
||||
formats = []
|
||||
|
||||
for input_ in re.findall(
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
||||
attributes = extract_attributes(input_)
|
||||
playlist_urls = []
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||
@@ -161,7 +143,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
format_id_list.append(lang)
|
||||
if kind:
|
||||
format_id_list.append(kind)
|
||||
if not format_id_list:
|
||||
if not format_id_list and num is not None:
|
||||
format_id_list.append(compat_str(num))
|
||||
format_id = '-'.join(format_id_list)
|
||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||
@@ -215,28 +197,74 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
})
|
||||
formats.extend(file_formats)
|
||||
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def extract_entries(html, video_id, common_info, num=None):
|
||||
info = extract_info(html, video_id, num)
|
||||
|
||||
if info['formats']:
|
||||
self._sort_formats(info['formats'])
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
})
|
||||
f.update(info)
|
||||
entries.append(f)
|
||||
|
||||
# Extract teaser only when full episode is not available
|
||||
if not formats:
|
||||
# Extract teaser/trailer only when full episode is not available
|
||||
if not info['formats']:
|
||||
m = re.search(
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
|
||||
episode_html)
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
|
||||
html)
|
||||
if m:
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'id': '%s-teaser' % f['id'],
|
||||
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||
'title': m.group('title'),
|
||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
|
||||
def extract_episodes(html):
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
extract_entries(episode_html, video_id, common_info)
|
||||
|
||||
def extract_film(html, video_id):
|
||||
common_info = {
|
||||
'id': anime_id,
|
||||
'title': anime_title,
|
||||
'description': anime_description,
|
||||
}
|
||||
extract_entries(html, video_id, common_info)
|
||||
|
||||
extract_episodes(webpage)
|
||||
|
||||
if not entries:
|
||||
extract_film(webpage, anime_id)
|
||||
|
||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||
|
@@ -123,6 +123,10 @@ class AolFeaturesIE(InfoExtractor):
|
||||
'title': 'What To Watch - February 17, 2016',
|
||||
},
|
||||
'add_ie': ['FiveMin'],
|
||||
'params': {
|
||||
# encrypted m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -15,7 +13,7 @@ class AparatIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.aparat.com/v/wP8On',
|
||||
'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
|
||||
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||
'info_dict': {
|
||||
'id': 'wP8On',
|
||||
'ext': 'mp4',
|
||||
@@ -31,13 +29,13 @@ class AparatIE(InfoExtractor):
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
|
||||
video_id + '/vt/frame')
|
||||
embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
|
||||
video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
|
||||
r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
|
||||
for i, video_url in enumerate(video_urls):
|
||||
file_list = self._parse_json(self._search_regex(
|
||||
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
|
||||
for i, item in enumerate(file_list[0]):
|
||||
video_url = item['file']
|
||||
req = HEADRequest(video_url)
|
||||
res = self._request_webpage(
|
||||
req, video_id, note='Testing video URL %d' % i, errnote=False)
|
||||
|
@@ -1,67 +1,65 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
class ArchiveOrgIE(JWPlatformBaseIE):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||
'info_dict': {
|
||||
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'ext': 'ogv',
|
||||
'ext': 'ogg',
|
||||
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||
'description': 'md5:1780b464abaca9991d8968c877bb53ed',
|
||||
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
|
||||
'upload_date': '19681210',
|
||||
'uploader': 'SRI International'
|
||||
}
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Cops1922',
|
||||
'md5': '18f2a19e6d89af8425671da1cf3d4e04',
|
||||
'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
|
||||
'info_dict': {
|
||||
'id': 'Cops1922',
|
||||
'ext': 'ogv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||
'description': 'md5:70f72ee70882f713d4578725461ffcc3',
|
||||
'description': 'md5:b4544662605877edd99df22f9620d858',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://archive.org/embed/' + video_id, video_id)
|
||||
jwplayer_playlist = self._parse_json(self._search_regex(
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);",
|
||||
webpage, 'jwplayer playlist'), video_id)
|
||||
info = self._parse_jwplayer_data(
|
||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||
|
||||
json_url = url + ('&' if '?' in url else '?') + 'output=json'
|
||||
data = self._download_json(json_url, video_id)
|
||||
def get_optional(metadata, field):
|
||||
return metadata.get(field, [None])[0]
|
||||
|
||||
def get_optional(data_dict, field):
|
||||
return data_dict['metadata'].get(field, [None])[0]
|
||||
|
||||
title = get_optional(data, 'title')
|
||||
description = get_optional(data, 'description')
|
||||
uploader = get_optional(data, 'creator')
|
||||
upload_date = unified_strdate(get_optional(data, 'date'))
|
||||
|
||||
formats = [
|
||||
{
|
||||
'format': fdata['format'],
|
||||
'url': 'http://' + data['server'] + data['dir'] + fn,
|
||||
'file_size': int(fdata['size']),
|
||||
}
|
||||
for fn, fdata in data['files'].items()
|
||||
if 'Video' in fdata['format']]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': data.get('misc', {}).get('image'),
|
||||
}
|
||||
metadata = self._download_json(
|
||||
'http://archive.org/details/' + video_id, video_id, query={
|
||||
'output': 'json',
|
||||
})['metadata']
|
||||
info.update({
|
||||
'title': get_optional(metadata, 'title') or info.get('title'),
|
||||
'description': clean_html(get_optional(metadata, 'description')),
|
||||
})
|
||||
if info.get('_type') != 'playlist':
|
||||
info.update({
|
||||
'uploader': get_optional(metadata, 'creator'),
|
||||
'upload_date': unified_strdate(get_optional(metadata, 'date')),
|
||||
})
|
||||
return info
|
||||
|
@@ -13,13 +13,14 @@ from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||
@@ -34,6 +35,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||
@@ -44,6 +46,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||
'duration': 5252,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
@@ -55,9 +58,22 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||
'duration': 3240,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'md5': '4e8f00631aac0395fee17368ac0e9867',
|
||||
'info_dict': {
|
||||
'id': '30796318',
|
||||
'ext': 'mp3',
|
||||
'title': 'Vor dem Fest',
|
||||
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||
'duration': 3287,
|
||||
},
|
||||
'skip': 'Video is no longer available',
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
@@ -113,11 +129,14 @@ class ARDMediathekIE(InfoExtractor):
|
||||
continue
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
||||
video_id, preference=-1, f4m_id='hds', fatal=False))
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False))
|
||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
if server and server.startswith('rtmp'):
|
||||
f = {
|
||||
@@ -231,7 +250,8 @@ class ARDIE(InfoExtractor):
|
||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||
'upload_date': '20140804',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
115
youtube_dl/extractor/arkena.py
Normal file
115
youtube_dl/extractor/arkena.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class ArkenaIE(InfoExtractor):
    """Information extractor for media served by the Arkena player.

    Matches both the ``embed`` and the ``config`` flavours of
    ``play.arkena.com`` player URLs.  The media id and the numeric account
    id are read from the URL and used to query the player's config
    endpoint, from which title, metadata and formats are taken.
    """

    _VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
    _TESTS = [{
        'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
        'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
        'info_dict': {
            'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'description': 'Royalty free test video',
            'timestamp': 1432816365,
            'upload_date': '20150528',
            'is_live': False,
        },
    }, {
        'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the ``src`` of the first Arkena embed iframe in *webpage*.

        The returned URL may be protocol-relative (the pattern accepts a
        leading ``//``).  Returns None when no such iframe is found.
        """
        # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Build the info dict for the media referenced by *url*.

        Only the first entry of the config's ``Playlist`` is used.  A
        missing ``Playlist``, ``MediaInfo``, ``Title`` or ``MediaFiles`` key
        raises, as these are accessed by subscription.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        account_id = mobj.group('account_id')

        # The config is requested with a dummy callback name and unwrapped
        # with strip_jsonp before JSON parsing.
        playlist = self._download_json(
            'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
            % (video_id, account_id),
            video_id, transform_source=strip_jsonp)['Playlist'][0]

        media_info = playlist['MediaInfo']
        title = media_info['Title']
        media_files = playlist['MediaFiles']

        is_live = False
        formats = []
        for kind_case, kind_formats in media_files.items():
            kind = kind_case.lower()
            # Tolerate a null list for a kind instead of raising TypeError.
            for f in kind_formats or []:
                f_url = f.get('Url')
                if not f_url:
                    continue
                # Keep a per-format flag for protocol selection and
                # aggregate the overall flag, so the reported is_live does
                # not depend on the (arbitrary) iteration order of
                # media_files.
                f_is_live = f.get('Live') == 'true'
                is_live = is_live or f_is_live
                exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
                if kind == 'm3u8' or 'm3u8' in exts:
                    formats.extend(self._extract_m3u8_formats(
                        f_url, video_id, 'mp4',
                        entry_protocol='m3u8' if f_is_live else 'm3u8_native',
                        m3u8_id=kind, fatal=False, live=f_is_live))
                elif kind == 'flash' or 'f4m' in exts:
                    formats.extend(self._extract_f4m_formats(
                        f_url, video_id, f4m_id=kind, fatal=False))
                elif kind == 'dash' or 'mpd' in exts:
                    formats.extend(self._extract_mpd_formats(
                        f_url, video_id, mpd_id=kind, fatal=False))
                elif kind == 'silverlight':
                    # TODO: process when ism is supported (see
                    # https://github.com/rg3/youtube-dl/issues/8118)
                    continue
                else:
                    # Plain URL: 'Bitrate' is scaled down by 1000 for 'tbr'.
                    tbr = float_or_none(f.get('Bitrate'), 1000)
                    formats.append({
                        'url': f_url,
                        'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
                        'tbr': tbr,
                    })
        self._sort_formats(formats)

        description = media_info.get('Description')
        # Prefer the id reported by the config over the one from the URL.
        video_id = media_info.get('VideoId') or video_id
        timestamp = parse_iso8601(media_info.get('PublishDate'))
        thumbnails = [{
            'url': thumbnail['Url'],
            'width': int_or_none(thumbnail.get('Size')),
        } for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'is_live': is_live,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
@@ -12,46 +12,41 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class DCNIE(InfoExtractor):
|
||||
class AWAANIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
if video_id and int(video_id) > 0:
|
||||
return self.url_result(
|
||||
'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo')
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
|
||||
elif season_id and int(season_id) > 0:
|
||||
return self.url_result(smuggle_url(
|
||||
'http://www.dcndigital.ae/program/season/%s' % season_id,
|
||||
{'show_id': show_id}), 'DCNSeason')
|
||||
'http://awaan.ae/program/season/%s' % season_id,
|
||||
{'show_id': show_id}), 'AWAANSeason')
|
||||
else:
|
||||
return self.url_result(
|
||||
'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason')
|
||||
'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
|
||||
|
||||
|
||||
class DCNBaseIE(InfoExtractor):
|
||||
def _extract_video_info(self, video_data, video_id, is_live):
|
||||
class AWAANBaseIE(InfoExtractor):
|
||||
def _parse_video_data(self, video_data, video_id, is_live):
|
||||
title = video_data.get('title_en') or video_data['title_ar']
|
||||
img = video_data.get('img')
|
||||
thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
|
||||
duration = int_or_none(video_data.get('duration'))
|
||||
description = video_data.get('description_en') or video_data.get('description_ar')
|
||||
timestamp = parse_iso8601(video_data.get('create_time'), ' ')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'description': video_data.get('description_en') or video_data.get('description_ar'),
|
||||
'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None,
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
@@ -62,11 +57,9 @@ class DCNBaseIE(InfoExtractor):
|
||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# format_url_base + '/manifest.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url_base + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
@@ -77,11 +70,12 @@ class DCNBaseIE(InfoExtractor):
|
||||
return formats
|
||||
|
||||
|
||||
class DCNVideoIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:video'
|
||||
class AWAANVideoIE(AWAANBaseIE):
|
||||
IE_NAME = 'awaan:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
|
||||
'md5': '5f61c33bfc7794315c671a62d43116aa',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '17375',
|
||||
@@ -92,10 +86,6 @@ class DCNVideoIE(DCNBaseIE):
|
||||
'timestamp': 1227504126,
|
||||
'upload_date': '20081124',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
|
||||
'only_matching': True,
|
||||
@@ -104,11 +94,10 @@ class DCNVideoIE(DCNBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = sanitized_Request(
|
||||
video_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
video_data = self._download_json(request, video_id)
|
||||
info = self._extract_video_info(video_data, video_id, False)
|
||||
video_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(video_data, video_id, False)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
|
||||
@@ -123,19 +112,31 @@ class DCNVideoIE(DCNBaseIE):
|
||||
return info
|
||||
|
||||
|
||||
class DCNLiveIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:live'
|
||||
class AWAANLiveIE(AWAANBaseIE):
|
||||
IE_NAME = 'awaan:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://awaan.ae/live/6/dubai-tv',
|
||||
'info_dict': {
|
||||
'id': '6',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'upload_date': '20150107',
|
||||
'timestamp': 1420588800,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
request = sanitized_Request(
|
||||
channel_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
|
||||
channel_data = self._download_json(request, channel_id)
|
||||
info = self._extract_video_info(channel_data, channel_id, True)
|
||||
channel_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(channel_data, channel_id, True)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' +
|
||||
@@ -150,8 +151,8 @@ class DCNLiveIE(DCNBaseIE):
|
||||
return info
|
||||
|
||||
|
||||
class DCNSeasonIE(InfoExtractor):
|
||||
IE_NAME = 'dcn:season'
|
||||
class AWAANSeasonIE(InfoExtractor):
|
||||
IE_NAME = 'awaan:season'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
|
||||
_TEST = {
|
||||
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
|
||||
@@ -172,21 +173,17 @@ class DCNSeasonIE(InfoExtractor):
|
||||
data['season'] = season_id
|
||||
show_id = smuggled_data.get('show_id')
|
||||
if show_id is None:
|
||||
request = sanitized_Request(
|
||||
season = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
season = self._download_json(request, season_id)
|
||||
season_id, headers={'Origin': 'http://awaan.ae'})
|
||||
show_id = season['id']
|
||||
data['show_id'] = show_id
|
||||
request = sanitized_Request(
|
||||
show = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/show',
|
||||
urlencode_postdata(data),
|
||||
{
|
||||
'Origin': 'http://www.dcndigital.ae',
|
||||
show_id, data=urlencode_postdata(data), headers={
|
||||
'Origin': 'http://awaan.ae',
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
|
||||
show = self._download_json(request, show_id)
|
||||
if not season_id:
|
||||
season_id = show['default_season']
|
||||
for season in show['seasons']:
|
||||
@@ -197,6 +194,6 @@ class DCNSeasonIE(InfoExtractor):
|
||||
for video in show['videos']:
|
||||
video_id = compat_str(video['id'])
|
||||
entries.append(self.url_result(
|
||||
'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id))
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
|
||||
|
||||
return self.playlist_result(entries, season_id, title)
|
@@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'uploader_id': 'dotscale',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# with escaped quote in title
|
||||
'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
|
||||
'info_dict': {
|
||||
'title': '"Entropy" EP',
|
||||
'uploader_id': 'jstrecords',
|
||||
'id': 'entropy-ep',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
title = self._search_regex(
|
||||
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||
webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = title.replace(r'\"', '"')
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'uploader_id': uploader_id,
|
||||
|
@@ -2,19 +2,23 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
@@ -229,51 +233,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||
|
||||
def _extract_connection(self, connection, programme_id):
|
||||
formats = []
|
||||
kind = connection.get('kind')
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
if protocol == 'http':
|
||||
href = connection.get('href')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, supplier),
|
||||
})
|
||||
# Skip DASH until supported
|
||||
elif transfer_format == 'dash':
|
||||
pass
|
||||
elif transfer_format == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=supplier, fatal=False))
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
'format_id': supplier or kind or protocol,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
auth_string = connection.get('authString')
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
formats.append({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
'format_id': supplier,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||
|
||||
@@ -294,46 +253,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _extract_connections(self, media):
    """Look up the ``connection`` elements of *media* through ``_findall_ns``."""
    connection_template = './{%s}connection'
    return self._findall_ns(media, connection_template)
|
||||
|
||||
def _extract_video(self, media, programme_id):
    """Collect the formats of every connection of a video *media* entry.

    Each format produced by ``_extract_connection`` is annotated with the
    media-level width, height, bitrate (as ``vbr``), encoding (as
    ``vcodec``) and file size; when the media has a ``service`` it is
    prepended to the format id.
    """
    service = media.get('service')
    video_fields = {
        'width': int_or_none(media.get('width')),
        'height': int_or_none(media.get('height')),
        'vbr': int_or_none(media.get('bitrate')),
        'vcodec': media.get('encoding'),
        'filesize': int_or_none(media.get('media_file_size')),
    }
    collected = []
    for connection in self._extract_connections(media):
        for fmt in self._extract_connection(connection, programme_id):
            fmt.update(video_fields)
            if service:
                fmt['format_id'] = '%s_%s' % (service, fmt['format_id'])
            collected.append(fmt)
    return collected
|
||||
|
||||
def _extract_audio(self, media, programme_id):
    """Collect the formats of every connection of an audio *media* entry.

    Each format produced by ``_extract_connection`` is annotated with the
    media-level bitrate (as ``abr``) and encoding (as ``acodec``) and
    marked as having no video (``vcodec`` set to ``'none'``).
    """
    formats = []
    abr = int_or_none(media.get('bitrate'))
    acodec = media.get('encoding')
    service = media.get('service')
    for connection in self._extract_connections(media):
        conn_formats = self._extract_connection(connection, programme_id)
        for fmt in conn_formats:
            fmt.update({
                'abr': abr,
                'acodec': acodec,
                'vcodec': 'none',
            })
            # Only prefix the id when a service is present, as
            # _extract_video does; otherwise ids would read 'None_<id>'.
            if service:
                fmt['format_id'] = '%s_%s' % (service, fmt['format_id'])
        formats.extend(conn_formats)
    return formats
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
for connection in self._extract_connections(media):
|
||||
@@ -379,13 +298,87 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _process_media_selector(self, media_selection, programme_id):
|
||||
formats = []
|
||||
subtitles = None
|
||||
urls = []
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
if kind in ('video', 'audio'):
|
||||
bitrate = int_or_none(media.get('bitrate'))
|
||||
encoding = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
file_size = int_or_none(media.get('media_file_size'))
|
||||
for connection in self._extract_connections(media):
|
||||
href = connection.get('href')
|
||||
if href in urls:
|
||||
continue
|
||||
if href:
|
||||
urls.append(href)
|
||||
conn_kind = connection.get('kind')
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
format_id = supplier or conn_kind or protocol
|
||||
if service:
|
||||
format_id = '%s_%s' % (service, format_id)
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, format_id),
|
||||
})
|
||||
elif transfer_format == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, programme_id, mpd_id=format_id, fatal=False))
|
||||
elif transfer_format == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
if not service and not supplier and bitrate:
|
||||
format_id += '-%d' % bitrate
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'filesize': file_size,
|
||||
}
|
||||
if kind == 'video':
|
||||
fmt.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': bitrate,
|
||||
'vcodec': encoding,
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
'abr': bitrate,
|
||||
'acodec': encoding,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
if protocol == 'http':
|
||||
# Direct link
|
||||
fmt.update({
|
||||
'url': href,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
auth_string = connection.get('authString')
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
fmt.update({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(fmt)
|
||||
elif kind == 'captions':
|
||||
subtitles = self.extract_subtitles(media, programme_id)
|
||||
return formats, subtitles
|
||||
@@ -590,6 +583,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'id': '150615_telabyad_kentin_cogu',
|
||||
'ext': 'mp4',
|
||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
||||
'timestamp': 1434397334,
|
||||
'upload_date': '20150615',
|
||||
},
|
||||
@@ -603,6 +597,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
},
|
||||
@@ -652,6 +647,23 @@ class BBCIE(BBCCoUkIE):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video embedded with Morph
|
||||
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||
'info_dict': {
|
||||
'id': 'p041vhd0',
|
||||
'ext': 'mp4',
|
||||
'title': "Nigeria v Japan - Men's First Round",
|
||||
'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
|
||||
'duration': 7980,
|
||||
'uploader': 'BBC Sport',
|
||||
'uploader_id': 'bbc_sport',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to UK',
|
||||
}, {
|
||||
# single video with playlist.sxml URL in playlist param
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
@@ -749,7 +761,7 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
|
||||
json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
|
||||
timestamp = json_ld_info.get('timestamp')
|
||||
|
||||
playlist_title = json_ld_info.get('title')
|
||||
@@ -818,8 +830,29 @@ class BBCIE(BBCCoUkIE):
|
||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||
if playlist:
|
||||
entries.append(self._extract_from_playlist_sxml(
|
||||
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
|
||||
entry = None
|
||||
for key in ('streaming', 'progressiveDownload'):
|
||||
playlist_url = playlist.get('%sUrl' % key)
|
||||
if not playlist_url:
|
||||
continue
|
||||
try:
|
||||
info = self._extract_from_playlist_sxml(
|
||||
playlist_url, playlist_id, timestamp)
|
||||
if not entry:
|
||||
entry = info
|
||||
else:
|
||||
entry['title'] = info['title']
|
||||
entry['formats'].extend(info['formats'])
|
||||
except Exception as e:
|
||||
# Some playlist URL may fail with 500, at the same time
|
||||
# the other one may work fine (e.g.
|
||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
continue
|
||||
raise
|
||||
if entry:
|
||||
self._sort_formats(entry['formats'])
|
||||
entries.append(entry)
|
||||
|
||||
if entries:
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
@@ -852,6 +885,50 @@ class BBCIE(BBCCoUkIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||
# There are several setPayload calls may be present but the video
|
||||
# seems to be always related to the first one
|
||||
morph_payload = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||
webpage, 'morph payload', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
if morph_payload:
|
||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||
for component in components:
|
||||
if not isinstance(component, dict):
|
||||
continue
|
||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||
if not lead_media:
|
||||
continue
|
||||
identifiers = lead_media.get('identifiers')
|
||||
if not identifiers or not isinstance(identifiers, dict):
|
||||
continue
|
||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||
if not programme_id:
|
||||
continue
|
||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
description = lead_media.get('summary')
|
||||
uploader = lead_media.get('masterBrand')
|
||||
uploader_id = lead_media.get('mid')
|
||||
duration = None
|
||||
duration_d = lead_media.get('duration')
|
||||
if isinstance(duration_d, dict):
|
||||
duration = parse_duration(dict_get(
|
||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
@@ -869,7 +946,7 @@ class BBCIE(BBCCoUkIE):
|
||||
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
[self.url_result(entry, 'BBCCoUk') for entry in entries],
|
||||
[self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
|
||||
playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
||||
@@ -981,27 +1058,43 @@ class BBCCoUkArticleIE(InfoExtractor):
|
||||
|
||||
|
||||
class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
def _entries(self, webpage, url, playlist_id):
    """Lazily yield a url_result for every video id found in the playlist.

    Starts with the already downloaded *webpage*.  When *url* carries an
    explicit ``page`` query parameter only that page is processed;
    otherwise the "next" pagination link is followed until none is found.
    """
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    only_requested_page = 'page' in query
    video_id_re = self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX

    # The first page is already at hand, so downloads start at page 2.
    page_num = 2
    while True:
        for found_id in re.findall(video_id_re, webpage):
            yield self.url_result(
                self._URL_TEMPLATE % found_id, BBCCoUkIE.ie_key())
        if only_requested_page:
            return
        next_page = self._search_regex(
            r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
            webpage, 'next page url', default=None, group='url')
        if not next_page:
            return
        # NOTE(review): page_num is also passed as the fourth positional
        # argument - confirm which _download_webpage parameter it binds to.
        webpage = self._download_webpage(
            compat_urlparse.urljoin(url, next_page), playlist_id,
            'Downloading page %d' % page_num, page_num)
        page_num += 1
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
|
||||
|
||||
title, description = self._extract_title_and_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.playlist_result(
|
||||
self._entries(webpage, url, playlist_id),
|
||||
playlist_id, title, description)
|
||||
|
||||
|
||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
|
||||
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
|
||||
'info_dict': {
|
||||
'id': 'b05rcz9v',
|
||||
@@ -1009,7 +1102,17 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}
|
||||
'skip': 'This programme is not currently available on BBC iPlayer',
|
||||
}, {
|
||||
# Available for over a year unlike 30 days for most other programmes
|
||||
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
|
||||
'info_dict': {
|
||||
'id': 'p02tcc32',
|
||||
'title': 'Bohemian Icons',
|
||||
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}]
|
||||
|
||||
def _extract_title_and_description(self, webpage):
|
||||
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
@@ -1032,6 +1135,24 @@ class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# multipage playlist, explicit page
|
||||
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
|
||||
'info_dict': {
|
||||
'id': 'b00mfl7n',
|
||||
'title': 'Frozen Planet - Clips - BBC One',
|
||||
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
}, {
|
||||
# multipage playlist, all pages
|
||||
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
|
||||
'info_dict': {
|
||||
'id': 'b00mfl7n',
|
||||
'title': 'Frozen Planet - Clips - BBC One',
|
||||
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
|
||||
},
|
||||
'playlist_mincount': 142,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
|
||||
'only_matching': True,
|
||||
|
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class BetIE(MTVServicesInfoExtractor):
|
||||
@@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor):
|
||||
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return compat_urllib_parse_urlencode({
|
||||
return {
|
||||
'uuid': uri,
|
||||
})
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage):
    """Search *webpage* for a ``data-uri="..."`` attribute and return the ``_search_regex`` result."""
    mgid_pattern = r'data-uri="([^"]+)'
    return self._search_regex(mgid_pattern, webpage, 'mgid')
|
||||
|
@@ -11,22 +11,13 @@ from ..compat import compat_urllib_parse_unquote
|
||||
class BigflixIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
|
||||
'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
|
||||
'info_dict': {
|
||||
'id': '16537',
|
||||
'ext': 'mp4',
|
||||
'title': 'Singham Returns',
|
||||
'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d',
|
||||
}
|
||||
}, {
|
||||
# 2 formats
|
||||
'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
|
||||
'info_dict': {
|
||||
'id': '16070',
|
||||
'ext': 'mp4',
|
||||
'title': 'Madarasapatinam',
|
||||
'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca',
|
||||
'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
|
||||
'formats': 'mincount:2',
|
||||
},
|
||||
'params': {
|
||||
|
@@ -1,22 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,13 +18,13 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'info_dict': {
|
||||
'id': '1554319',
|
||||
'ext': 'flv',
|
||||
'id': '1074402',
|
||||
'ext': 'mp4',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.067,
|
||||
'duration': 308.315,
|
||||
'timestamp': 1398012660,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
@@ -42,76 +35,42 @@ class BiliBiliIE(InfoExtractor):
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
'info_dict': {
|
||||
'id': '1041170',
|
||||
'ext': 'mp4',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'duration': 3382.259,
|
||||
'timestamp': 1396530060,
|
||||
'upload_date': '20140403',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'uploader': '枫叶逝去',
|
||||
'uploader_id': '520116',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||
'info_dict': {
|
||||
'id': '4808130',
|
||||
'ext': 'mp4',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'duration': 1493.995,
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
|
||||
'info_dict': {
|
||||
'id': '4808130_part1',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '926f9f67d0c482091872fbd8eca7ea3d',
|
||||
'info_dict': {
|
||||
'id': '4808130_part2',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '4b7b225b968402d7c32348c646f1fd83',
|
||||
'info_dict': {
|
||||
'id': '4808130_part3',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '7b795e214166501e9141139eea236e91',
|
||||
'info_dict': {
|
||||
'id': '4808130_part4',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
# Missing upload time
|
||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||
'info_dict': {
|
||||
'id': '2880301',
|
||||
'ext': 'flv',
|
||||
'id': '1867637',
|
||||
'ext': 'mp4',
|
||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||
'duration': 5760.0,
|
||||
'uploader': '黑夜为猫',
|
||||
'uploader_id': '610729',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
# Just to test metadata extraction
|
||||
@@ -120,86 +79,61 @@ class BiliBiliIE(InfoExtractor):
|
||||
'expected_warnings': ['upload time'],
|
||||
}]
|
||||
|
||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||
# the Android client
|
||||
# TODO: find the sign algorithm used in the flash player
|
||||
_APP_KEY = '86385cdc024c0f6c'
|
||||
_APP_KEY = '6f90a59ac58a4123'
|
||||
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
params = compat_parse_qs(self._search_regex(
|
||||
cid = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))
|
||||
cid = params['cid'][0]
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
|
||||
info_xml_str = self._download_webpage(
|
||||
'http://interface.bilibili.com/v_cdn_play',
|
||||
cid, query={'appkey': self._APP_KEY, 'cid': cid},
|
||||
note='Downloading video info page')
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
err_msg = None
|
||||
durls = None
|
||||
info_xml = None
|
||||
try:
|
||||
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
|
||||
err_msg = (info_json or {}).get('error_text')
|
||||
else:
|
||||
err_msg = xpath_text(info_xml, './message')
|
||||
|
||||
if info_xml is not None:
|
||||
durls = info_xml.findall('./durl')
|
||||
if not durls:
|
||||
if err_msg:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
|
||||
else:
|
||||
raise ExtractorError('No videos found!')
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page')
|
||||
|
||||
entries = []
|
||||
|
||||
for durl in durls:
|
||||
size = xpath_text(durl, ['./filesize', './size'])
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl.find('./url').text,
|
||||
'filesize': int_or_none(size),
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.findall('./backup_url/url'):
|
||||
for backup_url in durl['backup_url']:
|
||||
formats.append({
|
||||
'url': backup_url.text,
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
|
||||
'duration': int_or_none(xpath_text(durl, './length'), 1000),
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
datetime_str = self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
||||
timestamp = None
|
||||
if datetime_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': compat_str(cid),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
|
||||
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
|
||||
'duration': float_or_none(video_info.get('timelength'), scale=1000),
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
|
@@ -2,11 +2,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
from .rudo import RudoIE
|
||||
|
||||
|
||||
class BioBioChileTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
_VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
||||
@@ -18,6 +22,7 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Fernando Atria',
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
# different uploader layout
|
||||
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
||||
@@ -32,6 +37,16 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||
'info_dict': {
|
||||
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
||||
'ext': 'mp4',
|
||||
'uploader': '(none)',
|
||||
'upload_date': '20160708',
|
||||
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||
'only_matching': True,
|
||||
@@ -45,42 +60,22 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
rudo_url = RudoIE._extract_url(webpage)
|
||||
if not rudo_url:
|
||||
raise ExtractorError('No videos found')
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
||||
|
||||
file_url = self._search_regex(
|
||||
r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'file url', group='url')
|
||||
|
||||
base_url = self._search_regex(
|
||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
|
||||
'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
|
||||
group='url')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
f = {
|
||||
'url': '%s%s' % (base_url, file_url),
|
||||
'format_id': 'http',
|
||||
'protocol': 'http',
|
||||
'preference': 1,
|
||||
}
|
||||
if formats:
|
||||
f_copy = formats[-1].copy()
|
||||
f_copy.update(f)
|
||||
f = f_copy
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
|
||||
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': rudo_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -24,7 +24,8 @@ class BIQLEIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Ребенок в шоке от автоматической мойки',
|
||||
'uploader': 'Dmitry Kotov',
|
||||
}
|
||||
},
|
||||
'skip': ' This video was marked as adult. Embedding adult videos on external sites is prohibited.',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -20,6 +21,18 @@ class BloombergIE(InfoExtractor):
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
# video ID in BPlayer(...)
|
||||
'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
|
||||
'info_dict': {
|
||||
'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
|
||||
'ext': 'flv',
|
||||
'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
|
||||
'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[format_id^=hds]',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
|
||||
'only_matching': True,
|
||||
@@ -33,7 +46,11 @@ class BloombergIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(
|
||||
r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'id', group='url')
|
||||
webpage, 'id', group='url', default=None)
|
||||
if not video_id:
|
||||
bplayer_data = self._parse_json(self._search_regex(
|
||||
r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
|
||||
video_id = bplayer_data['id']
|
||||
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||
|
||||
embed_info = self._download_json(
|
||||
|
@@ -1,31 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BravoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
|
||||
_TEST = {
|
||||
class BravoTVIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
|
||||
'md5': 'd60cdf68904e854fac669bd26cccf801',
|
||||
'md5': '9086d0b7ef0ea2aabc4781d75f4e5863',
|
||||
'info_dict': {
|
||||
'id': 'LitrBdX64qLn',
|
||||
'id': 'zHyk1_HU_mPy',
|
||||
'ext': 'mp4',
|
||||
'title': 'Last Chance Kitchen Returns',
|
||||
'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
|
||||
'timestamp': 1448926740,
|
||||
'upload_date': '20151130',
|
||||
'title': 'LCK Ep 12: Fishy Finale',
|
||||
'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.',
|
||||
'uploader': 'NBCU-BRAV',
|
||||
'upload_date': '20160302',
|
||||
'timestamp': 1456945320,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
|
||||
release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
|
||||
{'force_smil_url': True}), 'ThePlatform', release_pid)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
|
||||
display_id)
|
||||
info = {}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
account_pid, release_pid = [None] * 2
|
||||
tve = settings.get('sharedTVE')
|
||||
if tve:
|
||||
query['manifest'] = 'm3u'
|
||||
account_pid = 'HNK2IC'
|
||||
release_pid = tve['release_pid']
|
||||
if tve.get('entitlement') == 'auth':
|
||||
adobe_pass = settings.get('adobePass', {})
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId', 'bravo'),
|
||||
tve['title'], release_pid, tve.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
|
||||
else:
|
||||
shared_playlist = settings['shared_playlist']
|
||||
account_pid = shared_playlist['account_pid']
|
||||
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
|
||||
release_pid = metadata['release_pid']
|
||||
info.update({
|
||||
'title': metadata['title'],
|
||||
'description': metadata.get('description'),
|
||||
'season_number': int_or_none(metadata.get('season_num')),
|
||||
'episode_number': int_or_none(metadata.get('episode_num')),
|
||||
})
|
||||
query['switch'] = 'progressive'
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': release_pid,
|
||||
'url': smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid),
|
||||
query), {'force_smil_url': True}),
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
||||
|
@@ -26,6 +26,8 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
clean_html,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -544,14 +546,16 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
container = source.get('container')
|
||||
source_type = source.get('type')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
if source_type == 'application/x-mpegURL' or container == 'M2TS':
|
||||
if ext == 'ism':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'application/dash+xml':
|
||||
elif ext == 'mpd':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
|
||||
@@ -567,7 +571,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'tbr': tbr,
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'container': container,
|
||||
'ext': container.lower(),
|
||||
'ext': ext or container.lower(),
|
||||
}
|
||||
if width == 0 and height == 0:
|
||||
f.update({
|
||||
@@ -620,7 +624,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': json_data.get('description'),
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': float_or_none(json_data.get('duration'), 1000),
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,8 +9,10 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
clean_html,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -26,14 +27,14 @@ class CamdemyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': '',
|
||||
'creator': 'ss11spring',
|
||||
'duration': 1591,
|
||||
'upload_date': '20130114',
|
||||
'timestamp': 1358154556,
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# With non-empty description
|
||||
# webpage returns "No permission or not login"
|
||||
'url': 'http://www.camdemy.com/media/13885',
|
||||
'md5': '4576a3bb2581f86c61044822adbd1249',
|
||||
'info_dict': {
|
||||
@@ -41,64 +42,71 @@ class CamdemyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'EverCam + Camdemy QuickStart',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
|
||||
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
|
||||
'creator': 'evercam',
|
||||
'upload_date': '20140620',
|
||||
'timestamp': 1403271569,
|
||||
'duration': 318,
|
||||
}
|
||||
}, {
|
||||
# External source
|
||||
# External source (YouTube)
|
||||
'url': 'http://www.camdemy.com/media/14842',
|
||||
'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
|
||||
'info_dict': {
|
||||
'id': '2vsYQzNIsJo',
|
||||
'ext': 'mp4',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'upload_date': '20130211',
|
||||
'uploader': 'Hun Kim',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'uploader_id': 'hunkimtutorials',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
src_from = self._html_search_regex(
|
||||
r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
|
||||
'external source', default=None)
|
||||
r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
|
||||
webpage, 'external source', default=None, group='url')
|
||||
if src_from:
|
||||
return self.url_result(src_from)
|
||||
|
||||
oembed_obj = self._download_json(
|
||||
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
||||
|
||||
title = oembed_obj['title']
|
||||
thumb_url = oembed_obj['thumbnail_url']
|
||||
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
||||
file_list_doc = self._download_xml(
|
||||
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
||||
video_id, 'Filelist XML')
|
||||
video_id, 'Downloading filelist XML')
|
||||
file_name = file_list_doc.find('./video/item/fileName').text
|
||||
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
||||
|
||||
timestamp = parse_iso8601(self._html_search_regex(
|
||||
r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'creation time', fatal=False),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=8))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'view count', fatal=False))
|
||||
# Some URLs return "No permission or not login" in a webpage despite being
|
||||
# freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'>published on ([^<]+)<', webpage,
|
||||
'upload date', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
|
||||
webpage, 'view count', default=None))
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, default=None) or clean_html(
|
||||
oembed_obj.get('description'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': oembed_obj['title'],
|
||||
'title': title,
|
||||
'thumbnail': thumb_url,
|
||||
'description': self._html_search_meta('description', page),
|
||||
'creator': oembed_obj['author_name'],
|
||||
'duration': oembed_obj['duration'],
|
||||
'timestamp': timestamp,
|
||||
'description': description,
|
||||
'creator': oembed_obj.get('author_name'),
|
||||
'duration': parse_duration(oembed_obj.get('duration')),
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
|
36
youtube_dl/extractor/cartoonnetwork.py
Normal file
36
youtube_dl/extractor/cartoonnetwork.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
|
||||
|
||||
class CartoonNetworkIE(TurnerBaseIE):
    """Extractor for clip and episode pages on cartoonnetwork.com.

    The watch page is downloaded, the CVP video id or title id embedded in
    it is located, and the matching getCvpPlaylist URL is handed to
    ``_extract_cvp_info`` (not defined in this class).
    """
    _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
    _TEST = {
        'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
        'info_dict': {
            'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
            'ext': 'mp4',
            'title': 'Starfire the Cat Lady',
            'description': 'Robin decides to become a cat so that Starfire will finally love him.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve a cartoonnetwork.com video page to its CVP playlist info.

        :param url: page URL matching ``_VALID_URL``.
        :returns: whatever ``_extract_cvp_info`` returns for the playlist URL.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page sets either ``_cnglobal.cvpVideoId`` or
        # ``_cnglobal.cvpTitleId``.  Calling ``.groups()`` directly on a bare
        # ``re.search`` result fails with AttributeError on None when neither
        # is present, so the lookup goes through ``_search_regex`` instead.
        # NOTE(review): assumes ``_search_regex`` raises an extractor error by
        # default when nothing matches; confirm against the base class.
        cvp_id_re = r"_cnglobal\.cvp(?P<type>Video|Title)Id\s*=\s*'(?P<id>[^']+)';"
        id_type = self._search_regex(
            cvp_id_re, webpage, 'id type', group='type')
        video_id = self._search_regex(
            cvp_id_re, webpage, 'video id', group='id')

        # A video id is sent as ``id``; a title id is sent as ``titleId``.
        query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
        return self._extract_cvp_info(
            'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
                'secure': {
                    'media_src': 'http://apple-secure.cdn.turner.com/toon/big',
                    'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
                },
            })
|
@@ -4,13 +4,24 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
xpath_with_ns,
|
||||
find_xpath_attr,
|
||||
parse_iso8601,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# with mediaId
|
||||
@@ -25,8 +36,22 @@ class CBCIE(InfoExtractor):
|
||||
'upload_date': '20160203',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# with clipId
|
||||
# with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
|
||||
'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
|
||||
'md5': '162adfa070274b144f4fdc3c3b8207db',
|
||||
'info_dict': {
|
||||
'id': '2414435309',
|
||||
'ext': 'mp4',
|
||||
'title': '22 Minutes Update: What Not To Wear Quebec',
|
||||
'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
|
||||
'upload_date': '20131025',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'timestamp': 1382717907,
|
||||
},
|
||||
}, {
|
||||
# with clipId, feed only available via tpfeed.cbc.ca
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
||||
'info_dict': {
|
||||
@@ -64,6 +89,7 @@ class CBCIE(InfoExtractor):
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}],
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -81,9 +107,15 @@ class CBCIE(InfoExtractor):
|
||||
media_id = player_info.get('mediaId')
|
||||
if not media_id:
|
||||
clip_id = player_info['clipId']
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
feed = self._download_json(
|
||||
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
||||
clip_id, fatal=False)
|
||||
if feed:
|
||||
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
||||
if not media_id:
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
else:
|
||||
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||
@@ -91,6 +123,7 @@ class CBCIE(InfoExtractor):
|
||||
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:player'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
@@ -104,6 +137,7 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'upload_date': '20160210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||
@@ -143,3 +177,165 @@ class CBCPlayerIE(InfoExtractor):
|
||||
}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
|
||||
class CBCWatchBaseIE(InfoExtractor):
|
||||
_device_id = None
|
||||
_device_token = None
|
||||
_API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
|
||||
_NS_MAP = {
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
|
||||
}
|
||||
|
||||
def _call_api(self, path, video_id):
    """Fetch an XML document from the API with the device headers attached.

    :param path: absolute URL (anything starting with ``http``) or a path
        that is appended to ``_API_BASE_URL``.
    :param video_id: id passed through to ``_download_xml``.
    :returns: the parsed XML document.
    :raises ExtractorError: when the document carries a ``userMessage`` or
        ``systemMessage`` text.
    """
    if path.startswith('http'):
        api_url = path
    else:
        api_url = self._API_BASE_URL + path

    device_headers = {
        'X-Clearleap-DeviceId': self._device_id,
        'X-Clearleap-DeviceToken': self._device_token,
    }
    doc = self._download_xml(api_url, video_id, headers=device_headers)

    # userMessage takes precedence; systemMessage is only consulted when
    # the former is missing or empty.
    message = xpath_text(doc, 'userMessage')
    if not message:
        message = xpath_text(doc, 'systemMessage')
    if not message:
        return doc
    raise ExtractorError('%s said: %s' % (self.IE_NAME, message))
|
||||
|
||||
def _real_initialize(self):
    """Make sure a device id/token pair is available.

    Order of preference: values already set on the object, then the
    ``cbcwatch``/``device`` cache entry, and finally a fresh registration
    against ``device/register`` whose result is written back to the cache.
    """
    if self._device_id and self._device_token:
        return

    cached = self._downloader.cache.load('cbcwatch', 'device') or {}
    self._device_id = cached.get('id')
    self._device_token = cached.get('token')
    if self._device_id and self._device_token:
        return

    # Nothing usable in the cache: register a new "web" device.
    registration = self._download_xml(
        self._API_BASE_URL + 'device/register',
        None, data=b'<device><type>web</type></device>')
    self._device_id = xpath_text(registration, 'deviceId', fatal=True)
    self._device_token = xpath_text(registration, 'deviceToken', fatal=True)
    credentials = {
        'id': self._device_id,
        'token': self._device_token,
    }
    self._downloader.cache.store('cbcwatch', 'device', credentials)
|
||||
|
||||
def _parse_rss_feed(self, rss):
    """Turn an RSS document into a playlist of url_transparent entries.

    :param rss: parsed XML element containing a ``channel`` child.
    :returns: the result of ``playlist_result`` built from every ``item``
        of the channel, with the channel's guid, title and description.

    ``guid``, ``title``, ``media:group`` and ``media:content`` are looked
    up with ``fatal=True`` for each item.
    """
    channel = xpath_element(rss, 'channel', fatal=True)

    def _ns(path):
        # Expand the ``media:`` / ``clearleap:`` prefixes via _NS_MAP.
        return xpath_with_ns(path, self._NS_MAP)

    entries = []
    for item in channel.findall('item'):
        guid = xpath_text(item, 'guid', fatal=True)
        title = xpath_text(item, 'title', fatal=True)

        media_group = xpath_element(item, _ns('media:group'), fatal=True)
        content = xpath_element(media_group, _ns('media:content'), fatal=True)
        content_url = content.attrib['url']

        # Thumbnails without a url attribute are dropped.
        thumbnails = [{
            'id': thumb.get('profile'),
            'url': thumb.get('url'),
            'width': int_or_none(thumb.get('width')),
            'height': int_or_none(thumb.get('height')),
        } for thumb in media_group.findall(_ns('media:thumbnail'))
            if thumb.get('url')]

        # The release date is carried by <media:credit role="releaseDate">.
        release_date = find_xpath_attr(
            item, _ns('media:credit'), 'role', 'releaseDate')
        if release_date is None:
            timestamp = None
        else:
            timestamp = parse_iso8601(release_date.text)

        entry = {
            '_type': 'url_transparent',
            'url': content_url,
            'id': guid,
            'title': title,
            'description': xpath_text(item, 'description'),
            'timestamp': timestamp,
            'duration': int_or_none(content.get('duration')),
            'age_limit': parse_age_limit(xpath_text(item, _ns('media:rating'))),
            'episode': xpath_text(item, _ns('clearleap:episode')),
            'episode_number': int_or_none(xpath_text(item, _ns('clearleap:episodeInSeason'))),
            'series': xpath_text(item, _ns('clearleap:series')),
            'season_number': int_or_none(xpath_text(item, _ns('clearleap:season'))),
            'thumbnails': thumbnails,
            'ie_key': 'CBCWatchVideo',
        }
        entries.append(entry)

    playlist_id = xpath_text(channel, 'guid')
    playlist_title = xpath_text(channel, 'title')
    playlist_description = xpath_text(channel, 'description')
    return self.playlist_result(
        entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
|
||||
class CBCWatchVideoIE(CBCWatchBaseIE):
    """Extractor for a single video served by the Clearleap play API."""

    IE_NAME = 'cbc.ca:watch:video'
    _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    def _real_extract(self, url):
        """Return the info dict for the video behind a play API URL.

        Formats come from the HLS manifest referenced by the API response.
        When the response embeds an RSS feed, the metadata of its first item
        is merged in; otherwise the video id doubles as the title.
        """
        video_id = self._match_id(url)
        result = self._call_api(url, video_id)

        m3u8_url = xpath_text(result, 'url', fatal=True)
        # First try the manifest named after its parent directory
        # (presumably the full variant playlist); fall back to the URL the
        # API actually returned when that yields fewer than two formats.
        formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
        if len(formats) < 2:
            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
        # Despite metadata in m3u8 all video+audio formats are
        # actually video-only (no audio)
        for f in formats:
            if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
                f['acodec'] = 'none'
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }

        rss = xpath_element(result, 'rss')
        # An Element's truth value reflects its number of children and
        # testing it directly is deprecated, so spell both conditions out:
        # the element must exist and must have content to parse.
        if rss is not None and len(rss):
            entries = self._parse_rss_feed(rss)['entries']
            # The feed metadata is optional; a feed without items must not
            # abort an extraction that already has its formats.
            if entries:
                info.update(entries[0])
                # Drop the keys that only make sense on a url_transparent
                # playlist entry, not on a final info dict.
                for key in ('url', '_type', 'ie_key'):
                    info.pop(key, None)
        return info
|
||||
|
||||
|
||||
class CBCWatchIE(CBCWatchBaseIE):
    """Extractor for watch.cbc.ca pages (single videos and series)."""

    IE_NAME = 'cbc.ca:watch'
    _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
    _TESTS = [{
        'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
        'info_dict': {
            'id': '38e815a-009e3ab12e4',
            'ext': 'mp4',
            'title': 'Customer (Dis)Service',
            'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
            'upload_date': '20160219',
            'timestamp': 1455840000,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            'format': 'bestvideo',
        },
        'skip': 'Geo-restricted to Canada',
    }, {
        'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
        'info_dict': {
            'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
            'title': 'Arthur',
            'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
        },
        'playlist_mincount': 30,
        'skip': 'Geo-restricted to Canada',
    }]

    def _real_extract(self, url):
        """Fetch the browse feed for the id in `url` and return it as a playlist."""
        content_id = self._match_id(url)
        feed = self._call_api('web/browse/' + content_id, content_id)
        return self._parse_rss_feed(feed)
|
||||
|
@@ -4,6 +4,7 @@ from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -17,19 +18,6 @@ class CBSBaseIE(ThePlatformFeedIE):
|
||||
}]
|
||||
} if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info(
|
||||
'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: {
|
||||
'series': entry.get('cbs$SeriesTitle'),
|
||||
'season_number': int_or_none(entry.get('cbs$SeasonNumber')),
|
||||
'episode': entry.get('cbs$EpisodeTitle'),
|
||||
'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')),
|
||||
}, {
|
||||
'StreamPack': {
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||
@@ -38,7 +26,6 @@ class CBSIE(CBSBaseIE):
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
'info_dict': {
|
||||
'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_',
|
||||
'display_id': 'connect-chat-feat-garth-brooks',
|
||||
'ext': 'mp4',
|
||||
'title': 'Connect Chat feat. Garth Brooks',
|
||||
'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
|
||||
@@ -47,7 +34,10 @@ class CBSIE(CBSBaseIE):
|
||||
'upload_date': '20131127',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
|
||||
@@ -56,8 +46,31 @@ class CBSIE(CBSBaseIE):
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
||||
|
||||
def _extract_video_info(self, guid):
    """Build the info dict for the CBS video identified by `guid`.

    Formats and subtitles come from the default (m3u manifest) SMIL
    document; formats from several additional asset types are appended on a
    best-effort basis. Metadata is downloaded separately and extended with
    the CBS-specific series/season/episode fields.
    """
    media_path = 'dJ5BDC/media/guid/2198311517/' + guid
    smil_base = 'http://link.theplatform.com/s/%s?mbr=true' % media_path
    formats, subtitles = self._extract_theplatform_smil(smil_base + '&manifest=m3u', guid)

    for asset_type in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):
        # Only the part before '&' is shown in the progress note.
        note = 'Downloading %s SMIL data' % asset_type.split('&')[0]
        try:
            extra_formats, _ = self._extract_theplatform_smil(
                smil_base + '&assetTypes=' + asset_type, guid, note)
        except ExtractorError:
            # An asset type that cannot be extracted is simply skipped.
            continue
        formats.extend(extra_formats)
    self._sort_formats(formats)

    metadata = self._download_theplatform_metadata(media_path, guid)
    info = self._parse_theplatform_metadata(metadata)
    episode_fields = {
        'id': guid,
        'formats': formats,
        'subtitles': subtitles,
        'series': metadata.get('cbs$SeriesTitle'),
        'season_number': int_or_none(metadata.get('cbs$SeasonNumber')),
        'episode': metadata.get('cbs$EpisodeTitle'),
        'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')),
    }
    info.update(episode_fields)
    return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
return self._extract_video_info('byGuid=%s' % content_id, content_id)
|
||||
return self._extract_video_info(content_id)
|
||||
|
@@ -1,12 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
|
||||
from .anvato import AnvatoIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
@@ -43,13 +41,8 @@ class CBSLocalIE(AnvatoIE):
|
||||
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
|
||||
'info_dict': {
|
||||
'id': 'GxfCe0Zo7D-175909-5588',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 15, CIN 6',
|
||||
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
||||
'upload_date': '20160516',
|
||||
'timestamp': 1463433840,
|
||||
'duration': 49,
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
@@ -62,19 +55,15 @@ class CBSLocalIE(AnvatoIE):
|
||||
|
||||
sendtonews_url = SendtoNewsIE._extract_url(webpage)
|
||||
if sendtonews_url:
|
||||
info_dict = {
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, sendtonews_url),
|
||||
}
|
||||
else:
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
return self.url_result(
|
||||
compat_urlparse.urljoin(url, sendtonews_url),
|
||||
ie=SendtoNewsIE.ie_key())
|
||||
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
|
||||
time_str = self._html_search_regex(
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||
timestamp = None
|
||||
if time_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(
|
||||
time_str, '%b %d, %Y %I:%M %p').timetuple())
|
||||
timestamp = unified_timestamp(time_str)
|
||||
|
||||
info_dict.update({
|
||||
'display_id': display_id,
|
||||
|
@@ -2,13 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .cbs import CBSBaseIE
|
||||
from .cbs import CBSIE
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class CBSNewsIE(CBSBaseIE):
|
||||
class CBSNewsIE(CBSIE):
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
@@ -26,6 +26,7 @@ class CBSNewsIE(CBSBaseIE):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Subscribers only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
@@ -34,7 +35,8 @@ class CBSNewsIE(CBSBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
||||
'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '20140404',
|
||||
'timestamp': 1396650660,
|
||||
'uploader': 'CBSI-NEW',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 205,
|
||||
@@ -62,13 +64,14 @@ class CBSNewsIE(CBSBaseIE):
|
||||
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
guid = item['mpxRefId']
|
||||
return self._extract_video_info('byGuid=%s' % guid, guid)
|
||||
return self._extract_video_info(guid)
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
|
||||
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
|
||||
_TEST = {
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
@@ -77,6 +80,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -23,6 +23,9 @@ class CBSSportsIE(CBSBaseIE):
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
||||
|
51
youtube_dl/extractor/charlierose.py
Normal file
51
youtube_dl/extractor/charlierose.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class CharlieRoseIE(InfoExtractor):
    """Extractor for charlierose.com video and player pages."""

    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://charlierose.com/videos/27996',
        'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
        'info_dict': {
            'id': '27996',
            'ext': 'mp4',
            'title': 'Remembering Zaha Hadid',
            'thumbnail': 're:^https?://.*\.jpg\?\d+',
            'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.',
            'subtitles': {
                'en': [{
                    'ext': 'vtt',
                }],
            },
        },
    }, {
        'url': 'https://charlierose.com/videos/27996',
        'only_matching': True,
    }]

    # The player page is always the one downloaded, whichever URL form matched.
    _PLAYER_BASE = 'https://charlierose.com/video/player/%s'

    def _real_extract(self, url):
        """Download the player page for the matched id and parse its HTML5 media."""
        video_id = self._match_id(url)
        player_url = self._PLAYER_BASE % video_id
        webpage = self._download_webpage(player_url, video_id)

        # The Open Graph title carries a site suffix that is not part of
        # the video title.
        title = remove_end(self._og_search_title(webpage), ' - Charlie Rose')

        media_entries = self._parse_html5_media_entries(
            player_url, webpage, video_id,
            m3u8_entry_protocol='m3u8_native')
        info_dict = media_entries[0]

        formats = info_dict['formats']
        self._sort_formats(formats)
        self._remove_duplicate_formats(formats)

        info_dict.update({
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
        })

        return info_dict
|
@@ -17,7 +17,8 @@ class ChaturbateIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'Room is offline',
|
||||
}, {
|
||||
'url': 'https://en.chaturbate.com/siswet19/',
|
||||
'only_matching': True,
|
||||
|
@@ -1,30 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class ChirbitIE(InfoExtractor):
|
||||
IE_NAME = 'chirbit'
|
||||
_VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://chirb.it/PrIPv5',
|
||||
'md5': '9847b0dad6ac3e074568bf2cfb197de8',
|
||||
'url': 'http://chirb.it/be2abG',
|
||||
'info_dict': {
|
||||
'id': 'PrIPv5',
|
||||
'id': 'be2abG',
|
||||
'ext': 'mp3',
|
||||
'title': 'Фасадстрой',
|
||||
'duration': 52,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
|
||||
'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
|
||||
'duration': 306,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chirb.it/wp/MN58c2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -33,27 +36,30 @@ class ChirbitIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://chirb.it/%s' % audio_id, audio_id)
|
||||
|
||||
audio_url = self._search_regex(
|
||||
r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
|
||||
data_fd = self._search_regex(
|
||||
r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'data fd', group='url')
|
||||
|
||||
# Reverse engineered from https://chirb.it/js/chirbit.player.js (look
|
||||
# for soundURL)
|
||||
audio_url = base64.b64decode(
|
||||
data_fd[::-1].encode('ascii')).decode('utf-8')
|
||||
|
||||
title = self._search_regex(
|
||||
r'itemprop="name">([^<]+)', webpage, 'title')
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, 'duration', fatal=False))
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'itemprop="playCount"\s*>(\d+)', webpage,
|
||||
'listen count', fatal=False))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'>(\d+) Comments?:', webpage,
|
||||
'comment count', fatal=False))
|
||||
r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')
|
||||
description = self._search_regex(
|
||||
r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',
|
||||
webpage, 'description', default=None)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'class=["\']c-length["\'][^>]*>([^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -23,7 +23,7 @@ class CliphunterIE(InfoExtractor):
|
||||
(?P<id>[0-9]+)/
|
||||
(?P<seo>.+?)(?:$|[#\?])
|
||||
'''
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||
'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
|
||||
'info_dict': {
|
||||
@@ -32,8 +32,19 @@ class CliphunterIE(InfoExtractor):
|
||||
'title': 'Fun Jynx Maze solo',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
|
||||
'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
|
||||
'info_dict': {
|
||||
'id': '2019449',
|
||||
'ext': 'mp4',
|
||||
'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -6,7 +6,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,37 +16,26 @@ from ..utils import (
|
||||
|
||||
|
||||
class CloudyIE(InfoExtractor):
|
||||
_IE_DESC = 'cloudy.ec and videoraj.ch'
|
||||
_IE_DESC = 'cloudy.ec'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
|
||||
https?://(?:www\.)?cloudy\.ec/
|
||||
(?:v/|embed\.php\?id=)
|
||||
(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_EMBED_URL = 'http://www.%s/embed.php?id=%s'
|
||||
_API_URL = 'http://www.%s/api/player.api.php?%s'
|
||||
_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
|
||||
_API_URL = 'http://www.cloudy.ec/api/player.api.php'
|
||||
_MAX_TRIES = 2
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
||||
'info_dict': {
|
||||
'id': 'af511e2527aac',
|
||||
'ext': 'flv',
|
||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.videoraj.to/v/47f399fd8bb60',
|
||||
'md5': '7d0f8799d91efd4eda26587421c3c3b0',
|
||||
'info_dict': {
|
||||
'id': '47f399fd8bb60',
|
||||
'ext': 'flv',
|
||||
'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',
|
||||
}
|
||||
_TEST = {
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
||||
'info_dict': {
|
||||
'id': 'af511e2527aac',
|
||||
'ext': 'flv',
|
||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
|
||||
def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
|
||||
|
||||
if try_num > self._MAX_TRIES - 1:
|
||||
raise ExtractorError('Unable to extract video URL', expected=True)
|
||||
@@ -64,9 +52,8 @@ class CloudyIE(InfoExtractor):
|
||||
'errorUrl': error_url,
|
||||
})
|
||||
|
||||
data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form))
|
||||
player_data = self._download_webpage(
|
||||
data_url, video_id, 'Downloading player data')
|
||||
self._API_URL, video_id, 'Downloading player data', query=form)
|
||||
data = compat_parse_qs(player_data)
|
||||
|
||||
try_num += 1
|
||||
@@ -88,7 +75,7 @@ class CloudyIE(InfoExtractor):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
||||
self.report_warning('Invalid video URL, requesting another', video_id)
|
||||
return self._extract_video(video_host, video_id, file_key, video_url, try_num)
|
||||
return self._extract_video(video_id, file_key, video_url, try_num)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -98,14 +85,13 @@ class CloudyIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = self._EMBED_URL % (video_host, video_id)
|
||||
url = self._EMBED_URL % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
file_key = self._search_regex(
|
||||
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
||||
webpage, 'file_key')
|
||||
|
||||
return self._extract_video(video_host, video_id, file_key)
|
||||
return self._extract_video(video_id, file_key)
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
@@ -16,7 +18,27 @@ class CMTIE(MTVIE):
|
||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
'description': 'Blame It All On My Roots',
|
||||
},
|
||||
'skip': 'Video not available',
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
|
||||
'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
|
||||
'info_dict': {
|
||||
'id': '1504699',
|
||||
'ext': 'mp4',
|
||||
'title': 'Still The King Ep. 109 in 3 Minutes',
|
||||
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
|
||||
'timestamp': 1469421000.0,
|
||||
'upload_date': '20160725',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def _transform_rtmp_url(cls, rtmp_video_url):
    """Transform an RTMP URL via the parent class, rejecting the error clip.

    Raises an expected ExtractorError when the URL points at
    'error_not_available.swf' instead of real media.
    """
    if 'error_not_available.swf' not in rtmp_video_url:
        return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)

    raise ExtractorError(
        '%s said: video is not available' % cls.IE_NAME, expected=True)
|
||||
|
@@ -3,15 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
url_basename,
|
||||
)
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
|
||||
_TESTS = [{
|
||||
@@ -25,6 +22,7 @@ class CNNIE(InfoExtractor):
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
@@ -34,7 +32,8 @@ class CNNIE(InfoExtractor):
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||
'upload_date': '20130821',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
@@ -44,80 +43,61 @@ class CNNIE(InfoExtractor):
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
'info_dict': {
|
||||
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney',
|
||||
'ext': 'mp4',
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_CONFIG = {
|
||||
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
|
||||
'edition': {
|
||||
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
|
||||
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
|
||||
},
|
||||
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
|
||||
'money': {
|
||||
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
|
||||
'media_src': 'http://ht3.cdn.turner.com/money/big',
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
path = mobj.group('path')
|
||||
page_title = mobj.group('title')
|
||||
info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path
|
||||
info = self._download_xml(info_url, page_title)
|
||||
|
||||
formats = []
|
||||
rex = re.compile(r'''(?x)
|
||||
(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
(?:_(?P<bitrate>[0-9]+)k)?
|
||||
''')
|
||||
for f in info.findall('files/file'):
|
||||
video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip())
|
||||
fdct = {
|
||||
'format_id': f.attrib['bitrate'],
|
||||
'url': video_url,
|
||||
}
|
||||
|
||||
mf = rex.match(f.attrib['bitrate'])
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mf = rex.search(f.text)
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
|
||||
if mi:
|
||||
if mi.group(1) == 'audio':
|
||||
fdct['vcodec'] = 'none'
|
||||
fdct['ext'] = 'm4a'
|
||||
else:
|
||||
fdct['tbr'] = int(mi.group(1))
|
||||
|
||||
formats.append(fdct)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'height': int(t.attrib['height']),
|
||||
'width': int(t.attrib['width']),
|
||||
'url': t.text,
|
||||
} for t in info.findall('images/image')]
|
||||
|
||||
metas_el = info.find('metas')
|
||||
upload_date = (
|
||||
metas_el.attrib.get('version') if metas_el is not None else None)
|
||||
|
||||
duration_el = info.find('length')
|
||||
duration = parse_duration(duration_el.text)
|
||||
|
||||
return {
|
||||
'id': info.attrib['id'],
|
||||
'title': info.find('headline').text,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': info.find('description').text,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
@@ -132,6 +112,7 @@ class CNNBlogsIE(InfoExtractor):
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
@@ -146,7 +127,7 @@ class CNNBlogsIE(InfoExtractor):
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
|
||||
@@ -154,9 +135,10 @@ class CNNArticleIE(InfoExtractor):
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
|
@@ -1,17 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
@@ -26,8 +16,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||
'ext': 'mp4',
|
||||
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
|
||||
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||
'timestamp': 1376798400,
|
||||
'upload_date': '20130818',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||
@@ -35,241 +27,92 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'The Daily Show / The Colbert Report'
|
||||
# urls can be abbreviations like :thedailyshow
|
||||
# urls for episodes like:
|
||||
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
||||
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
|
||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/
|
||||
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
|
||||
(?:/[^/?#]?|[?#]|$))))
|
||||
'''
|
||||
class ToshIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'Tosh.0'
|
||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||
'info_dict': {
|
||||
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121213',
|
||||
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow kristen-stewart part 1',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
|
||||
'info_dict': {
|
||||
'id': 'sarah-chayes-extended-interview',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'title': 'thedailyshow Sarah Chayes Extended Interview',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
|
||||
},
|
||||
},
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
|
||||
},
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'info_dict': {
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'title': 'Twitter Users Share Summer Plans',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
||||
'info_dict': {
|
||||
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
# It's really reported to be published on year 2077
|
||||
'upload_date': '20770610',
|
||||
'timestamp': 3390510600,
|
||||
'subtitles': {
|
||||
'en': 'mincount:3',
|
||||
},
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||
new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
|
||||
return new_urls
|
||||
|
||||
_video_extensions = {
|
||||
'3500': 'mp4',
|
||||
'2200': 'mp4',
|
||||
'1700': 'mp4',
|
||||
'1200': 'mp4',
|
||||
'750': 'mp4',
|
||||
'400': 'mp4',
|
||||
}
|
||||
_video_dimensions = {
|
||||
'3500': (1280, 720),
|
||||
'2200': (960, 540),
|
||||
'1700': (768, 432),
|
||||
'1200': (640, 360),
|
||||
'750': (512, 288),
|
||||
'400': (384, 216),
|
||||
}
|
||||
|
||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-f99b626bdfe13568579a',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if mobj.group('shortname'):
|
||||
return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if mobj.group('clip'):
|
||||
if mobj.group('videotitle'):
|
||||
epTitle = mobj.group('videotitle')
|
||||
elif mobj.group('showname') == 'thedailyshow':
|
||||
epTitle = mobj.group('tdstitle')
|
||||
else:
|
||||
epTitle = mobj.group('cntitle')
|
||||
dlNewest = False
|
||||
elif mobj.group('interview'):
|
||||
epTitle = mobj.group('interview_title')
|
||||
dlNewest = False
|
||||
else:
|
||||
dlNewest = not mobj.group('episode')
|
||||
if dlNewest:
|
||||
epTitle = mobj.group('showname')
|
||||
else:
|
||||
epTitle = mobj.group('episode')
|
||||
show_name = mobj.group('showname')
|
||||
mrss_url = self._search_regex(
|
||||
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'mrss url', group='url')
|
||||
|
||||
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
|
||||
if dlNewest:
|
||||
url = htmlHandle.geturl()
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
||||
if mobj.group('episode') == '':
|
||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
||||
epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
|
||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||
|
||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
||||
if len(mMovieParams) == 0:
|
||||
# The Colbert Report embeds the information in a without
|
||||
# a URL prefix; so extract the alternate reference
|
||||
# and then add the URL prefix manually.
|
||||
|
||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
|
||||
if len(altMovieParams) == 0:
|
||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
||||
else:
|
||||
mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])]
|
||||
class ComedyCentralShortnameIE(InfoExtractor):
|
||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
|
||||
_TESTS = [{
|
||||
'url': ':tds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':thedailyshow',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
uri = mMovieParams[0][1]
|
||||
# Correct cc.com in uri
|
||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
|
||||
|
||||
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri}))
|
||||
idoc = self._download_xml(
|
||||
index_url, epTitle,
|
||||
'Downloading show index', 'Unable to download episode index')
|
||||
|
||||
title = idoc.find('./channel/title').text
|
||||
description = idoc.find('./channel/description').text
|
||||
|
||||
entries = []
|
||||
item_els = idoc.findall('.//item')
|
||||
for part_num, itemEl in enumerate(item_els):
|
||||
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
|
||||
|
||||
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
duration = float_or_none(content.attrib.get('duration'))
|
||||
mediagen_url = content.attrib['url']
|
||||
guid = itemEl.find('./guid').text.rpartition(':')[-1]
|
||||
|
||||
cdoc = self._download_xml(
|
||||
mediagen_url, epTitle,
|
||||
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
|
||||
|
||||
turls = []
|
||||
for rendition in cdoc.findall('.//rendition'):
|
||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||
turls.append(finfo)
|
||||
|
||||
formats = []
|
||||
for format, rtmp_video_url in turls:
|
||||
w, h = self._video_dimensions.get(format, (None, None))
|
||||
formats.append({
|
||||
'format_id': 'vhttp-%s' % format,
|
||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format,
|
||||
'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = self._extract_subtitles(cdoc, guid)
|
||||
|
||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
||||
entries.append({
|
||||
'id': guid,
|
||||
'title': virtual_id,
|
||||
'formats': formats,
|
||||
'uploader': show_name,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': epTitle,
|
||||
'entries': entries,
|
||||
'title': show_name + ' ' + title,
|
||||
'description': description,
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
shortcut_map = {
|
||||
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
}
|
||||
return self.url_result(shortcut_map[video_id])
|
||||
|
@@ -44,6 +44,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
@@ -54,6 +55,8 @@ from ..utils import (
|
||||
update_Request,
|
||||
update_url_query,
|
||||
parse_m3u8_attributes,
|
||||
extract_attributes,
|
||||
parse_codecs,
|
||||
)
|
||||
|
||||
|
||||
@@ -161,6 +164,7 @@ class InfoExtractor(object):
|
||||
* "height" (optional, int)
|
||||
* "resolution" (optional, string "{width}x{height"},
|
||||
deprecated)
|
||||
* "filesize" (optional, int)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@@ -658,6 +662,24 @@ class InfoExtractor(object):
|
||||
else:
|
||||
return res
|
||||
|
||||
def _get_netrc_login_info(self, netrc_machine=None):
|
||||
username = None
|
||||
password = None
|
||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||
|
||||
if self._downloader.params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(netrc_machine)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
|
||||
|
||||
return (username, password)
|
||||
|
||||
def _get_login_info(self):
|
||||
"""
|
||||
Get the login info as (username, password)
|
||||
@@ -675,16 +697,8 @@ class InfoExtractor(object):
|
||||
if downloader_params.get('username') is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
|
||||
else:
|
||||
username, password = self._get_netrc_login_info()
|
||||
|
||||
return (username, password)
|
||||
|
||||
@@ -723,9 +737,14 @@ class InfoExtractor(object):
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||
|
||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||
if not isinstance(prop, (list, tuple)):
|
||||
prop = [prop]
|
||||
if name is None:
|
||||
name = 'OpenGraph %s' % prop
|
||||
escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
|
||||
name = 'OpenGraph %s' % prop[0]
|
||||
og_regexes = []
|
||||
for p in prop:
|
||||
og_regexes.extend(self._og_regexes(p))
|
||||
escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
|
||||
if escaped is None:
|
||||
return None
|
||||
return unescapeHTML(escaped)
|
||||
@@ -803,40 +822,66 @@ class InfoExtractor(object):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
|
||||
def _search_json_ld(self, html, video_id, **kwargs):
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_ld = self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
default = kwargs.get('default', NO_DEFAULT)
|
||||
if not json_ld:
|
||||
return {}
|
||||
return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
|
||||
return default if default is not NO_DEFAULT else {}
|
||||
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||
# At the same time `default` may be passed that assumes `fatal=False`
|
||||
# for _search_regex. Let's simulate the same behavior here as well.
|
||||
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
||||
return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
||||
|
||||
def _json_ld(self, json_ld, video_id, fatal=True):
|
||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
if isinstance(json_ld, compat_str):
|
||||
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
||||
if not json_ld:
|
||||
return {}
|
||||
info = {}
|
||||
if json_ld.get('@context') == 'http://schema.org':
|
||||
item_type = json_ld.get('@type')
|
||||
if item_type == 'TVEpisode':
|
||||
info.update({
|
||||
'episode': unescapeHTML(json_ld.get('name')),
|
||||
'episode_number': int_or_none(json_ld.get('episodeNumber')),
|
||||
'description': unescapeHTML(json_ld.get('description')),
|
||||
})
|
||||
part_of_season = json_ld.get('partOfSeason')
|
||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||
part_of_series = json_ld.get('partOfSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Article':
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(json_ld.get('datePublished')),
|
||||
'title': unescapeHTML(json_ld.get('headline')),
|
||||
'description': unescapeHTML(json_ld.get('articleBody')),
|
||||
})
|
||||
if not isinstance(json_ld, (list, tuple, dict)):
|
||||
return info
|
||||
if isinstance(json_ld, dict):
|
||||
json_ld = [json_ld]
|
||||
for e in json_ld:
|
||||
if e.get('@context') == 'http://schema.org':
|
||||
item_type = e.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type == 'TVEpisode':
|
||||
info.update({
|
||||
'episode': unescapeHTML(e.get('name')),
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
})
|
||||
part_of_season = e.get('partOfSeason')
|
||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||
elif item_type == 'Article':
|
||||
info.update({
|
||||
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||
'title': unescapeHTML(e.get('headline')),
|
||||
'description': unescapeHTML(e.get('articleBody')),
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl'),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
'height': int_or_none(e.get('height')),
|
||||
})
|
||||
break
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
@staticmethod
|
||||
@@ -890,7 +935,8 @@ class InfoExtractor(object):
|
||||
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
|
||||
preference -= 0.5
|
||||
|
||||
proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1
|
||||
protocol = f.get('protocol') or determine_protocol(f)
|
||||
proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
|
||||
|
||||
if f.get('vcodec') == 'none': # audio only
|
||||
preference -= 50
|
||||
@@ -1107,7 +1153,7 @@ class InfoExtractor(object):
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
'protocol': 'm3u8',
|
||||
'preference': preference - 1 if preference else -1,
|
||||
'preference': preference - 100 if preference else -100,
|
||||
'resolution': 'multiple',
|
||||
'format_note': 'Quality selection URL',
|
||||
}
|
||||
@@ -1117,13 +1163,6 @@ class InfoExtractor(object):
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False):
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
@@ -1134,6 +1173,13 @@ class InfoExtractor(object):
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
# We should try extracting formats only from master playlists [1], i.e.
|
||||
# playlists that describe available qualities. On the other hand media
|
||||
# playlists [2] should be returned as is since they contain just the media
|
||||
@@ -1155,27 +1201,44 @@ class InfoExtractor(object):
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}]
|
||||
last_info = None
|
||||
last_media = None
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_info = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
last_media = parse_m3u8_attributes(line)
|
||||
media = parse_m3u8_attributes(line)
|
||||
media_type = media.get('TYPE')
|
||||
if media_type in ('VIDEO', 'AUDIO'):
|
||||
media_url = media.get('URI')
|
||||
if media_url:
|
||||
format_id = []
|
||||
for v in (media.get('GROUP-ID'), media.get('NAME')):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(media_url),
|
||||
'language': media.get('LANGUAGE'),
|
||||
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
})
|
||||
else:
|
||||
# When there is no URI in EXT-X-MEDIA let this tag's
|
||||
# data be used by regular URI lines below
|
||||
last_media = media
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
if last_info is None:
|
||||
formats.append({'url': format_url(line)})
|
||||
continue
|
||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||
tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME') or last_media_name
|
||||
stream_name = last_info.get('NAME') or last_media.get('NAME')
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
@@ -1186,6 +1249,7 @@ class InfoExtractor(object):
|
||||
'url': format_url(line.strip()),
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
@@ -1194,29 +1258,20 @@ class InfoExtractor(object):
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
vcodec, acodec = [None] * 2
|
||||
va_codecs = codecs.split(',')
|
||||
if len(va_codecs) == 1:
|
||||
# Audio only entries usually come with single codec and
|
||||
# no resolution. For more robustness we also check it to
|
||||
# be mp4 audio.
|
||||
if not resolution and va_codecs[0].startswith('mp4a'):
|
||||
vcodec, acodec = 'none', va_codecs[0]
|
||||
else:
|
||||
vcodec = va_codecs[0]
|
||||
else:
|
||||
vcodec, acodec = va_codecs[:2]
|
||||
# Unified Streaming Platform
|
||||
mobj = re.search(
|
||||
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
|
||||
if mobj:
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
f.update({
|
||||
'acodec': acodec,
|
||||
'vcodec': vcodec,
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
@@ -1466,6 +1521,13 @@ class InfoExtractor(object):
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
|
||||
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
||||
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||
"""
|
||||
if mpd_doc.get('type') == 'dynamic':
|
||||
return []
|
||||
|
||||
@@ -1498,8 +1560,16 @@ class InfoExtractor(object):
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
ms_info['total_number'] += 1 + int(s.get('r', '0'))
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
else:
|
||||
timescale = segment_template.get('timescale')
|
||||
if timescale:
|
||||
@@ -1536,7 +1606,7 @@ class InfoExtractor(object):
|
||||
continue
|
||||
representation_attrib = adaptation_set.attrib.copy()
|
||||
representation_attrib.update(representation.attrib)
|
||||
# According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory
|
||||
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
||||
mime_type = representation_attrib['mimeType']
|
||||
content_type = mime_type.split('/')[0]
|
||||
if content_type == 'text':
|
||||
@@ -1580,16 +1650,40 @@ class InfoExtractor(object):
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth')}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template:
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
representation_ms_info['segment_urls'] = []
|
||||
segment_time = 0
|
||||
|
||||
def add_segment_url():
|
||||
representation_ms_info['segment_urls'].append(
|
||||
media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
)
|
||||
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
add_segment_url()
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += s['d']
|
||||
add_segment_url()
|
||||
segment_time += s['d']
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
@@ -1616,6 +1710,88 @@ class InfoExtractor(object):
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
|
||||
def parse_content_type(content_type):
|
||||
if not content_type:
|
||||
return {}
|
||||
ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
|
||||
if ctr:
|
||||
mimetype, codecs = ctr.groups()
|
||||
f = parse_codecs(codecs)
|
||||
f['ext'] = mimetype2ext(mimetype)
|
||||
return f
|
||||
return {}
|
||||
|
||||
def _media_formats(src, cur_media_type):
|
||||
full_url = absolute_url(src)
|
||||
if determine_ext(full_url) == 'm3u8':
|
||||
is_plain_url = False
|
||||
formats = self._extract_m3u8_formats(
|
||||
full_url, video_id, ext='mp4',
|
||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
|
||||
else:
|
||||
is_plain_url = True
|
||||
formats = [{
|
||||
'url': full_url,
|
||||
'vcodec': 'none' if cur_media_type == 'audio' else None,
|
||||
}]
|
||||
return is_plain_url, formats
|
||||
|
||||
entries = []
|
||||
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||
media_info = {
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
}
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = media_attributes.get('src')
|
||||
if src:
|
||||
_, formats = _media_formats(src, media_type)
|
||||
media_info['formats'].extend(formats)
|
||||
media_info['thumbnail'] = media_attributes.get('poster')
|
||||
if media_content:
|
||||
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||
source_attributes = extract_attributes(source_tag)
|
||||
src = source_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
is_plain_url, formats = _media_formats(src, media_type)
|
||||
if is_plain_url:
|
||||
f = parse_content_type(source_attributes.get('type'))
|
||||
f.update(formats[0])
|
||||
media_info['formats'].append(f)
|
||||
else:
|
||||
media_info['formats'].extend(formats)
|
||||
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||
track_attributes = extract_attributes(track_tag)
|
||||
kind = track_attributes.get('kind')
|
||||
if not kind or kind == 'subtitles':
|
||||
src = track_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
if media_info['formats']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id):
    """
    Build HDS and HLS formats from a single Akamai manifest URL.

    Whichever flavour manifest_url points at, both manifest URLs are
    derived from it by swapping the "/i/" and "/z/" path markers and the
    manifest file name. HDS formats come first in the returned list.
    """
    hds_manifest_url = re.sub(
        r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
    hls_manifest_url = re.sub(
        r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')

    # Neither extraction is fatal
    akamai_formats = []
    akamai_formats.extend(self._extract_f4m_formats(
        update_url_query(hds_manifest_url, {'hdcore': '3.7.0'}),
        video_id, f4m_id='hds', fatal=False))
    akamai_formats.extend(self._extract_m3u8_formats(
        hls_manifest_url, video_id, 'mp4', 'm3u8_native',
        m3u8_id='hls', fatal=False))
    return akamai_formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
@@ -1676,7 +1852,7 @@ class InfoExtractor(object):
|
||||
|
||||
any_restricted = False
|
||||
for tc in self.get_testcases(include_onlymatching=False):
|
||||
if 'playlist' in tc:
|
||||
if tc.get('playlist', []):
|
||||
tc = tc['playlist'][0]
|
||||
is_restricted = age_restricted(
|
||||
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||
|
@@ -5,13 +5,17 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
remove_end,
|
||||
extract_attributes,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -58,6 +62,9 @@ class CondeNastIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '3D Printed Speakers Lit With LED',
|
||||
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
||||
'uploader': 'wired',
|
||||
'upload_date': '20130314',
|
||||
'timestamp': 1363219200,
|
||||
}
|
||||
}, {
|
||||
# JS embed
|
||||
@@ -67,70 +74,93 @@ class CondeNastIE(InfoExtractor):
|
||||
'id': '55f9cf8b61646d1acf00000c',
|
||||
'ext': 'mp4',
|
||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||
'uploader': 'arstechnica',
|
||||
'upload_date': '20150916',
|
||||
'timestamp': 1442434955,
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_series(self, url, webpage):
|
||||
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
||||
webpage, 'series title', flags=re.DOTALL)
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
||||
webpage, 'series title')
|
||||
url_object = compat_urllib_parse_urlparse(url)
|
||||
base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
|
||||
m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
|
||||
webpage, flags=re.DOTALL)
|
||||
m_paths = re.finditer(
|
||||
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
|
||||
paths = orderedSet(m.group(1) for m in m_paths)
|
||||
build_url = lambda path: compat_urlparse.urljoin(base_url, path)
|
||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||
return self.playlist_result(entries, playlist_title=title)
|
||||
|
||||
def _extract_video(self, webpage, url_type):
|
||||
if url_type != 'embed':
|
||||
description = self._html_search_regex(
|
||||
[
|
||||
r'<div class="cne-video-description">(.+?)</div>',
|
||||
r'<div class="video-post-content">(.+?)</div>',
|
||||
],
|
||||
webpage, 'description', fatal=False, flags=re.DOTALL)
|
||||
query = {}
|
||||
params = self._search_regex(
|
||||
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
||||
if params:
|
||||
query.update({
|
||||
'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
|
||||
'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
|
||||
'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
|
||||
})
|
||||
else:
|
||||
description = None
|
||||
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
|
||||
'player params', flags=re.DOTALL)
|
||||
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
|
||||
player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
|
||||
target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
|
||||
data = compat_urllib_parse_urlencode({'videoId': video_id,
|
||||
'playerId': player_id,
|
||||
'target': target,
|
||||
})
|
||||
base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
|
||||
webpage, 'base info url',
|
||||
default='http://player.cnevids.com/player/loader.js?')
|
||||
info_url = base_info_url + data
|
||||
info_page = self._download_webpage(info_url, video_id,
|
||||
'Downloading video info')
|
||||
video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info')
|
||||
video_info = self._parse_json(video_info, video_id)
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'(<[^>]+data-js="video-player"[^>]+>)',
|
||||
webpage, 'player params element'))
|
||||
query.update({
|
||||
'videoId': params['data-video'],
|
||||
'playerId': params['data-player'],
|
||||
'target': params['id'],
|
||||
})
|
||||
video_id = query['videoId']
|
||||
video_info = None
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/video.js',
|
||||
video_id, 'Downloading video info', query=query, fatal=False)
|
||||
if info_page:
|
||||
video_info = self._parse_json(self._search_regex(
|
||||
r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
|
||||
else:
|
||||
info_page = self._download_webpage(
|
||||
'http://player.cnevids.com/player/loader.js',
|
||||
video_id, 'Downloading loader info', query=query)
|
||||
video_info = self._parse_json(self._search_regex(
|
||||
r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
|
||||
title = video_info['title']
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
|
||||
'url': fdata['src'],
|
||||
'ext': fdata['type'].split('/')[-1],
|
||||
'quality': 1 if fdata['quality'] == 'high' else 0,
|
||||
} for fdata in video_info['sources'][0]]
|
||||
formats = []
|
||||
for fdata in video_info.get('sources', [{}])[0]:
|
||||
src = fdata.get('src')
|
||||
if not src:
|
||||
continue
|
||||
ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
|
||||
quality = fdata.get('quality')
|
||||
formats.append({
|
||||
'format_id': ext + ('-%s' % quality if quality else ''),
|
||||
'url': src,
|
||||
'ext': ext,
|
||||
'quality': 1 if quality == 'high' else 0,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
info = self._search_json_ld(
|
||||
webpage, video_id, fatal=False) if url_type != 'embed' else {}
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_info['title'],
|
||||
'thumbnail': video_info['poster_frame'],
|
||||
'description': description,
|
||||
}
|
||||
'title': title,
|
||||
'thumbnail': video_info.get('poster_frame'),
|
||||
'uploader': video_info.get('brand'),
|
||||
'duration': int_or_none(video_info.get('duration')),
|
||||
'tags': video_info.get('tags'),
|
||||
'series': video_info.get('series_title'),
|
||||
'season': video_info.get('season_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||
})
|
||||
return info
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site = mobj.group('site')
|
||||
url_type = mobj.group('type')
|
||||
item_id = mobj.group('id')
|
||||
site, url_type, item_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
# Convert JS embed to regular embed
|
||||
if url_type == 'embedjs':
|
||||
|
@@ -1,5 +1,5 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import unicode_literals, division
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
@@ -8,12 +8,22 @@ from ..utils import int_or_none
|
||||
class CrackleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crackle.com/the-art-of-more/2496419',
|
||||
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
||||
'info_dict': {
|
||||
'id': '2496419',
|
||||
'id': '2498934',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heavy Lies the Head',
|
||||
'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
|
||||
'title': 'Everybody Respects A Bloody Nose',
|
||||
'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 906,
|
||||
'series': 'Comedians In Cars Getting Coffee',
|
||||
'season_number': 8,
|
||||
'episode_number': 4,
|
||||
'subtitles': {
|
||||
'en-US': [{
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -21,12 +31,8 @@ class CrackleIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
|
||||
_SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
|
||||
_UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
|
||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'c544.flv': {
|
||||
'width': 544,
|
||||
@@ -48,16 +54,21 @@ class CrackleIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config_doc = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
|
||||
video_id, 'Downloading config')
|
||||
|
||||
item = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||
video_id).find('i')
|
||||
title = item.attrib['t']
|
||||
|
||||
thumbnail = None
|
||||
subtitles = {}
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||
thumbnail = None
|
||||
path = item.attrib.get('p')
|
||||
if path:
|
||||
thumbnail = self._THUMBNAIL_TEMPLATE % path
|
||||
@@ -76,7 +87,7 @@ class CrackleIE(InfoExtractor):
|
||||
if locale not in subtitles:
|
||||
subtitles[locale] = []
|
||||
subtitles[locale] = [{
|
||||
'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
|
||||
'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v),
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
@@ -85,7 +96,7 @@ class CrackleIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': item.attrib.get('d'),
|
||||
'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
|
||||
'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
|
||||
'series': item.attrib.get('sn'),
|
||||
'season_number': int_or_none(item.attrib.get('se')),
|
||||
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||
|
@@ -114,6 +114,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
|
||||
'info_dict': {
|
||||
'id': '702409',
|
||||
'ext': 'mp4',
|
||||
'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
|
||||
'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'TV TOKYO',
|
||||
'upload_date': '20160508',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
@@ -336,9 +351,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
video_play_path = xpath_text(stream_info, './file')
|
||||
if not video_url or not video_play_path:
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
@@ -353,7 +377,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'url': direct_video_url,
|
||||
@@ -363,7 +387,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
format_info.update({
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
|
@@ -51,8 +51,11 @@ class CSpanIE(InfoExtractor):
|
||||
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
}]
|
||||
|
||||
|
@@ -1,13 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, ExtractorError
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
IE_DESC = '華視新聞'
|
||||
# https connection failed (Connection reset)
|
||||
_VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
|
||||
@@ -16,7 +15,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
'id': '201501291578109',
|
||||
'ext': 'mp4',
|
||||
'title': '以色列.真主黨交火 3人死亡',
|
||||
'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
|
||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
|
||||
'timestamp': 1422528540,
|
||||
'upload_date': '20150129',
|
||||
}
|
||||
@@ -28,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
'id': '201309031304098',
|
||||
'ext': 'mp4',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||
'description': 'md5:f183feeba3752b683827aab71adad584',
|
||||
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1378205880,
|
||||
'upload_date': '20130903',
|
||||
@@ -36,8 +35,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
}, {
|
||||
# With Youtube embedded video
|
||||
'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
|
||||
'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
|
||||
'add_ie': ['Youtube'],
|
||||
'md5': 'e4726b2ccd70ba2c319865e28f0a91d1',
|
||||
'info_dict': {
|
||||
'id': 'OVbfO7d0_hQ',
|
||||
'ext': 'mp4',
|
||||
@@ -47,42 +45,37 @@ class CtsNewsIE(InfoExtractor):
|
||||
'upload_date': '20150128',
|
||||
'uploader_id': 'TBSCTS',
|
||||
'uploader': '中華電視公司',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
page = self._download_webpage(url, news_id)
|
||||
|
||||
if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
|
||||
feed_url = self._html_search_regex(
|
||||
r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
|
||||
page, 'feed url')
|
||||
video_url = self._download_webpage(
|
||||
feed_url, news_id, note='Fetching feed')
|
||||
news_id = self._hidden_inputs(page).get('get_id')
|
||||
|
||||
if news_id:
|
||||
mp4_feed = self._download_json(
|
||||
'http://news.cts.com.tw/action/test_mp4feed.php',
|
||||
news_id, note='Fetching feed', query={'news_id': news_id})
|
||||
video_url = mp4_feed['source_url']
|
||||
else:
|
||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||
youtube_url = self._search_regex(
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
|
||||
default=None)
|
||||
if not youtube_url:
|
||||
raise ExtractorError('The news includes no videos!', expected=True)
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': youtube_url,
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
description = self._html_search_meta('description', page)
|
||||
title = self._html_search_meta('title', page)
|
||||
title = self._html_search_meta('title', page, fatal=True)
|
||||
thumbnail = self._html_search_meta('image', page)
|
||||
|
||||
datetime_str = self._html_search_regex(
|
||||
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
|
||||
# Transform into ISO 8601 format with timezone info
|
||||
datetime_str = datetime_str.replace('/', '-') + ':00+0800'
|
||||
timestamp = parse_iso8601(datetime_str, delimiter=' ')
|
||||
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False)
|
||||
timestamp = None
|
||||
if datetime_str:
|
||||
timestamp = unified_timestamp(datetime_str) - 8 * 3600
|
||||
|
||||
return {
|
||||
'id': news_id,
|
||||
|
@@ -1,11 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
@@ -18,13 +20,27 @@ class CTVIE(InfoExtractor):
|
||||
'timestamp': 1442624700,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}, {
|
||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
if domain == 'thecomedynetwork':
|
||||
domain = 'comedy'
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:ctv_web:%s' % video_id,
|
||||
'url': '9c9media:%s_web:%s' % (domain, video_id),
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
|
@@ -1,9 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
HEADRequest,
|
||||
)
|
||||
|
||||
|
||||
class CultureUnpluggedIE(InfoExtractor):
|
||||
@@ -32,6 +36,9 @@ class CultureUnpluggedIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
# request setClientTimezone.php to get the PHPSESSID cookie, which is needed to get valid JSON data in the next request
|
||||
self._request_webpage(HEADRequest(
|
||||
'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
|
||||
movie_data = self._download_json(
|
||||
'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id)
|
||||
|
||||
|
120
youtube_dl/extractor/curiositystream.py
Normal file
120
youtube_dl/extractor/curiositystream.py
Normal file
@@ -0,0 +1,120 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CuriosityStreamBaseIE(InfoExtractor):
    """ Shared login, API access and media parsing for CuriosityStream extractors """

    _NETRC_MACHINE = 'curiositystream'
    _auth_token = None
    _API_BASE_URL = 'https://api.curiositystream.com/v1/'

    def _handle_errors(self, result):
        """ Raise an expected ExtractorError if the API response carries an error message """
        message = result.get('error', {}).get('message')
        if not message:
            return
        # The message may be a dict, in which case its values are joined
        if isinstance(message, dict):
            message = ', '.join(message.values())
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, message), expected=True)

    def _call_api(self, path, video_id):
        """ Download API_BASE_URL + path as JSON and return its 'data' member """
        # The auth token is only sent once a login has stored one
        request_headers = {}
        if self._auth_token:
            request_headers['X-Auth-Token'] = self._auth_token
        response = self._download_json(
            self._API_BASE_URL + path, video_id, headers=request_headers)
        self._handle_errors(response)
        return response['data']

    def _real_initialize(self):
        """ Log in with the configured credentials, if any, and keep the auth token """
        email, password = self._get_login_info()
        if email is None:
            return
        credentials = urlencode_postdata({
            'email': email,
            'password': password,
        })
        response = self._download_json(
            self._API_BASE_URL + 'login', None, data=credentials)
        self._handle_errors(response)
        self._auth_token = response['message']['auth_token']

    def _extract_media_info(self, media):
        """ Convert an API media dict into a url_transparent result handled by LimelightMedia """
        video_id = compat_str(media['id'])
        limelight_media_id = media['limelight_media_id']
        title = media['title']

        subtitles = {}
        for caption in media.get('closed_captions', []):
            caption_url = caption.get('file')
            if caption_url:
                # Captions without any language information are filed under 'en'
                caption_lang = caption.get('code') or caption.get('language') or 'en'
                subtitles.setdefault(caption_lang, []).append({
                    'url': caption_url,
                })

        # Largest available image wins
        thumbnail = media.get('image_large') or media.get('image_medium') or media.get('image_small')

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'limelight:media:' + limelight_media_id,
            'title': title,
            'description': media.get('description'),
            'thumbnail': thumbnail,
            'duration': int_or_none(media.get('duration')),
            'tags': media.get('tags'),
            'subtitles': subtitles,
            'ie_key': 'LimelightMedia',
        }
|
||||
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
    """ Extractor for a single CuriosityStream video page """

    IE_NAME = 'curiositystream'
    _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/video/2',
        'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
        'info_dict': {
            'id': '2',
            'ext': 'mp4',
            'title': 'How Did You Develop The Internet?',
            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
            'timestamp': 1448388615,
            'upload_date': '20151124',
        }
    }

    def _real_extract(self, url):
        # The numeric id from the URL doubles as the API media id
        video_id = self._match_id(url)
        return self._extract_media_info(
            self._call_api('media/' + video_id, video_id))
|
||||
|
||||
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
    """ Extractor for a CuriosityStream collection, returned as a playlist """

    IE_NAME = 'curiositystream:collection'
    _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/collection/2',
        'info_dict': {
            'id': '2',
            'title': 'Curious Minds: The Internet',
            'description': 'How is the internet shaping our lives in the 21st Century?',
        },
        'playlist_mincount': 17,
    }

    def _real_extract(self, url):
        collection_id = self._match_id(url)
        collection = self._call_api(
            'collections/' + collection_id, collection_id)
        # Every media item of the collection becomes one playlist entry
        entries = [
            self._extract_media_info(media)
            for media in collection.get('media', [])]
        return self.playlist_result(
            entries, collection_id,
            collection.get('title'), collection.get('description'))
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'info_dict': {
|
||||
@@ -28,7 +28,8 @@ class CWTVIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'redirect to http://cwtv.com/shows/arrow/',
|
||||
}, {
|
||||
'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
|
||||
'info_dict': {
|
||||
@@ -44,22 +45,43 @@ class CWTVIE(InfoExtractor):
|
||||
'upload_date': '20151006',
|
||||
'timestamp': 1444107300,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/132?format=json' % video_id, video_id)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
|
||||
video_data = None
|
||||
formats = []
|
||||
for partner in (154, 213):
|
||||
vdata = self._download_json(
|
||||
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
|
||||
if not vdata:
|
||||
continue
|
||||
video_data = vdata
|
||||
for quality, quality_data in vdata.get('videos', {}).items():
|
||||
quality_url = quality_data.get('uri')
|
||||
if not quality_url:
|
||||
continue
|
||||
if quality == 'variantplaylist':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
tbr = int_or_none(quality_data.get('bitrate'))
|
||||
format_id = 'http' + ('-%d' % tbr if tbr else '')
|
||||
if self._is_valid_url(quality_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': quality_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
|
@@ -5,19 +5,20 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_protocol,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DailyMailIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
|
||||
'md5': '2f639d446394f53f3a33658b518b6615',
|
||||
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
|
||||
'md5': 'f6129624562251f628296c3a9ffde124',
|
||||
'info_dict': {
|
||||
'id': '1288527',
|
||||
'id': '1295863',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turn any video into an impressionist masterpiece',
|
||||
'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
|
||||
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
|
||||
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,7 +27,7 @@ class DailyMailIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
||||
title = video_data['title']
|
||||
title = unescapeHTML(video_data['title'])
|
||||
video_sources = self._download_json(video_data.get(
|
||||
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
|
||||
|
||||
@@ -55,7 +56,7 @@ class DailyMailIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('descr'),
|
||||
'description': unescapeHTML(video_data.get('descr')),
|
||||
'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -331,7 +331,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
for video_id in re.findall(r'data-xid="(.+?)"', webpage):
|
||||
if video_id not in video_ids:
|
||||
yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||
yield self.url_result(
|
||||
'http://www.dailymotion.com/video/%s' % video_id,
|
||||
DailymotionIE.ie_key(), video_id)
|
||||
video_ids.add(video_id)
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
|
@@ -4,78 +4,53 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:(?:lazyplayer|player)/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
|
||||
'md5': '2e24f67936517b143a234b4cadf792ec',
|
||||
'info_dict': {
|
||||
'id': '33100',
|
||||
'id': '3649835190001',
|
||||
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'ext': 'mp4',
|
||||
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
|
||||
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'timestamp': 1404039863.438,
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'timestamp': 1404039863,
|
||||
'upload_date': '20140629',
|
||||
'duration': 69.544,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
}
|
||||
'uploader_id': '1027729757001',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew']
|
||||
}, {
|
||||
'url': 'http://dbtv.no/3649835190001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/vice/5000634109001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/filmtrailer/3359293614001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
data = self._download_json(
|
||||
'http://api.dbtv.no/discovery/%s' % video_id, display_id)
|
||||
|
||||
video = data['playlist'][0]
|
||||
|
||||
formats = [{
|
||||
'url': f['URL'],
|
||||
'vcodec': f.get('container'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'vbr': float_or_none(f.get('rate'), 1000),
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
} for f in video['renditions'] if 'URL' in f]
|
||||
|
||||
if not formats:
|
||||
for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
|
||||
if url_key in video:
|
||||
formats.append({
|
||||
'url': video[url_key],
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
return {
|
||||
'id': compat_str(video['id']),
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video['title'],
|
||||
'description': clean_html(video['desc']),
|
||||
'thumbnail': video.get('splash') or video.get('thumb'),
|
||||
'timestamp': float_or_none(video.get('publishedAt'), 1000),
|
||||
'duration': float_or_none(video.get('length'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'categories': video.get('tags'),
|
||||
'formats': formats,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
116
youtube_dl/extractor/discoverygo.py
Normal file
116
youtube_dl/extractor/discoverygo.py
Normal file
@@ -0,0 +1,116 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class DiscoveryGoIE(InfoExtractor):
    """Extractor for video pages on the Discovery "GO" sites.

    The page embeds a ``<div class="video-player-container ...">`` element
    whose ``data-video`` (or ``data-json``) attribute holds HTML-escaped JSON
    describing the video; all metadata and stream URLs are read from it.
    """

    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
            discovery|
            investigationdiscovery|
            discoverylife|
            animalplanet|
            ahctv|
            destinationamerica|
            sciencechannel|
            tlc|
            velocitychannel
        )go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'''
    _TEST = {
        'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
        'info_dict': {
            'id': '57a33c536b66d1cd0345eeb1',
            'ext': 'mp4',
            'title': 'Kiss First, Ask Questions Later!',
            'description': 'md5:fe923ba34050eae468bffae10831cb22',
            'duration': 2579,
            'series': 'Love at First Kiss',
            'season_number': 1,
            'episode_number': 1,
            'age_limit': 14,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        Raises ExtractorError when the embedded video JSON or the stream
        description cannot be found (flagged as expected when the page marks
        the video as requiring authentication), or when no format can be
        extracted.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        container = extract_attributes(
            self._search_regex(
                r'(<div[^>]+class=["\']video-player-container[^>]+>)',
                webpage, 'video container'))

        # Fail with a proper extractor error instead of handing None to the
        # JSON parser when neither attribute is present.
        video_json = container.get('data-video') or container.get('data-json')
        if not video_json:
            raise ExtractorError('Unable to find video data')

        video = self._parse_json(unescapeHTML(video_json), display_id)

        title = video['name']

        stream = video.get('stream')
        if not stream:
            if video.get('authenticated') is True:
                raise ExtractorError(
                    'This video is only available via cable service provider subscription that'
                    ' is not currently supported. You may want to use --cookies.', expected=True)
            raise ExtractorError('Unable to find stream')

        # Candidate keys per stream kind, tried in order.  The HDS key used
        # to be built with str.capitalize(), which lower-cases everything
        # after the first character and therefore produced 'hdsStreamurl'.
        # The camelCase spelling is tried first; the old spelling is kept as
        # a fallback so nothing that worked before stops working.
        # NOTE(review): 'hdsStreamUrl' is inferred from the camelCase of
        # 'streamUrl' - confirm against a live API response.
        stream_url_keys = (
            ('', ('streamUrl',)),
            ('hds', ('hdsStreamUrl', 'hdsStreamurl')),
        )

        formats = []
        for stream_kind, keys in stream_url_keys:
            stream_url = None
            for key in keys:
                stream_url = stream.get(key)
                if stream_url:
                    break
            if not stream_url:
                continue
            if stream_kind == '':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif stream_kind == 'hds':
                formats.extend(self._extract_f4m_formats(
                    stream_url, display_id, f4m_id=stream_kind, fatal=False))
        self._sort_formats(formats)

        def sub_dict(key):
            # dict.get(key, {}) only covers a missing key; an explicit JSON
            # null (or any non-dict value) would still break the chained
            # .get(), so normalise everything that is not a dict to {}.
            value = video.get(key)
            return value if isinstance(value, dict) else {}

        video_id = video.get('id') or display_id
        description = sub_dict('description').get('detailed')
        duration = int_or_none(video.get('duration'))

        series = sub_dict('show').get('name')
        season_number = int_or_none(sub_dict('season').get('number'))
        episode_number = int_or_none(video.get('episodeNumber'))

        tags = video.get('tags')
        age_limit = parse_age_limit(sub_dict('parental').get('rating'))

        subtitles = {}
        captions = stream.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                if not isinstance(caption, dict):
                    continue
                subtitle_url = caption.get('fileUrl')
                if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
                        not subtitle_url.startswith('http')):
                    continue
                # Fall back to 'en' for a missing *or* null language.
                lang = caption.get('fileLang') or 'en'
                subtitles.setdefault(lang, []).append({'url': subtitle_url})

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'duration': duration,
            'series': series,
            'season_number': season_number,
            'episode_number': episode_number,
            'tags': tags,
            'age_limit': age_limit,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@@ -10,18 +10,18 @@ from ..utils import (
|
||||
class DotsubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||
'md5': '0914d4d69605090f623b7ac329fea66e',
|
||||
'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
|
||||
'md5': '21c7ff600f545358134fea762a6d42b6',
|
||||
'info_dict': {
|
||||
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||
'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09',
|
||||
'ext': 'flv',
|
||||
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
|
||||
'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
|
||||
'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
||||
'duration': 3169,
|
||||
'uploader': '4v4l0n42',
|
||||
'timestamp': 1292248482.625,
|
||||
'upload_date': '20101213',
|
||||
'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever',
|
||||
'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6',
|
||||
'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p',
|
||||
'duration': 198,
|
||||
'uploader': 'liuxt',
|
||||
'timestamp': 1385778501.104,
|
||||
'upload_date': '20131130',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
@@ -3,9 +3,17 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (ExtractorError, unescapeHTML)
|
||||
from ..compat import (compat_str, compat_basestring)
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
@@ -21,7 +29,6 @@ class DouyuTVIE(InfoExtractor):
|
||||
'description': 're:.*m7show@163\.com.*',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@@ -37,7 +44,6 @@ class DouyuTVIE(InfoExtractor):
|
||||
'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'douyu小漠',
|
||||
'uploader_id': '3769985',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@@ -54,7 +60,6 @@ class DouyuTVIE(InfoExtractor):
|
||||
'description': 're:.*m7show@163\.com.*',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@@ -65,6 +70,10 @@ class DouyuTVIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
|
||||
# is encrypted originally, but ffdec can dump memory to get the decrypted one.
|
||||
_API_KEY = 'A12Svb&%1UUmf@hC'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -75,74 +84,56 @@ class DouyuTVIE(InfoExtractor):
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||
|
||||
config = None
|
||||
# Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache"
|
||||
# Retry with different parameters - same parameters cause same errors
|
||||
for i in range(5):
|
||||
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
|
||||
room_id, int(time.time()))
|
||||
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
|
||||
room = self._download_json(
|
||||
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
|
||||
note='Downloading room info')['data']
|
||||
|
||||
config_page = self._download_webpage(
|
||||
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
|
||||
video_id)
|
||||
try:
|
||||
config = self._parse_json(config_page, video_id, fatal=False)
|
||||
except ExtractorError:
|
||||
# Wait some time before retrying to get a different time() value
|
||||
self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. '
|
||||
'Waiting for %(timeout)s seconds before retrying')
|
||||
continue
|
||||
else:
|
||||
break
|
||||
if config is None:
|
||||
raise ExtractorError('Unable to fetch API result')
|
||||
|
||||
data = config['data']
|
||||
|
||||
error_code = config.get('error', 0)
|
||||
if error_code is not 0:
|
||||
error_desc = 'Server reported error %i' % error_code
|
||||
if isinstance(data, (compat_str, compat_basestring)):
|
||||
error_desc += ': ' + data
|
||||
raise ExtractorError(error_desc, expected=True)
|
||||
|
||||
show_status = data.get('show_status')
|
||||
# 1 = live, 2 = offline
|
||||
if show_status == '2':
|
||||
if room.get('show_status') == '2':
|
||||
raise ExtractorError('Live stream is offline', expected=True)
|
||||
|
||||
tt = compat_str(int(time.time() / 60))
|
||||
did = uuid.uuid4().hex.upper()
|
||||
|
||||
sign_content = ''.join((room_id, did, self._API_KEY, tt))
|
||||
sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest()
|
||||
|
||||
flv_data = compat_urllib_parse_urlencode({
|
||||
'cdn': 'ws',
|
||||
'rate': '0',
|
||||
'tt': tt,
|
||||
'did': did,
|
||||
'sign': sign,
|
||||
})
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id,
|
||||
data=flv_data, note='Downloading video info',
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
error_code = video_info.get('error', 0)
|
||||
if error_code is not 0:
|
||||
raise ExtractorError(
|
||||
'Live stream is offline', expected=True)
|
||||
'%s reported error %i' % (self.IE_NAME, error_code),
|
||||
expected=True)
|
||||
|
||||
base_url = data['rtmp_url']
|
||||
live_path = data['rtmp_live']
|
||||
base_url = video_info['data']['rtmp_url']
|
||||
live_path = video_info['data']['rtmp_live']
|
||||
|
||||
title = self._live_title(unescapeHTML(data['room_name']))
|
||||
description = data.get('show_details')
|
||||
thumbnail = data.get('room_src')
|
||||
video_url = '%s/%s' % (base_url, live_path)
|
||||
|
||||
uploader = data.get('nickname')
|
||||
uploader_id = data.get('owner_uid')
|
||||
|
||||
multi_formats = data.get('rtmp_multi_bitrate')
|
||||
if not isinstance(multi_formats, dict):
|
||||
multi_formats = {}
|
||||
multi_formats['live'] = live_path
|
||||
|
||||
formats = [{
|
||||
'url': '%s/%s' % (base_url, format_path),
|
||||
'format_id': format_id,
|
||||
'preference': 1 if format_id == 'live' else 0,
|
||||
} for format_id, format_path in multi_formats.items()]
|
||||
self._sort_formats(formats)
|
||||
title = self._live_title(unescapeHTML(room['room_name']))
|
||||
description = room.get('notice')
|
||||
thumbnail = room.get('room_src')
|
||||
uploader = room.get('nickname')
|
||||
|
||||
return {
|
||||
'id': room_id,
|
||||
'display_id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
@@ -17,8 +17,12 @@ class DreiSatIE(ZDFIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'uploader': '3sat',
|
||||
'uploader': 'SCHWEIZWEIT',
|
||||
'uploader_id': '100000210',
|
||||
'upload_date': '20140913'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class DrTuberIE(InfoExtractor):
|
||||
@@ -17,7 +20,6 @@ class DrTuberIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'hot perky blonde naked golf',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
@@ -36,25 +38,29 @@ class DrTuberIE(InfoExtractor):
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'],
|
||||
(r'class="title_watch"[^>]*><p>([^<]+)<',
|
||||
r'<p[^>]+class="title_substrate">([^<]+)</p>',
|
||||
r'<title>([^<]+) - \d+'),
|
||||
webpage, 'title')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'poster="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
def extract_count(id_, name):
|
||||
def extract_count(id_, name, default=NO_DEFAULT):
|
||||
return str_to_int(self._html_search_regex(
|
||||
r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
|
||||
webpage, '%s count' % name, fatal=False))
|
||||
webpage, '%s count' % name, default=default, fatal=False))
|
||||
|
||||
like_count = extract_count('rate_likes', 'like')
|
||||
dislike_count = extract_count('rate_dislikes', 'dislike')
|
||||
dislike_count = extract_count('rate_dislikes', 'dislike', default=None)
|
||||
comment_count = extract_count('comments_count', 'comment')
|
||||
|
||||
cats_str = self._search_regex(
|
||||
r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
||||
r'<div[^>]+class="categories_list">(.+?)</div>',
|
||||
webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(
|
||||
r'<a title="([^"]+)"', cats_str)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -4,26 +4,45 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
|
||||
'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
'info_dict': {
|
||||
'id': 'panisk-paske-5',
|
||||
'id': 'klassen-darlig-taber-10',
|
||||
'ext': 'mp4',
|
||||
'title': 'Panisk Påske (5)',
|
||||
'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
|
||||
'timestamp': 1426984612,
|
||||
'upload_date': '20150322',
|
||||
'duration': 1455,
|
||||
'title': 'Klassen - Dårlig taber (10)',
|
||||
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
|
||||
'timestamp': 1471991907,
|
||||
'upload_date': '20160823',
|
||||
'duration': 606.84,
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
||||
'md5': '2c37175c718155930f939ef59952474a',
|
||||
'info_dict': {
|
||||
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
|
||||
'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
|
||||
'timestamp': 1472800279,
|
||||
'upload_date': '20160902',
|
||||
'duration': 131.4,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -35,7 +54,8 @@ class DRTVIE(InfoExtractor):
|
||||
'Video %s is not available' % video_id, expected=True)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||
webpage, 'video id')
|
||||
|
||||
programcard = self._download_json(
|
||||
@@ -43,9 +63,12 @@ class DRTVIE(InfoExtractor):
|
||||
video_id, 'Downloading video JSON')
|
||||
data = programcard['Data'][0]
|
||||
|
||||
title = data['Title']
|
||||
description = data['Description']
|
||||
timestamp = parse_iso8601(data['CreatedTime'])
|
||||
title = remove_end(self._og_search_title(
|
||||
webpage, default=None), ' | TV | DR') or data['Title']
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or data.get('Description')
|
||||
|
||||
timestamp = parse_iso8601(data.get('CreatedTime'))
|
||||
|
||||
thumbnail = None
|
||||
duration = None
|
||||
@@ -56,16 +79,18 @@ class DRTVIE(InfoExtractor):
|
||||
subtitles = {}
|
||||
|
||||
for asset in data['Assets']:
|
||||
if asset['Kind'] == 'Image':
|
||||
thumbnail = asset['Uri']
|
||||
elif asset['Kind'] == 'VideoResource':
|
||||
duration = asset['DurationInMilliseconds'] / 1000.0
|
||||
restricted_to_denmark = asset['RestrictedToDenmark']
|
||||
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
|
||||
for link in asset['Links']:
|
||||
uri = link['Uri']
|
||||
target = link['Target']
|
||||
format_id = target
|
||||
if asset.get('Kind') == 'Image':
|
||||
thumbnail = asset.get('Uri')
|
||||
elif asset.get('Kind') == 'VideoResource':
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
|
||||
for link in asset.get('Links', []):
|
||||
uri = link.get('Uri')
|
||||
if not uri:
|
||||
continue
|
||||
target = link.get('Target')
|
||||
format_id = target or ''
|
||||
preference = None
|
||||
if spoken_subtitles:
|
||||
preference = -1
|
||||
@@ -76,8 +101,8 @@ class DRTVIE(InfoExtractor):
|
||||
video_id, preference, f4m_id=format_id))
|
||||
elif target == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
uri, video_id, 'mp4', preference=preference,
|
||||
m3u8_id=format_id))
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=preference, m3u8_id=format_id))
|
||||
else:
|
||||
bitrate = link.get('Bitrate')
|
||||
if bitrate:
|
||||
@@ -85,7 +110,7 @@ class DRTVIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': uri,
|
||||
'format_id': format_id,
|
||||
'tbr': bitrate,
|
||||
'tbr': int_or_none(bitrate),
|
||||
'ext': link.get('FileFormat'),
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
@@ -94,12 +119,18 @@ class DRTVIE(InfoExtractor):
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
lang = subs['Language']
|
||||
subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
|
||||
if not subs.get('Uri'):
|
||||
continue
|
||||
lang = subs.get('Language') or 'da'
|
||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||
'url': subs['Uri'],
|
||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||
})
|
||||
|
||||
if not formats and restricted_to_denmark:
|
||||
raise ExtractorError(
|
||||
'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)
|
||||
self.raise_geo_restricted(
|
||||
'Unfortunately, DR is not allowed to show this program outside Denmark.',
|
||||
expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -52,11 +52,24 @@ class EaglePlatformIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
# Regular iframe embedding
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
# Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
|
||||
mobj = re.search(
|
||||
r'''(?xs)
|
||||
<script[^>]+
|
||||
src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
|
||||
.+?
|
||||
<div[^>]+
|
||||
class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
|
||||
data-id=["\'](?P<id>\d+)
|
||||
''', webpage)
|
||||
if mobj is not None:
|
||||
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
|
||||
|
||||
@staticmethod
|
||||
def _handle_error(response):
|
||||
|
@@ -6,12 +6,13 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
)
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
|
||||
'md5': '4294cf98bc165f218aaa0b89e0fd8042',
|
||||
'info_dict': {
|
||||
@@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor):
|
||||
'timestamp': 1428035648,
|
||||
'upload_date': '20150403',
|
||||
'uploader_id': 'batchUser',
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# not available via http://widgets.ellentube.com/
|
||||
'url': 'http://www.ellentv.com/videos/1-szkgu2m2/',
|
||||
'info_dict': {
|
||||
'id': '1_szkgu2m2',
|
||||
'ext': 'flv',
|
||||
'title': "Ellen's Amazingly Talented Audience",
|
||||
'description': 'md5:86ff1e376ff0d717d7171590e273f0a5',
|
||||
'timestamp': 1255140900,
|
||||
'upload_date': '20091010',
|
||||
'uploader_id': 'ellenkaltura@gmail.com',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://widgets.ellentube.com/videos/%s' % video_id,
|
||||
video_id)
|
||||
URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id')
|
||||
for num, url_ in enumerate(URLS, 1):
|
||||
webpage = self._download_webpage(
|
||||
url_, video_id, fatal=num == len(URLS))
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
[r'id="kaltura_player_([^"]+)"',
|
||||
r"_wb_entry_id\s*:\s*'([^']+)",
|
||||
r'data-kaltura-entry-id="([^"]+)'],
|
||||
webpage, 'kaltura id')
|
||||
default = NO_DEFAULT if num == len(URLS) else None
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id',
|
||||
default=default)
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
[r'id="kaltura_player_([^"]+)"',
|
||||
r"_wb_entry_id\s*:\s*'([^']+)",
|
||||
r'data-kaltura-entry-id="([^"]+)'],
|
||||
webpage, 'kaltura id', default=default)
|
||||
|
||||
if partner_id and kaltura_id:
|
||||
break
|
||||
|
||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
|
||||
|
||||
|
@@ -4,9 +4,10 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class EngadgetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
'url': 'http://www.engadget.com/video/518153925/',
|
||||
'md5': 'c6820d4828a5064447a4d9fc73f312c9',
|
||||
'info_dict': {
|
||||
@@ -15,8 +16,12 @@ class EngadgetIE(InfoExtractor):
|
||||
'title': 'Samsung Galaxy Tab Pro 8.4 Review',
|
||||
},
|
||||
'add_ie': ['FiveMin'],
|
||||
}
|
||||
}, {
|
||||
# video with vidible ID
|
||||
'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result('5min:%s' % video_id)
|
||||
return self.url_result('aol-video:%s' % video_id)
|
||||
|
@@ -4,19 +4,23 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
'info_dict': {
|
||||
'id': '95008',
|
||||
'id': 'qlDUmNsj6VS',
|
||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||
@@ -28,34 +32,72 @@ class EpornerIE(InfoExtractor):
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?) - EPORNER', webpage, 'title')
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
|
||||
player_code = self._download_webpage(
|
||||
redirect_url, display_id, note='Downloading player config')
|
||||
video_id = self._match_id(compat_str(urlh.geturl()))
|
||||
|
||||
sources = self._search_regex(
|
||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
|
||||
hash = self._search_regex(
|
||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?) - EPORNER', webpage, 'title')
|
||||
|
||||
# Reverse engineered from vjs.js
|
||||
def calc_hash(s):
|
||||
return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
|
||||
|
||||
video = self._download_json(
|
||||
'http://www.eporner.com/xhr/video/%s' % video_id,
|
||||
display_id, note='Downloading video JSON',
|
||||
query={
|
||||
'hash': calc_hash(hash),
|
||||
'device': 'generic',
|
||||
'domain': 'www.eporner.com',
|
||||
'fallback': 'false',
|
||||
})
|
||||
|
||||
if video.get('available') is False:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, video['message']), expected=True)
|
||||
|
||||
sources = video['sources']
|
||||
|
||||
formats = []
|
||||
for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
|
||||
fmt = {
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
}
|
||||
m = re.search(r'^(\d+)', format_id)
|
||||
if m:
|
||||
fmt['height'] = int(m.group(1))
|
||||
formats.append(fmt)
|
||||
for kind, formats_dict in sources.items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
for format_id, format_dict in formats_dict.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
src = format_dict.get('src')
|
||||
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||
continue
|
||||
if kind == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=kind, fatal=False))
|
||||
else:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', format_id, 'height', default=None))
|
||||
fps = int_or_none(self._search_regex(
|
||||
r'(\d+)fps', format_id, 'fps', default=None))
|
||||
|
||||
formats.append({
|
||||
'url': src,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
|
@@ -5,7 +5,7 @@ from ..utils import remove_end
|
||||
|
||||
|
||||
class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||
@@ -47,6 +47,9 @@ class ESPNIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip?id=10365079',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,58 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ExfmIE(InfoExtractor):
|
||||
IE_NAME = 'exfm'
|
||||
IE_DESC = 'ex.fm'
|
||||
_VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
|
||||
_SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://ex.fm/song/eh359',
|
||||
'md5': 'e45513df5631e6d760970b14cc0c11e7',
|
||||
'info_dict': {
|
||||
'id': '44216187',
|
||||
'ext': 'mp3',
|
||||
'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive',
|
||||
'uploader': 'deadjournalist',
|
||||
'upload_date': '20120424',
|
||||
'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
|
||||
},
|
||||
'note': 'Soundcloud song',
|
||||
'skip': 'The site is down too often',
|
||||
},
|
||||
{
|
||||
'url': 'http://ex.fm/song/wddt8',
|
||||
'md5': '966bd70741ac5b8570d8e45bfaed3643',
|
||||
'info_dict': {
|
||||
'id': 'wddt8',
|
||||
'ext': 'mp3',
|
||||
'title': 'Safe and Sound',
|
||||
'uploader': 'Capital Cities',
|
||||
},
|
||||
'skip': 'The site is down too often',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
song_id = mobj.group('id')
|
||||
info_url = 'http://ex.fm/api/v3/song/%s' % song_id
|
||||
info = self._download_json(info_url, song_id)['song']
|
||||
song_url = info['url']
|
||||
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
||||
self.to_screen('Soundcloud song detected')
|
||||
return self.url_result(song_url.replace('/stream', ''), 'Soundcloud')
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song_url,
|
||||
'ext': 'mp3',
|
||||
'title': info['title'],
|
||||
'thumbnail': info['image']['large'],
|
||||
'uploader': info['artist'],
|
||||
'view_count': info['loved_count'],
|
||||
}
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -12,23 +10,22 @@ from ..utils import (
|
||||
class ExpoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
|
||||
'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
|
||||
'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
|
||||
'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
|
||||
'info_dict': {
|
||||
'id': '17561',
|
||||
'id': '667916',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20060212',
|
||||
'title': 'My Favorite Online Scrapbook Store',
|
||||
'view_count': int,
|
||||
'description': 'You\'ll find most everything you need at this virtual store front.',
|
||||
'uploader': 'Anna T.',
|
||||
'title': 'NYX Butter Lipstick Little Susie',
|
||||
'description': 'Goes on like butter, but looks better!',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Stephanie S.',
|
||||
'upload_date': '20150520',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_key = self._search_regex(
|
||||
@@ -66,7 +63,7 @@ class ExpoTVIE(InfoExtractor):
|
||||
fatal=False)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
|
||||
fatal=False))
|
||||
fatal=False), day_first=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,7 +1,10 @@
|
||||
# flake8: noqa
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .abc import ABCIE
|
||||
from .abc import (
|
||||
ABCIE,
|
||||
ABCIViewIE,
|
||||
)
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .abcnews import (
|
||||
AbcNewsIE,
|
||||
@@ -29,6 +32,7 @@ from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
@@ -44,6 +48,7 @@ from .appletrailers import (
|
||||
AppleTrailersSectionIE,
|
||||
)
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
@@ -66,6 +71,12 @@ from .atttechchannel import ATTTechChannelIE
|
||||
from .audimedia import AudiMediaIE
|
||||
from .audioboom import AudioBoomIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .awaan import (
|
||||
AWAANIE,
|
||||
AWAANVideoIE,
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
@@ -116,9 +127,12 @@ from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
)
|
||||
from .cartoonnetwork import CartoonNetworkIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
CBCWatchVideoIE,
|
||||
CBCWatchIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
@@ -132,6 +146,7 @@ from .ccc import CCCIE
|
||||
from .cda import CDAIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .charlierose import CharlieRoseIE
|
||||
from .chaturbate import ChaturbateIE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .chirbit import (
|
||||
@@ -156,7 +171,12 @@ from .cnn import (
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralIE,
|
||||
ComedyCentralShortnameIE,
|
||||
ComedyCentralTVIE,
|
||||
ToshIE,
|
||||
)
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import RtmpIE
|
||||
@@ -174,6 +194,10 @@ from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
CuriosityStreamIE,
|
||||
CuriosityStreamCollectionIE,
|
||||
)
|
||||
from .cwtv import CWTVIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .dailymotion import (
|
||||
@@ -189,12 +213,6 @@ from .daum import (
|
||||
DaumUserIE,
|
||||
)
|
||||
from .dbtv import DBTVIE
|
||||
from .dcn import (
|
||||
DCNIE,
|
||||
DCNVideoIE,
|
||||
DCNLiveIE,
|
||||
DCNSeasonIE,
|
||||
)
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .democracynow import DemocracynowIE
|
||||
@@ -215,6 +233,7 @@ from .dvtv import DVTVIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .discoverygo import DiscoveryGoIE
|
||||
from .dispeak import DigitallySpeakingIE
|
||||
from .dropbox import DropboxIE
|
||||
from .dw import (
|
||||
@@ -242,13 +261,18 @@ from .espn import ESPNIE
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import FacebookIE
|
||||
from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPluginsVideoIE,
|
||||
)
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .fc2 import (
|
||||
FC2IE,
|
||||
FC2EmbedIE,
|
||||
)
|
||||
from .fczenit import FczenitIE
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
@@ -256,18 +280,19 @@ from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .flipagram import FlipagramIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
from .fourtube import FourTubeIE
|
||||
from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import (
|
||||
FranceCultureIE,
|
||||
FranceCultureEmissionIE,
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
FoxNewsInsiderIE,
|
||||
)
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
PluzzIE,
|
||||
@@ -282,8 +307,8 @@ from .freevideo import FreeVideoIE
|
||||
from .funimation import FunimationIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gameone import (
|
||||
GameOneIE,
|
||||
GameOnePlaylistIE,
|
||||
@@ -302,9 +327,9 @@ from .globo import (
|
||||
GloboIE,
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .go import GoIE
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .googleplus import GooglePlusIE
|
||||
@@ -319,6 +344,10 @@ from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import (
|
||||
HGTVIE,
|
||||
HGTVComShowIE,
|
||||
)
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
@@ -392,6 +421,10 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
LcpIE,
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
@@ -466,11 +499,11 @@ from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movingimage import MovingImageIE
|
||||
from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVIggyIE,
|
||||
MTVDEIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
@@ -482,8 +515,9 @@ from .myvi import MyviIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicVideoIE,
|
||||
NationalGeographicIE,
|
||||
NationalGeographicChannelIE,
|
||||
NationalGeographicEpisodeGuideIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
@@ -520,9 +554,9 @@ from .nextmedia import (
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyIE,
|
||||
)
|
||||
from .nextmovie import NextMovieIE
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhk import NhkVodIE
|
||||
from .nhl import (
|
||||
NHLVideocenterIE,
|
||||
NHLNewsIE,
|
||||
@@ -534,8 +568,13 @@ from .nick import (
|
||||
NickDeIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninecninemedia import (
|
||||
NineCNineMediaStackIE,
|
||||
NineCNineMediaIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
@@ -580,6 +619,7 @@ from .nytimes import (
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .odatv import OdaTVIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .onet import (
|
||||
@@ -613,7 +653,6 @@ from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
@@ -624,8 +663,10 @@ from .pluralsight import (
|
||||
PluralsightCourseIE,
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .pokemon import PokemonIE
|
||||
from .polskieradio import PolskieRadioIE
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
@@ -679,16 +720,19 @@ from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .roosterteeth import RoosterTeethIE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rozhlas import RozhlasIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE, RteRadioIE
|
||||
from .rtlnl import RtlNlIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
from .ruleporn import RulePornIE
|
||||
from .rutube import (
|
||||
@@ -739,6 +783,7 @@ from .smotri import (
|
||||
)
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .sonyliv import SonyLIVIE
|
||||
from .soundcloud import (
|
||||
SoundcloudIE,
|
||||
SoundcloudSetIE,
|
||||
@@ -775,9 +820,9 @@ from .srgssr import (
|
||||
SRGSSRPlayIE,
|
||||
)
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
@@ -793,8 +838,8 @@ from .tagesschau import (
|
||||
TagesschauPlayerIE,
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
@@ -837,16 +882,11 @@ from .tnaflix import (
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
THVideoPlaylistIE
|
||||
)
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trollvids import TrollvidsIE
|
||||
from .trutube import TruTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
from .tudou import (
|
||||
@@ -877,10 +917,14 @@ from .tvc import (
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvp import (
|
||||
TVPEmbedIE,
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
)
|
||||
from .tvplay import TVPlayIE
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
)
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
@@ -909,8 +953,14 @@ from .udemy import (
|
||||
from .udn import UDNEmbedIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .uol import UOLIE
|
||||
from .uplynk import (
|
||||
UplynkIE,
|
||||
UplynkPreplayIE,
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usanetwork import USANetworkIE
|
||||
from .usatoday import USATodayIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .ustudio import (
|
||||
@@ -937,6 +987,7 @@ from .vice import (
|
||||
ViceIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .viceland import VicelandIE
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
@@ -986,9 +1037,11 @@ from .viki import (
|
||||
from .vk import (
|
||||
VKIE,
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
)
|
||||
from .vlive import VLiveIE
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
from .voxmedia import VoxMediaIE
|
||||
from .vporn import VpornIE
|
||||
@@ -1081,8 +1134,4 @@ from .youtube import (
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
ZingMp3AlbumIE,
|
||||
)
|
||||
from .zippcast import ZippCastIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
|
@@ -1,20 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
from ..utils import str_to_int
|
||||
from .keezmovies import KeezMoviesIE
|
||||
|
||||
|
||||
class ExtremeTubeIE(InfoExtractor):
|
||||
class ExtremeTubeIE(KeezMoviesIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
|
||||
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
|
||||
'info_dict': {
|
||||
'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'ext': 'mp4',
|
||||
@@ -35,58 +29,22 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, info = self._extract_info(url)
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
if not info['title']:
|
||||
info['title'] = self._search_regex(
|
||||
r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
uploader = self._html_search_regex(
|
||||
r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
flash_vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
for quality_key, video_url in flash_vars.items():
|
||||
height = int_or_none(self._search_regex(
|
||||
r'quality_(\d+)[pP]$', quality_key, 'height', default=None))
|
||||
if not height:
|
||||
continue
|
||||
f = {
|
||||
'url': video_url,
|
||||
}
|
||||
mobj = re.search(
|
||||
r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||
if mobj:
|
||||
height = int(mobj.group('height'))
|
||||
bitrate = int(mobj.group('bitrate'))
|
||||
f.update({
|
||||
'format_id': '%dp-%dk' % (height, bitrate),
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'format_id': '%dp' % height,
|
||||
'height': height,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
info.update({
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
||||
return info
|
||||
|
@@ -15,6 +15,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
limit_length,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
@@ -27,7 +28,7 @@ class FacebookIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://
|
||||
(?:\w+\.)?facebook\.com/
|
||||
(?:[\w-]+\.)?facebook\.com/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
@@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
|
||||
'uploader': 'Tennis on Facebook',
|
||||
'upload_date': '20140908',
|
||||
'timestamp': 1410199200,
|
||||
}
|
||||
}, {
|
||||
'note': 'Video without discernible title',
|
||||
@@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
@@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||
'md5': '54706e4db4f5ad58fbad82dde1f1213f',
|
||||
'md5': 'b2c28d528273b323abe5c6ab59f0f030',
|
||||
'info_dict': {
|
||||
'id': '957955867617029',
|
||||
'ext': 'mp4',
|
||||
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
|
||||
'uploader': 'Demy de Zeeuw',
|
||||
'upload_date': '20160110',
|
||||
'timestamp': 1452431627,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
@@ -127,6 +134,9 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -219,12 +229,25 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
|
||||
if m:
|
||||
swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
|
||||
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
|
||||
|
||||
for m in re.findall(PATTERN, webpage):
|
||||
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
|
||||
data = dict(json.loads(swf_params))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data = json.loads(params_raw)['video_data']
|
||||
video_data_candidate = json.loads(params_raw)['video_data']
|
||||
for _, f in video_data_candidate.items():
|
||||
if not f:
|
||||
continue
|
||||
if isinstance(f, dict):
|
||||
f = [f]
|
||||
if not isinstance(f, list):
|
||||
continue
|
||||
if f[0].get('video_id') == video_id:
|
||||
video_data = video_data_candidate
|
||||
break
|
||||
if video_data:
|
||||
break
|
||||
|
||||
def video_data_list2dict(video_data):
|
||||
ret = {}
|
||||
@@ -290,12 +313,16 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_title:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
@@ -324,3 +351,32 @@ class FacebookIE(InfoExtractor):
|
||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||
video_id, fatal_if_no_video=True)
|
||||
return info_dict
|
||||
|
||||
|
||||
class FacebookPluginsVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
|
||||
'md5': '5954e92cdfe51fe5782ae9bda7058a07',
|
||||
'info_dict': {
|
||||
'id': '10154383743583686',
|
||||
'ext': 'mp4',
|
||||
'title': 'What to do during the haze?',
|
||||
'uploader': 'Gov.sg',
|
||||
'upload_date': '20160826',
|
||||
'timestamp': 1472184808,
|
||||
},
|
||||
'add_ie': [FacebookIE.ie_key()],
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(
|
||||
compat_urllib_parse_unquote(self._match_id(url)),
|
||||
FacebookIE.ie_key())
|
||||
|
@@ -1,10 +1,12 @@
|
||||
#! -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -16,7 +18,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
@@ -75,12 +77,17 @@ class FC2IE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
self._login()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
self._login()
|
||||
webpage = None
|
||||
if not url.startswith('fc2:'):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
self._login()
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
title = 'FC2 video %s' % video_id
|
||||
thumbnail = None
|
||||
if webpage is not None:
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
|
||||
|
||||
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
||||
@@ -113,3 +120,41 @@ class FC2IE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class FC2EmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
|
||||
IE_NAME = 'fc2:embed'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
|
||||
'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
|
||||
'info_dict': {
|
||||
'id': '201403223kCqB3Ez',
|
||||
'ext': 'flv',
|
||||
'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
query = compat_parse_qs(mobj.group('query'))
|
||||
|
||||
video_id = query['i'][-1]
|
||||
title = query.get('tl', ['FC2 video %s' % video_id])[0]
|
||||
|
||||
sj = query.get('sj', [None])[0]
|
||||
thumbnail = None
|
||||
if sj:
|
||||
# See thumbnailImagePath() in ServerConst.as of flv2.swf
|
||||
thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
|
||||
sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': FC2IE.ie_key(),
|
||||
'url': 'fc2:%s' % video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -2,44 +2,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_xpath
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
xpath_attr,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
class FirstTVIE(InfoExtractor):
|
||||
IE_NAME = '1tv'
|
||||
IE_DESC = 'Первый канал'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# single format via video_materials.json API
|
||||
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
|
||||
'md5': '82a2777648acae812d58b3f5bd42882b',
|
||||
# single format
|
||||
'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
|
||||
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
|
||||
'info_dict': {
|
||||
'id': '35930',
|
||||
'id': '40049',
|
||||
'ext': 'mp4',
|
||||
'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
|
||||
'description': 'md5:357933adeede13b202c7c21f91b871b2',
|
||||
'description': 'md5:36a39c1d19618fec57d12efe212a8370',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'upload_date': '20150212',
|
||||
'duration': 2694,
|
||||
},
|
||||
}, {
|
||||
# multiple formats via video_materials.json API
|
||||
'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641',
|
||||
# multiple formats
|
||||
'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
|
||||
'info_dict': {
|
||||
'id': '113641',
|
||||
'id': '364746',
|
||||
'ext': 'mp4',
|
||||
'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
|
||||
'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2',
|
||||
'description': 'md5:a242eea0031fd180a4497d52640a9572',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'upload_date': '20160407',
|
||||
'duration': 179,
|
||||
@@ -48,84 +44,47 @@ class FirstTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API
|
||||
'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038',
|
||||
'md5': '519d306c5b5669761fd8906c39dbee23',
|
||||
'info_dict': {
|
||||
'id': '47038',
|
||||
'ext': 'mp4',
|
||||
'title': '"Побег". Второй сезон. 3 серия',
|
||||
'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'upload_date': '20120516',
|
||||
'duration': 3080,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.1tv.ru/videoarchive/9967',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
# Videos with multiple formats only available via this API
|
||||
video = self._download_json(
|
||||
'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id,
|
||||
video_id, fatal=False)
|
||||
|
||||
description, thumbnail, upload_date, duration = [None] * 4
|
||||
|
||||
if video:
|
||||
item = video[0]
|
||||
title = item['title']
|
||||
quality = qualities(('ld', 'sd', 'hd', ))
|
||||
formats = [{
|
||||
'url': f['src'],
|
||||
'format_id': f.get('name'),
|
||||
'quality': quality(f.get('name')),
|
||||
} for f in item['mbr'] if f.get('src')]
|
||||
thumbnail = item.get('poster')
|
||||
else:
|
||||
# Some videos are not available via video_materials.json
|
||||
video = self._download_xml(
|
||||
'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id,
|
||||
video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
}
|
||||
|
||||
item = xpath_element(video, './channel/item', fatal=True)
|
||||
title = xpath_text(item, './title', fatal=True)
|
||||
formats = [{
|
||||
'url': content.attrib['url'],
|
||||
} for content in item.findall(
|
||||
compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')]
|
||||
thumbnail = xpath_attr(
|
||||
item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
playlist_url = compat_urlparse.urljoin(url, self._search_regex(
|
||||
r'data-playlist-url="([^"]+)', webpage, 'playlist url'))
|
||||
|
||||
item = self._download_json(playlist_url, display_id)[0]
|
||||
video_id = item['id']
|
||||
quality = qualities(('ld', 'sd', 'hd', ))
|
||||
formats = []
|
||||
for f in item.get('mbr', []):
|
||||
src = f.get('src')
|
||||
if not src:
|
||||
continue
|
||||
fname = f.get('name')
|
||||
formats.append({
|
||||
'url': src,
|
||||
'format_id': fname,
|
||||
'quality': quality(fname),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False)
|
||||
if webpage:
|
||||
title = self._html_search_regex(
|
||||
(r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||
r"'title'\s*:\s*'([^']+)'"),
|
||||
webpage, 'title', default=None) or title
|
||||
description = self._html_search_regex(
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||
webpage, 'description', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = thumbnail or self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'video:duration', webpage, 'video duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
|
||||
title = self._html_search_regex(
|
||||
(r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||
r"'title'\s*:\s*'([^']+)'"),
|
||||
webpage, 'title', default=None) or item['title']
|
||||
description = self._html_search_regex(
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||
webpage, 'description', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'video:duration', webpage, 'video duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
|
@@ -1,24 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
replace_extension,
|
||||
)
|
||||
|
||||
|
||||
class FiveMinIE(InfoExtractor):
|
||||
IE_NAME = '5min'
|
||||
_VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
|
||||
_VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -29,8 +16,16 @@ class FiveMinIE(InfoExtractor):
|
||||
'id': '518013791',
|
||||
'ext': 'mp4',
|
||||
'title': 'iPad Mini with Retina Display Review',
|
||||
'description': 'iPad mini with Retina Display review',
|
||||
'duration': 177,
|
||||
'uploader': 'engadget',
|
||||
'upload_date': '20131115',
|
||||
'timestamp': 1384515288,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
# From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
|
||||
@@ -44,108 +39,16 @@ class FiveMinIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'no longer available',
|
||||
},
|
||||
{
|
||||
'url': 'http://embed.5min.com/518726732/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://delivery.vidible.tv/aol?playList=518013791',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
_ERRORS = {
|
||||
'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.',
|
||||
'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.',
|
||||
'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.',
|
||||
'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.',
|
||||
'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
|
||||
'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
|
||||
}
|
||||
_QUALITIES = {
|
||||
1: {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
2: {
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
},
|
||||
4: {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
8: {
|
||||
'width': 1920,
|
||||
'height': 1080,
|
||||
},
|
||||
16: {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
32: {
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
},
|
||||
64: {
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
},
|
||||
128: {
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
sid = mobj.group('sid')
|
||||
|
||||
if mobj.group('query'):
|
||||
qs = compat_parse_qs(mobj.group('query'))
|
||||
if not qs.get('playList'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
video_id = qs['playList'][0]
|
||||
if qs.get('sid'):
|
||||
sid = qs['sid'][0]
|
||||
|
||||
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
|
||||
if not sid:
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed page')
|
||||
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
|
||||
|
||||
response = self._download_json(
|
||||
'https://syn.5min.com/handlers/SenseHandler.ashx?' +
|
||||
compat_urllib_parse_urlencode({
|
||||
'func': 'GetResults',
|
||||
'playlist': video_id,
|
||||
'sid': sid,
|
||||
'isPlayerSeed': 'true',
|
||||
'url': embed_url,
|
||||
}),
|
||||
video_id)
|
||||
if not response['success']:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (
|
||||
self.IE_NAME,
|
||||
self._ERRORS.get(response['errorMessage'], response['errorMessage'])),
|
||||
expected=True)
|
||||
info = response['binding'][0]
|
||||
|
||||
formats = []
|
||||
parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
|
||||
for rendition in info['Renditions']:
|
||||
if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
|
||||
continue
|
||||
else:
|
||||
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
|
||||
quality = self._QUALITIES.get(rendition['ID'], {})
|
||||
formats.append({
|
||||
'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']),
|
||||
'url': rendition_url,
|
||||
'width': quality.get('width'),
|
||||
'height': quality.get('height'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['Title'],
|
||||
'thumbnail': info.get('ThumbURL'),
|
||||
'duration': parse_duration(info.get('Duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result('aol-video:%s' % video_id)
|
||||
|
115
youtube_dl/extractor/flipagram.py
Normal file
115
youtube_dl/extractor/flipagram.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class FlipagramIE(InfoExtractor):
    # Extractor for flipagram.com pages of the form /f/<id>. Metadata is read
    # from the JSON object the page assigns to window.reactH2O; JSON-LD, when
    # present, takes precedence for title and description.
    _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://flipagram.com/f/nyvTSJMKId',
        'md5': '888dcf08b7ea671381f00fab74692755',
        'info_dict': {
            'id': 'nyvTSJMKId',
            'ext': 'mp4',
            'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
            'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
            'duration': 35.571,
            'timestamp': 1461244995,
            'upload_date': '20160421',
            'uploader': 'kitty juria',
            'uploader_id': 'sjuria101',
            'creator': 'kitty juria',
            'view_count': int,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'comments': list,
            'formats': 'mincount:2',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_data = self._parse_json(
            self._search_regex(
                r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
            video_id)

        # Both keys are mandatory: without them there is nothing to download.
        flipagram = video_data['flipagram']
        video = flipagram['video']

        json_ld = self._search_json_ld(webpage, video_id, default={})
        title = json_ld.get('title') or flipagram['captionText']
        description = json_ld.get('description') or flipagram.get('captionText')

        formats = [{
            'url': video['url'],
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            # NOTE(review): size is looked up on the top-level page data, not
            # on `video`; kept as-is -- confirm against the site's JSON.
            'filesize': int_or_none(video_data.get('size')),
        }]

        # The music track preview, when available, is offered as an extra
        # audio-only format.
        preview_url = try_get(
            flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
        if preview_url:
            formats.append({
                'url': preview_url,
                'ext': 'm4a',
                'vcodec': 'none',
            })

        self._sort_formats(formats)

        # Optional containers: `or {}` / `or []` also covers an explicit JSON
        # null, which dict.get(key, default) would pass through as None.
        counts = flipagram.get('counts') or {}
        user = flipagram.get('user') or {}

        thumbnails = [{
            'url': self._proto_relative_url(cover['url']),
            'width': int_or_none(cover.get('width')),
            'height': int_or_none(cover.get('height')),
            'filesize': int_or_none(cover.get('size')),
        } for cover in (flipagram.get('covers') or []) if cover.get('url')]

        # Note that this only retrieves comments that are initially loaded.
        # For videos with large amounts of comments, most won't be retrieved.
        comments = []
        comments_by_id = video.get('comments') or {}
        comment_items = (comments_by_id.get(video_id) or {}).get('items') or []
        for comment in comment_items:
            text = comment.get('comment')
            # Only the first element of a non-empty list is used as the text.
            if not text or not isinstance(text, list):
                continue
            author = comment.get('user') or {}
            comments.append({
                'author': author.get('name'),
                'author_id': author.get('username'),
                'id': comment.get('id'),
                'text': text[0],
                'timestamp': unified_timestamp(comment.get('created')),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            # The second argument is the scale divisor applied to the raw
            # value (the test expects 35.571, i.e. a value in seconds).
            'duration': float_or_none(flipagram.get('duration'), 1000),
            'thumbnails': thumbnails,
            'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
            'uploader': user.get('name'),
            'uploader_id': user.get('username'),
            'creator': user.get('name'),
            'view_count': int_or_none(counts.get('plays')),
            'like_count': int_or_none(counts.get('likes')),
            'repost_count': int_or_none(counts.get('reflips')),
            'comment_count': int_or_none(counts.get('comments')),
            'comments': comments,
            'formats': formats,
        }
|
@@ -5,8 +5,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class Formula1IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||
'info_dict': {
|
||||
@@ -15,7 +15,10 @@ class Formula1IE(InfoExtractor):
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
@@ -43,14 +43,14 @@ class FourTubeIE(InfoExtractor):
|
||||
'uploadDate', webpage))
|
||||
thumbnail = self._html_search_meta('thumbnailUrl', webpage)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
|
||||
r'<a class="item-to-subscribe" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
|
||||
r'<a class="item-to-subscribe" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
categories_html = self._search_regex(
|
||||
r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>',
|
||||
r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="[^"]*?list[^"]*?">(.*?)</ul>',
|
||||
webpage, 'categories', fatal=False)
|
||||
categories = None
|
||||
if categories_html:
|
||||
@@ -59,10 +59,10 @@ class FourTubeIE(InfoExtractor):
|
||||
r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserPlays:([0-9,]+)">',
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
|
||||
webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserLikes:([0-9,]+)">',
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
|
||||
webpage, 'like count', fatal=False))
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
|
||||
|
@@ -2,7 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FOXIE(InfoExtractor):
|
||||
@@ -29,11 +32,12 @@ class FOXIE(InfoExtractor):
|
||||
|
||||
release_url = self._parse_json(self._search_regex(
|
||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||
video_id)['release_url'] + '&switch=http'
|
||||
video_id)['release_url']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(release_url, {'force_smil_url': True}),
|
||||
'url': smuggle_url(update_url_query(
|
||||
release_url, {'switch': 'http'}), {'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user