Compare commits
645 Commits
2016.05.21
...
2016.07.26
Author | SHA1 | Date | |
---|---|---|---|
|
2a73a95b8c | ||
|
3c519ad54d | ||
|
3e050d51d4 | ||
|
ced70c8640 | ||
|
9a700deea4 | ||
|
dc35ba0eba | ||
|
88bd486b9a | ||
|
7f8b92e3cf | ||
|
35f6e0ff36 | ||
|
326fa4e6e5 | ||
|
c74299a72c | ||
|
10a1bb3a78 | ||
|
4d3e543c73 | ||
|
05d1e7aaa9 | ||
|
a3aa814b77 | ||
|
5c32a77cad | ||
|
14a28e705b | ||
|
cc99d4f826 | ||
|
712c7530ff | ||
|
0a147785e8 | ||
|
59eaf69e33 | ||
|
e8be2943a7 | ||
|
8fdc538b46 | ||
|
9513c1eb17 | ||
|
ae6fff4e64 | ||
|
5a65668e25 | ||
|
f75e6890db | ||
|
d9cb92c840 | ||
|
94c04a3c79 | ||
|
f094834857 | ||
|
111de00289 | ||
|
b4a131e1a5 | ||
|
f1991ce928 | ||
|
6548030a17 | ||
|
3a8947650b | ||
|
1979969f91 | ||
|
0673741af3 | ||
|
c8e170b209 | ||
|
bbe1f3634a | ||
|
4671dd41b2 | ||
|
f164b97123 | ||
|
5275efe30d | ||
|
b13647cf3c | ||
|
add7d2a0e2 | ||
|
e298d3a08c | ||
|
fd8c8c7dcd | ||
|
9158af16cc | ||
|
c6668e4ad1 | ||
|
84e8cca48b | ||
|
790b06b7d4 | ||
|
740d7c49c2 | ||
|
4e51ec5f57 | ||
|
05087d1b4c | ||
|
a66a73ee90 | ||
|
8188b923db | ||
|
d993a1354d | ||
|
e8882e7043 | ||
|
1056821799 | ||
|
890e6d3309 | ||
|
246080d378 | ||
|
b1ea680270 | ||
|
45550d1039 | ||
|
7cdfc4c90f | ||
|
af21f56f98 | ||
|
1a8f0773b6 | ||
|
59cc5bd8bf | ||
|
49bc16b95e | ||
|
a2f9ca1e67 | ||
|
371ddb14fe | ||
|
998895dffa | ||
|
aadd3ce21f | ||
|
ae7b846203 | ||
|
21ba7d0981 | ||
|
691fbe7f98 | ||
|
2e221ca3a8 | ||
|
317f7ab634 | ||
|
23495d6a39 | ||
|
224db034ab | ||
|
ad27649be3 | ||
|
84571be645 | ||
|
7b0d333a7e | ||
|
342f0c3682 | ||
|
38e0f16a94 | ||
|
e910fe2fe4 | ||
|
233b58dec7 | ||
|
c39b2ed990 | ||
|
35ec86689c | ||
|
c485959034 | ||
|
a0560d8ab8 | ||
|
0385aa6199 | ||
|
00f4764cb7 | ||
|
51c2cd0b83 | ||
|
5f5a9d6158 | ||
|
2d19fb5072 | ||
|
9d865a1af6 | ||
|
41aa44259d | ||
|
381ff44756 | ||
|
7f29cf545a | ||
|
7d1219f3e0 | ||
|
f1b4af7d79 | ||
|
8a8590a617 | ||
|
4a7a5e41f7 | ||
|
2a49d01600 | ||
|
b99af8a51c | ||
|
8e7020daef | ||
|
a26bcc61c1 | ||
|
5c4dcf8172 | ||
|
e9fb6a4bbe | ||
|
e2dbcaa1bf | ||
|
ae01850165 | ||
|
c3baaedfc8 | ||
|
0b68de3cc1 | ||
|
39e9d524e5 | ||
|
865b087224 | ||
|
3121b25639 | ||
|
0286b85c79 | ||
|
ab52bb5137 | ||
|
61a98b8623 | ||
|
6daf34a045 | ||
|
c03adf90bd | ||
|
0ece114b7b | ||
|
5b6a74856b | ||
|
ce43100a01 | ||
|
8cc9b4016d | ||
|
31eeab9f41 | ||
|
9558dcec9c | ||
|
6e6b70d65f | ||
|
d417fd88d0 | ||
|
9e4f5dc1e9 | ||
|
1251565ee0 | ||
|
1f7258a367 | ||
|
0af985069b | ||
|
0de168f7ed | ||
|
95b31e266b | ||
|
6b3a3098b5 | ||
|
2de624fdd5 | ||
|
3fee7f636c | ||
|
89e2fff2b7 | ||
|
cedc70b292 | ||
|
07d7689f2e | ||
|
ae8cb5328d | ||
|
2e32ac0b9a | ||
|
672f01c370 | ||
|
e2d616dd30 | ||
|
0ab7f4fe2b | ||
|
29c4a07776 | ||
|
826e911e41 | ||
|
30d22dae8e | ||
|
ec3518725b | ||
|
5f87d845eb | ||
|
571808a7aa | ||
|
dfe5fa49ae | ||
|
01a0c511eb | ||
|
b3d30315ce | ||
|
882af14d7d | ||
|
47335a0efa | ||
|
34bc2d9dfd | ||
|
08c7af4afa | ||
|
f7291a0b7c | ||
|
c65aa4e9e1 | ||
|
ad213a1d74 | ||
|
43f1e4e41e | ||
|
54b0e909d5 | ||
|
f8752b86ac | ||
|
84c237fb8a | ||
|
ab49d7a9fa | ||
|
b4173f1551 | ||
|
2817b99cf2 | ||
|
001fffd004 | ||
|
0e94b4713d | ||
|
a6d3b89feb | ||
|
6c26815d63 | ||
|
73c4ac2c95 | ||
|
84f214d840 | ||
|
e3f88be7a9 | ||
|
31af3e35e0 | ||
|
94a5cff91d | ||
|
77082c7b9e | ||
|
252a1f75d2 | ||
|
5abf513cf8 | ||
|
c6054e3201 | ||
|
4080530624 | ||
|
c25f1a9b63 | ||
|
dfaa86b75e | ||
|
d9163ae3b6 | ||
|
dafafe7cf1 | ||
|
81953d1ae5 | ||
|
3a212ed62e | ||
|
195f084542 | ||
|
aa7a455b2e | ||
|
6a4e659c93 | ||
|
40f3666f6b | ||
|
dd801bbe18 | ||
|
38cce791c7 | ||
|
bf3ae6a543 | ||
|
bff98341d5 | ||
|
2644e911be | ||
|
a5f67895d3 | ||
|
15e4b6b758 | ||
|
2b28b892d8 | ||
|
7507fc98cb | ||
|
477b7a8474 | ||
|
034a884957 | ||
|
64436cb1a4 | ||
|
f138873900 | ||
|
e793338c88 | ||
|
369bb06206 | ||
|
2cb31d288e | ||
|
c723d1cd8d | ||
|
1f55234057 | ||
|
04006fae8d | ||
|
4cb13d0d6a | ||
|
a1f6f5c768 | ||
|
05c7feec77 | ||
|
bf83024826 | ||
|
a0cfd82dda | ||
|
1b734adb2d | ||
|
9b724d7277 | ||
|
c3a5dd3b5d | ||
|
e3755a624b | ||
|
95cf60e826 | ||
|
6b03e1e25d | ||
|
712b0b5b70 | ||
|
6a424391d9 | ||
|
dbf0157a26 | ||
|
7deef1ba67 | ||
|
fd6ca38262 | ||
|
bdafd88da0 | ||
|
7a1e71575e | ||
|
ac2d8f54d1 | ||
|
14ff6baa0e | ||
|
bb08101ec4 | ||
|
bc4b2d75ba | ||
|
35fc3021ba | ||
|
347227237b | ||
|
564dc3c6e8 | ||
|
9f4576a7eb | ||
|
f11315e8d4 | ||
|
0c2ac64bb8 | ||
|
a9eede3913 | ||
|
9e29ef13a3 | ||
|
eaaaaec042 | ||
|
3cb3b60064 | ||
|
044e3d91b5 | ||
|
c9e538a3b1 | ||
|
76dad392f5 | ||
|
9617b557aa | ||
|
bf4fa24414 | ||
|
20361b4f25 | ||
|
05a0068a76 | ||
|
66a42309fa | ||
|
fd94e2671a | ||
|
8ff6697861 | ||
|
eafa643715 | ||
|
049da7cb6c | ||
|
7dbeee7e22 | ||
|
93ad6c6bfa | ||
|
329179073b | ||
|
4d86d2008e | ||
|
ab47b6e881 | ||
|
df43389ade | ||
|
397b305cfe | ||
|
e496fa50cd | ||
|
06a96da15b | ||
|
70157c2c43 | ||
|
c58ed8563d | ||
|
4c7821227c | ||
|
42362fdb5e | ||
|
97124e572d | ||
|
32616c14cc | ||
|
8174d0fe95 | ||
|
8704778d95 | ||
|
c287f2bc60 | ||
|
9ea5c04c0d | ||
|
fd7a7498a4 | ||
|
e3a6747d8f | ||
|
f41ffc00d1 | ||
|
81fda15369 | ||
|
427cd050a3 | ||
|
b0c200f1ec | ||
|
92747e664a | ||
|
f1f336322d | ||
|
bf8dd79045 | ||
|
c6781156aa | ||
|
59bbe4911a | ||
|
4f3c5e0627 | ||
|
f484c5fa25 | ||
|
88d9f6c0c4 | ||
|
3c9c088f9c | ||
|
fc3996bfe1 | ||
|
5b6ad8630c | ||
|
30105f4ac0 | ||
|
1143535d76 | ||
|
7d52c052ef | ||
|
a2406fce3c | ||
|
3b34ab538c | ||
|
ac782306f1 | ||
|
0c00e889f3 | ||
|
ce96ed05f4 | ||
|
0463b77a1f | ||
|
2d185706ea | ||
|
b72b44318c | ||
|
46f59e89ea | ||
|
b4241e308e | ||
|
3d4b08dfc7 | ||
|
be49068d65 | ||
|
525cedb971 | ||
|
de3c7fe0d4 | ||
|
896cc72750 | ||
|
c1ff6e1ad0 | ||
|
fee70322d7 | ||
|
8065d6c55f | ||
|
494172d2e5 | ||
|
6e3c2047f8 | ||
|
011bd3221b | ||
|
b46eabecd3 | ||
|
0437307a41 | ||
|
22b7ac13ef | ||
|
96f88e91b7 | ||
|
3331a4644d | ||
|
adf1921dc1 | ||
|
97674f0419 | ||
|
73843ae8ac | ||
|
f2bb8c036a | ||
|
75ca6bcee2 | ||
|
089657ed1f | ||
|
b5eab86c24 | ||
|
c8e3e0974b | ||
|
dfc8f46e1c | ||
|
c143ddce5d | ||
|
169d836feb | ||
|
6ae938b295 | ||
|
cf40fdf5c1 | ||
|
23bdae0955 | ||
|
ca74c90bf5 | ||
|
7cfc1e2a10 | ||
|
1ac5705f62 | ||
|
e4f90ea0a7 | ||
|
cdfc187cd5 | ||
|
feef925f49 | ||
|
19e2d1cdea | ||
|
8369a4fe76 | ||
|
1f749b6658 | ||
|
819707920a | ||
|
43518503a6 | ||
|
5839d556e4 | ||
|
6c83e583b3 | ||
|
6aeb64b673 | ||
|
6cd64b6806 | ||
|
e154c65128 | ||
|
a50fd6e026 | ||
|
6a55bb66ee | ||
|
7c05097633 | ||
|
589568789f | ||
|
7577d849a6 | ||
|
cb23192bc4 | ||
|
41c1023300 | ||
|
90b6288cce | ||
|
c1823c8ad9 | ||
|
d7c6c656c5 | ||
|
b0b128049a | ||
|
e8f13f2637 | ||
|
b5aad37f6b | ||
|
6d0d4fc26d | ||
|
0278aa443f | ||
|
1f35745758 | ||
|
573c35272f | ||
|
09e3f91e40 | ||
|
1b6cf16be7 | ||
|
26264cb056 | ||
|
a72df5f36f | ||
|
c878e635de | ||
|
0f47cc2e92 | ||
|
5fc2757682 | ||
|
e3944c2621 | ||
|
667d96480b | ||
|
e6fe993c31 | ||
|
d0d93f76ea | ||
|
20a6a154fe | ||
|
f011876076 | ||
|
6929569403 | ||
|
eb451890da | ||
|
ded7511a70 | ||
|
d2161cade5 | ||
|
27e5fa8198 | ||
|
efbd1eb51a | ||
|
369ff75081 | ||
|
47212f7bcb | ||
|
4c93ee8d14 | ||
|
8bc4dbb1af | ||
|
6c3760292c | ||
|
4cef70db6c | ||
|
ff4af6ec59 | ||
|
d01fb21d4c | ||
|
a4ea28eee6 | ||
|
bc2a871f3e | ||
|
1759672eed | ||
|
fea55ef4a9 | ||
|
16b6bd01d2 | ||
|
14d0f4e0f3 | ||
|
778f969447 | ||
|
79cd8b3d8a | ||
|
b4663f12b1 | ||
|
b50e02c1e4 | ||
|
33b72ce64e | ||
|
cf2bf840ba | ||
|
bccdac6874 | ||
|
e69f9f5d68 | ||
|
77a9a9c295 | ||
|
84dcd1c4e4 | ||
|
971e3b7520 | ||
|
4e79011729 | ||
|
a936ac321c | ||
|
98960c911c | ||
|
329ca3bef6 | ||
|
2c3322e36e | ||
|
80ae228b34 | ||
|
6d28c408cf | ||
|
c83b35d4aa | ||
|
94e5d6aedb | ||
|
531a74968c | ||
|
c5edd147d1 | ||
|
856150d056 | ||
|
03ebea89b0 | ||
|
15d106787e | ||
|
7aab3696dd | ||
|
47787efa2b | ||
|
4a420119a6 | ||
|
33751818d3 | ||
|
698f127c1a | ||
|
fe458b6596 | ||
|
21ac1a8ac3 | ||
|
79027c0ea0 | ||
|
4cad2929cd | ||
|
62666af99f | ||
|
9ddc289f88 | ||
|
6626c214e1 | ||
|
d845622b2e | ||
|
1058f56e96 | ||
|
0434358823 | ||
|
3841256c2c | ||
|
bdf16f8140 | ||
|
836ab0c554 | ||
|
6c0376fe4f | ||
|
1fa309da40 | ||
|
daa0df9e8b | ||
|
09728d5fbc | ||
|
c16f8a4659 | ||
|
a225238530 | ||
|
55b2f099c0 | ||
|
9631a94fb5 | ||
|
cc4444662c | ||
|
de3eb07ed6 | ||
|
5de008e8c3 | ||
|
3e74b444e7 | ||
|
e1e0a10c56 | ||
|
436214baf7 | ||
|
506d0e9693 | ||
|
55290788d3 | ||
|
bc7e7adf51 | ||
|
b0aebe702c | ||
|
416878f41f | ||
|
c0fed3bda5 | ||
|
bb1e44cc8e | ||
|
21efee5f8b | ||
|
e2713d32f4 | ||
|
e21c26daf9 | ||
|
1594a4932f | ||
|
6869d634c6 | ||
|
50918c4ee0 | ||
|
6c33d24b46 | ||
|
be6217b261 | ||
|
9d51a0a9a1 | ||
|
39da509f67 | ||
|
a479b8f687 | ||
|
48a5eabc48 | ||
|
11380753b5 | ||
|
411c590a1f | ||
|
6da8d7de69 | ||
|
c6308b3153 | ||
|
fc0a45fa41 | ||
|
e6e90515db | ||
|
22a0a95247 | ||
|
50ce1c331c | ||
|
7264e38591 | ||
|
33d9f3707c | ||
|
a26a9d6239 | ||
|
a4a8201c02 | ||
|
a6571f1073 | ||
|
57b6e9652e | ||
|
3d9b3605a3 | ||
|
74193838f7 | ||
|
fb94e260b5 | ||
|
345dec937f | ||
|
4315f74fa8 | ||
|
e67f688025 | ||
|
db59b37d0b | ||
|
244fe977fe | ||
|
7b0d1c2859 | ||
|
21d0a8e48b | ||
|
47f12ad3e3 | ||
|
8f1aaa97a1 | ||
|
9d78524cbe | ||
|
bc270284b5 | ||
|
c93b4eaceb | ||
|
71b9cb3107 | ||
|
633b444fd2 | ||
|
51c4d85ce7 | ||
|
631d4c87ee | ||
|
1e236d7e23 | ||
|
2c34735267 | ||
|
39b32571df | ||
|
db56f281d9 | ||
|
e92b552a10 | ||
|
1ae6c83bce | ||
|
0fc832e1b2 | ||
|
7def35712a | ||
|
cad88f96dc | ||
|
762d44c956 | ||
|
4d8856d511 | ||
|
c917106be4 | ||
|
76e9cd7f24 | ||
|
bf4c6a38e1 | ||
|
7f3c3dfa52 | ||
|
9c3c447eb3 | ||
|
ad73083ff0 | ||
|
1e8b59243f | ||
|
c88270271e | ||
|
b96f007eeb | ||
|
9a4aec8b7e | ||
|
54fb199681 | ||
|
8c32e5dc32 | ||
|
0ea590076f | ||
|
4a684895c0 | ||
|
f4e4aa9b6b | ||
|
5e3856a2c5 | ||
|
6e6b9f600f | ||
|
6a1df4fb5f | ||
|
dde1ce7c06 | ||
|
811586ebcf | ||
|
0ff3749bfe | ||
|
28bab13348 | ||
|
877032314f | ||
|
e7d85c4ef7 | ||
|
8ec2b2c41c | ||
|
197a5da1d0 | ||
|
abbb2938fa | ||
|
f657b1a5f2 | ||
|
86a52881c6 | ||
|
8267423652 | ||
|
917a3196f8 | ||
|
56bd028a0f | ||
|
681b923b5c | ||
|
9ed6d8c6c5 | ||
|
f3fb420b82 | ||
|
165e3561e9 | ||
|
27f17c0eab | ||
|
44c8892369 | ||
|
f574103d7c | ||
|
6d138e98e3 | ||
|
2a329110b9 | ||
|
2bee7b25f3 | ||
|
92cf872a48 | ||
|
6461f2b7ec | ||
|
807cf7b07f | ||
|
de7d76af52 | ||
|
11c70deba7 | ||
|
f36532404d | ||
|
77b8b4e696 | ||
|
2615fa7584 | ||
|
3a686853e1 | ||
|
949fc42e00 | ||
|
33a1ff7113 | ||
|
bec2c14f2c | ||
|
37f972954d | ||
|
3874e6ea66 | ||
|
fac2af3c51 | ||
|
6f8cb24219 | ||
|
448bb5f333 | ||
|
293c255688 | ||
|
ac88d2316e | ||
|
5950cb1d6d | ||
|
761052db92 | ||
|
240b60453e | ||
|
85b0fe7d64 | ||
|
0a5685b26f | ||
|
6f748df43f | ||
|
b410cb83d4 | ||
|
da9d82840a | ||
|
4ee0b8afdb | ||
|
1de32771e1 | ||
|
688c634b7d | ||
|
0d6ee97508 | ||
|
6b43132ce9 | ||
|
a4690b3244 | ||
|
444417edb5 | ||
|
277c7465f5 | ||
|
25bcd3550e | ||
|
a4760d204f | ||
|
e8593f346a | ||
|
05b651e3a5 | ||
|
42a7439717 | ||
|
b1e9ebd080 | ||
|
0c50eeb987 | ||
|
4b464a6a78 | ||
|
5db9df622f | ||
|
5181759c0d | ||
|
e54373204a | ||
|
102810ef04 | ||
|
78d3b3e213 | ||
|
7a46542f97 | ||
|
eb7941e3e6 | ||
|
db3b8b2103 | ||
|
c5f5155100 | ||
|
4a12077855 | ||
|
a4a7c44bd3 | ||
|
70346165fe | ||
|
c776b99691 | ||
|
e9297256d4 | ||
|
e5871c672b | ||
|
9b06b0fb92 | ||
|
4f3a25c2b4 | ||
|
21a19aa94d | ||
|
c6b9cf05e1 | ||
|
4d8819d249 | ||
|
898f4b49cc | ||
|
0150a00f33 | ||
|
c8831015f4 | ||
|
92d221ad48 | ||
|
0db9a05f88 | ||
|
e03b35b8f9 | ||
|
d2fee3c99e | ||
|
93fdb14177 | ||
|
370d4eb8ad | ||
|
3452c3a27c | ||
|
81f35fee2f | ||
|
0fdbe3146c | ||
|
8d93c21466 | ||
|
1dbfd78754 | ||
|
22e35adefd | ||
|
833b644fff | ||
|
57cf9b7f06 | ||
|
ccff2c404d | ||
|
14f7a2b8af | ||
|
c0837a12c8 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.1**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.26.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.26.1**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.05.21.1
|
||||
[debug] youtube-dl version 2016.07.26.1
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
22
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
22
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
## Please follow the guide below
|
||||
|
||||
- You will be asked some questions, please read them **carefully** and answer honestly
|
||||
- Put an `x` into all the boxes [ ] relevant to your *pull request* (like that [x])
|
||||
- Use *Preview* tab to see how your *pull request* will actually look like
|
||||
|
||||
---
|
||||
|
||||
### Before submitting a *pull request* make sure you have:
|
||||
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
|
||||
### What is the purpose of your *pull request*?
|
||||
- [ ] Bug fix
|
||||
- [ ] New extractor
|
||||
- [ ] New feature
|
||||
|
||||
---
|
||||
|
||||
### Description of your *pull request* and other information
|
||||
|
||||
Explanation of your *pull request* in arbitrary form goes here. Please make sure the description explains the purpose and effect of your *pull request* and is worded well enough to be understood. Provide as much context and examples as possible.
|
6
.gitignore
vendored
6
.gitignore
vendored
@@ -28,12 +28,16 @@ updates_key.pem
|
||||
*.mp4
|
||||
*.m4a
|
||||
*.m4v
|
||||
*.mp3
|
||||
*.part
|
||||
*.swp
|
||||
test/testdata
|
||||
test/local_parameters.json
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
|
||||
# IntelliJ related files
|
||||
.idea
|
||||
.idea/*
|
||||
*.iml
|
||||
|
||||
tmp/
|
||||
|
@@ -7,14 +7,10 @@ python:
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
sudo: false
|
||||
install:
|
||||
- bash ./devscripts/install_srelay.sh
|
||||
- export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
- phihag@phihag.de
|
||||
- yasoob.khld@gmail.com
|
||||
# irc:
|
||||
# channels:
|
||||
|
7
AUTHORS
7
AUTHORS
@@ -172,3 +172,10 @@ blahgeek
|
||||
Kevin Deldycke
|
||||
inondle
|
||||
Tomáš Čech
|
||||
Déstin Reed
|
||||
Roman Tsiupa
|
||||
Artur Krysiak
|
||||
Jakub Adam Wieczorek
|
||||
Aleksandar Topuzović
|
||||
Nehal Patel
|
||||
Rob van Bekkum
|
||||
|
154
CONTRIBUTING.md
154
CONTRIBUTING.md
@@ -97,9 +97,17 @@ If you want to add support for a new site, first of all **make sure** this site
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
||||
2. Check out the source code with:
|
||||
|
||||
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
|
||||
|
||||
3. Start a new git branch with
|
||||
|
||||
cd youtube-dl
|
||||
git checkout -b yourextractor
|
||||
|
||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||
|
||||
```python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
@@ -142,17 +150,149 @@ After you have ensured this site is distributing it's content legally, you can f
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
## youtube-dl coding conventions
|
||||
|
||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
||||
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
- `url` (media download URL) or `formats`
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
|
||||
|
||||
```python
|
||||
meta = self._download_json(url, video_id)
|
||||
```
|
||||
|
||||
Assume at this point `meta`'s layout is:
|
||||
|
||||
```python
|
||||
{
|
||||
...
|
||||
"summary": "some fancy summary text",
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
|
||||
```python
|
||||
description = meta.get('summary') # correct
|
||||
```
|
||||
|
||||
and not like:
|
||||
|
||||
```python
|
||||
description = meta['summary'] # incorrect
|
||||
```
|
||||
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
|
||||
|
||||
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||
|
||||
```python
|
||||
description = self._search_regex(
|
||||
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||
webpage, 'description', fatal=False)
|
||||
```
|
||||
|
||||
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
|
||||
|
||||
You can also pass `default=<some fallback value>`, for example:
|
||||
|
||||
```python
|
||||
description = self._search_regex(
|
||||
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
|
||||
|
||||
### Provide fallbacks
|
||||
|
||||
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
|
||||
|
||||
#### Example
|
||||
|
||||
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
|
||||
|
||||
```python
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
```python
|
||||
title = meta.get('title') or self._og_search_title(webpage)
|
||||
```
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
|
||||
#### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
```html
|
||||
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
|
||||
```
|
||||
|
||||
The code for that task should look similar to:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
|
||||
```
|
||||
|
||||
Or even better:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
|
||||
The code definitely should not look like:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
|
4
Makefile
4
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
@@ -69,7 +69,7 @@ README.txt: README.md
|
||||
pandoc -f markdown -t plain README.md -o README.txt
|
||||
|
||||
youtube-dl.1: README.md
|
||||
$(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md
|
||||
$(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
|
||||
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
|
||||
rm -f youtube-dl.1.temp.md
|
||||
|
||||
|
227
README.md
227
README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
|
||||
sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
If you do not have curl, you can alternatively use a recent wget:
|
||||
@@ -25,20 +25,26 @@ If you do not have curl, you can alternatively use a recent wget:
|
||||
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
||||
|
||||
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
|
||||
|
||||
brew install youtube-dl
|
||||
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
|
||||
|
||||
You can also use pip:
|
||||
|
||||
sudo pip install youtube-dl
|
||||
sudo pip install --upgrade youtube-dl
|
||||
|
||||
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
||||
|
||||
OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
|
||||
|
||||
brew install youtube-dl
|
||||
|
||||
Or with [MacPorts](https://www.macports.org/):
|
||||
|
||||
sudo port install youtube-dl
|
||||
|
||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
|
||||
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a small command-line program to download videos from
|
||||
**youtube-dl** is a command-line program to download videos from
|
||||
YouTube.com and a few more sites. It requires the Python interpreter, version
|
||||
2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
|
||||
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
|
||||
@@ -73,8 +79,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
repairs broken URLs, but emits an error if
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: Do not read the user
|
||||
in the global configuration file
|
||||
/etc/youtube-dl.conf: Do not read the user
|
||||
configuration in ~/.config/youtube-
|
||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||
on Windows)
|
||||
@@ -97,9 +103,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
(experimental)
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
(experimental)
|
||||
--cn-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some Chinese sites. The default proxy
|
||||
specified by --proxy (or none, if the
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some geo-restricted sites. The default
|
||||
proxy specified by --proxy (or none, if the
|
||||
options is not present) is used for the
|
||||
actual downloading. (experimental)
|
||||
|
||||
@@ -162,7 +168,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
(experimental)
|
||||
|
||||
## Download Options:
|
||||
-r, --rate-limit LIMIT Maximum download rate in bytes per second
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite".
|
||||
@@ -249,18 +255,19 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--write-info-json Write video metadata to a .info.json file
|
||||
--write-annotations Write video annotations to a
|
||||
.annotations.xml file
|
||||
--load-info FILE JSON file containing the video information
|
||||
--load-info-json FILE JSON file containing the video information
|
||||
(created with the "--write-info-json"
|
||||
option)
|
||||
--cookies FILE File to read cookies from and dump cookie
|
||||
jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
permanently. By default
|
||||
$XDG_CACHE_HOME/youtube-dl or
|
||||
~/.cache/youtube-dl . At the moment, only
|
||||
YouTube player files (for videos with
|
||||
obfuscated signatures) are cached, but that
|
||||
may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
@@ -417,7 +424,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
@@ -425,6 +432,7 @@ For example, with the following configuration file youtube-dl will always extrac
|
||||
--no-mtime
|
||||
--proxy 127.0.0.1:3128
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
# Lines starting with # are comments
|
||||
```
|
||||
|
||||
Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
|
||||
@@ -504,6 +512,9 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
||||
- `playlist`: Name or id of the playlist that contains the video
|
||||
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||
- `playlist_id`: Playlist identifier
|
||||
- `playlist_title`: Playlist title
|
||||
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
- `chapter`: Name or title of the chapter the video belongs to
|
||||
@@ -543,6 +554,10 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||
|
||||
#### Output template and Windows batch files
|
||||
|
||||
If you are using output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
|
||||
|
||||
#### Output template examples
|
||||
|
||||
Note on Windows you may need to use double quotes instead of single.
|
||||
@@ -693,6 +708,10 @@ hash -r
|
||||
|
||||
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
|
||||
|
||||
### youtube-dl is extremely slow to start on Windows
|
||||
|
||||
Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
|
||||
|
||||
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
||||
|
||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||
@@ -780,9 +799,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
|
||||
|
||||
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
||||
|
||||
### The exe throws a *Runtime error from Visual C++*
|
||||
### The exe throws an error due to missing `MSVCR100.dll`
|
||||
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
|
||||
|
||||
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
||||
|
||||
@@ -837,6 +856,12 @@ It is *not* possible to detect whether a URL is supported or not. That's because
|
||||
|
||||
If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
|
||||
|
||||
# Why do I need to go through that much red tape when filing bugs?
|
||||
|
||||
Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was alrady reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of whom were totally unrelated to youtube-dl.
|
||||
|
||||
youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
|
||||
|
||||
# DEVELOPER INSTRUCTIONS
|
||||
|
||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||
@@ -866,9 +891,17 @@ If you want to add support for a new site, first of all **make sure** this site
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
||||
2. Check out the source code with:
|
||||
|
||||
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
|
||||
|
||||
3. Start a new git branch with
|
||||
|
||||
cd youtube-dl
|
||||
git checkout -b yourextractor
|
||||
|
||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||
|
||||
```python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
@@ -911,20 +944,152 @@ After you have ensured this site is distributing it's content legally, you can f
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
## youtube-dl coding conventions
|
||||
|
||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
||||
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
- `url` (media download URL) or `formats`
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
|
||||
|
||||
```python
|
||||
meta = self._download_json(url, video_id)
|
||||
```
|
||||
|
||||
Assume at this point `meta`'s layout is:
|
||||
|
||||
```python
|
||||
{
|
||||
...
|
||||
"summary": "some fancy summary text",
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
|
||||
```python
|
||||
description = meta.get('summary') # correct
|
||||
```
|
||||
|
||||
and not like:
|
||||
|
||||
```python
|
||||
description = meta['summary'] # incorrect
|
||||
```
|
||||
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
|
||||
|
||||
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||
|
||||
```python
|
||||
description = self._search_regex(
|
||||
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||
webpage, 'description', fatal=False)
|
||||
```
|
||||
|
||||
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
|
||||
|
||||
You can also pass `default=<some fallback value>`, for example:
|
||||
|
||||
```python
|
||||
description = self._search_regex(
|
||||
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
|
||||
|
||||
### Provide fallbacks
|
||||
|
||||
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
|
||||
|
||||
#### Example
|
||||
|
||||
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
|
||||
|
||||
```python
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
```python
|
||||
title = meta.get('title') or self._og_search_title(webpage)
|
||||
```
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
|
||||
#### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
```html
|
||||
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
|
||||
```
|
||||
|
||||
The code for that task should look similar to:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
|
||||
```
|
||||
|
||||
Or even better:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
|
||||
The code definitely should not look like:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
# EMBEDDING YOUTUBE-DL
|
||||
|
||||
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new).
|
||||
@@ -940,7 +1105,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
```
|
||||
|
||||
Most likely, you'll want to use various options. For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
|
||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||
|
||||
|
@@ -1,17 +1,38 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from socketserver import ThreadingMixIn
|
||||
import argparse
|
||||
import ctypes
|
||||
import functools
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import traceback
|
||||
import os.path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
|
||||
from youtube_dl.compat import (
|
||||
compat_input,
|
||||
compat_http_server,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
class BuildHTTPServer(ThreadingMixIn, HTTPServer):
|
||||
# These are not used outside of buildserver.py thus not in compat.py
|
||||
|
||||
try:
|
||||
import winreg as compat_winreg
|
||||
except ImportError: # Python 2
|
||||
import _winreg as compat_winreg
|
||||
|
||||
try:
|
||||
import socketserver as compat_socketserver
|
||||
except ImportError: # Python 2
|
||||
import SocketServer as compat_socketserver
|
||||
|
||||
|
||||
class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
|
||||
allow_reuse_address = True
|
||||
|
||||
|
||||
@@ -191,7 +212,7 @@ def main(args=None):
|
||||
action='store_const', dest='action', const='service',
|
||||
help='Run as a Windows service')
|
||||
parser.add_argument('-b', '--bind', metavar='<host:port>',
|
||||
action='store', default='localhost:8142',
|
||||
action='store', default='0.0.0.0:8142',
|
||||
help='Bind to host:port (default %default)')
|
||||
options = parser.parse_args(args=args)
|
||||
|
||||
@@ -216,7 +237,7 @@ def main(args=None):
|
||||
srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
|
||||
thr = threading.Thread(target=srv.serve_forever)
|
||||
thr.start()
|
||||
input('Press ENTER to shut down')
|
||||
compat_input('Press ENTER to shut down')
|
||||
srv.shutdown()
|
||||
thr.join()
|
||||
|
||||
@@ -231,8 +252,6 @@ def rmtree(path):
|
||||
os.remove(fname)
|
||||
os.rmdir(path)
|
||||
|
||||
#==============================================================================
|
||||
|
||||
|
||||
class BuildError(Exception):
|
||||
def __init__(self, output, code=500):
|
||||
@@ -249,15 +268,25 @@ class HTTPError(BuildError):
|
||||
|
||||
class PythonBuilder(object):
|
||||
def __init__(self, **kwargs):
|
||||
pythonVersion = kwargs.pop('python', '2.7')
|
||||
python_version = kwargs.pop('python', '3.4')
|
||||
python_path = None
|
||||
for node in ('Wow6432Node\\', ''):
|
||||
try:
|
||||
key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
|
||||
key = compat_winreg.OpenKey(
|
||||
compat_winreg.HKEY_LOCAL_MACHINE,
|
||||
r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
|
||||
try:
|
||||
self.pythonPath, _ = _winreg.QueryValueEx(key, '')
|
||||
python_path, _ = compat_winreg.QueryValueEx(key, '')
|
||||
finally:
|
||||
_winreg.CloseKey(key)
|
||||
compat_winreg.CloseKey(key)
|
||||
break
|
||||
except Exception:
|
||||
raise BuildError('No such Python version: %s' % pythonVersion)
|
||||
pass
|
||||
|
||||
if not python_path:
|
||||
raise BuildError('No such Python version: %s' % python_version)
|
||||
|
||||
self.pythonPath = python_path
|
||||
|
||||
super(PythonBuilder, self).__init__(**kwargs)
|
||||
|
||||
@@ -305,8 +334,10 @@ class YoutubeDLBuilder(object):
|
||||
|
||||
def build(self):
|
||||
try:
|
||||
subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
||||
cwd=self.buildPath)
|
||||
proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
|
||||
proc.wait()
|
||||
#subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
||||
# cwd=self.buildPath)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise BuildError(e.output)
|
||||
|
||||
@@ -369,12 +400,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea
|
||||
pass
|
||||
|
||||
|
||||
class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
|
||||
|
||||
def do_GET(self):
|
||||
path = urlparse.urlparse(self.path)
|
||||
paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
|
||||
path = compat_urlparse.urlparse(self.path)
|
||||
paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
|
||||
action, _, path = path.path.strip('/').partition('/')
|
||||
if path:
|
||||
path = path.split('/')
|
||||
@@ -388,7 +419,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
builder.close()
|
||||
except BuildError as e:
|
||||
self.send_response(e.code)
|
||||
msg = unicode(e).encode('UTF-8')
|
||||
msg = compat_str(e).encode('UTF-8')
|
||||
self.send_header('Content-Type', 'text/plain; charset=UTF-8')
|
||||
self.send_header('Content-Length', len(msg))
|
||||
self.end_headers()
|
||||
@@ -400,7 +431,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
else:
|
||||
self.send_response(500, 'Malformed URL')
|
||||
|
||||
#==============================================================================
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
111
devscripts/create-github-release.py
Normal file
111
devscripts/create-github-release.py
Normal file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import mimetypes
|
||||
import netrc
|
||||
import optparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.compat import (
|
||||
compat_basestring,
|
||||
compat_input,
|
||||
compat_getpass,
|
||||
compat_print,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from youtube_dl.utils import (
|
||||
make_HTTPS_handler,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class GitHubReleaser(object):
|
||||
_API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases'
|
||||
_UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s'
|
||||
_NETRC_MACHINE = 'github.com'
|
||||
|
||||
def __init__(self, debuglevel=0):
|
||||
self._init_github_account()
|
||||
https_handler = make_HTTPS_handler({}, debuglevel=debuglevel)
|
||||
self._opener = compat_urllib_request.build_opener(https_handler)
|
||||
|
||||
def _init_github_account(self):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
self._username = info[0]
|
||||
self._password = info[2]
|
||||
compat_print('Using GitHub credentials found in .netrc...')
|
||||
return
|
||||
else:
|
||||
compat_print('No GitHub credentials found in .netrc')
|
||||
except (IOError, netrc.NetrcParseError):
|
||||
compat_print('Unable to parse .netrc')
|
||||
self._username = compat_input(
|
||||
'Type your GitHub username or email address and press [Return]: ')
|
||||
self._password = compat_getpass(
|
||||
'Type your GitHub password and press [Return]: ')
|
||||
|
||||
def _call(self, req):
|
||||
if isinstance(req, compat_basestring):
|
||||
req = sanitized_Request(req)
|
||||
# Authorizing manually since GitHub does not response with 401 with
|
||||
# WWW-Authenticate header set (see
|
||||
# https://developer.github.com/v3/#basic-authentication)
|
||||
b64 = base64.b64encode(
|
||||
('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
|
||||
req.add_header('Authorization', 'Basic %s' % b64)
|
||||
response = self._opener.open(req).read().decode('utf-8')
|
||||
return json.loads(response)
|
||||
|
||||
def list_releases(self):
|
||||
return self._call(self._API_URL)
|
||||
|
||||
def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False):
|
||||
data = {
|
||||
'tag_name': tag_name,
|
||||
'target_commitish': 'master',
|
||||
'name': name,
|
||||
'body': body,
|
||||
'draft': draft,
|
||||
'prerelease': prerelease,
|
||||
}
|
||||
req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8'))
|
||||
return self._call(req)
|
||||
|
||||
def create_asset(self, release_id, asset):
|
||||
asset_name = os.path.basename(asset)
|
||||
url = self._UPLOADS_URL % (release_id, asset_name)
|
||||
# Our files are small enough to be loaded directly into memory.
|
||||
data = open(asset, 'rb').read()
|
||||
req = sanitized_Request(url, data)
|
||||
mime_type, _ = mimetypes.guess_type(asset_name)
|
||||
req.add_header('Content-Type', mime_type or 'application/octet-stream')
|
||||
return self._call(req)
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 2:
|
||||
parser.error('Expected a version and a build directory')
|
||||
|
||||
version, build_path = args
|
||||
|
||||
releaser = GitHubReleaser()
|
||||
|
||||
new_release = releaser.create_release(version, name='youtube-dl %s' % version)
|
||||
release_id = new_release['id']
|
||||
|
||||
for asset in os.listdir(build_path):
|
||||
compat_print('Uploading %s...' % asset)
|
||||
releaser.create_asset(release_id, os.path.join(build_path, asset))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@@ -15,13 +15,9 @@ data = urllib.request.urlopen(URL).read()
|
||||
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
|
||||
template = tmplf.read()
|
||||
|
||||
md5sum = hashlib.md5(data).hexdigest()
|
||||
sha1sum = hashlib.sha1(data).hexdigest()
|
||||
sha256sum = hashlib.sha256(data).hexdigest()
|
||||
template = template.replace('@PROGRAM_VERSION@', version)
|
||||
template = template.replace('@PROGRAM_URL@', URL)
|
||||
template = template.replace('@PROGRAM_MD5SUM@', md5sum)
|
||||
template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
|
||||
template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
|
||||
template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
|
||||
template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
|
||||
|
@@ -1,8 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
mkdir -p tmp && cd tmp
|
||||
wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz
|
||||
tar zxvf srelay-0.4.8b6.tar.gz
|
||||
cd srelay-0.4.8b6
|
||||
./configure
|
||||
make
|
@@ -14,15 +14,17 @@ if os.path.exists(lazy_extractors_filename):
|
||||
os.remove(lazy_extractors_filename)
|
||||
|
||||
from youtube_dl.extractor import _ALL_CLASSES
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||
|
||||
with open('devscripts/lazy_load_template.py', 'rt') as f:
|
||||
module_template = f.read()
|
||||
|
||||
module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)]
|
||||
module_contents = [
|
||||
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
||||
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
|
||||
|
||||
ie_template = '''
|
||||
class {name}(LazyLoadExtractor):
|
||||
class {name}({bases}):
|
||||
_VALID_URL = {valid_url!r}
|
||||
_module = '{module}'
|
||||
'''
|
||||
@@ -34,10 +36,20 @@ make_valid_template = '''
|
||||
'''
|
||||
|
||||
|
||||
def get_base_name(base):
|
||||
if base is InfoExtractor:
|
||||
return 'LazyLoadExtractor'
|
||||
elif base is SearchInfoExtractor:
|
||||
return 'LazyLoadSearchExtractor'
|
||||
else:
|
||||
return base.__name__
|
||||
|
||||
|
||||
def build_lazy_ie(ie, name):
|
||||
valid_url = getattr(ie, '_VALID_URL', None)
|
||||
s = ie_template.format(
|
||||
name=name,
|
||||
bases=', '.join(map(get_base_name, ie.__bases__)),
|
||||
valid_url=valid_url,
|
||||
module=ie.__module__)
|
||||
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
||||
@@ -47,11 +59,34 @@ def build_lazy_ie(ie, name):
|
||||
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
||||
return s
|
||||
|
||||
# find the correct sorting and add the required base classes so that sublcasses
|
||||
# can be correctly created
|
||||
classes = _ALL_CLASSES[:-1]
|
||||
ordered_cls = []
|
||||
while classes:
|
||||
for c in classes[:]:
|
||||
bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor))
|
||||
stop = False
|
||||
for b in bases:
|
||||
if b not in classes and b not in ordered_cls:
|
||||
if b.__name__ == 'GenericIE':
|
||||
exit()
|
||||
classes.insert(0, b)
|
||||
stop = True
|
||||
if stop:
|
||||
break
|
||||
if all(b in ordered_cls for b in bases):
|
||||
ordered_cls.append(c)
|
||||
classes.remove(c)
|
||||
break
|
||||
ordered_cls.append(_ALL_CLASSES[-1])
|
||||
|
||||
names = []
|
||||
for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]:
|
||||
name = ie.ie_key() + 'IE'
|
||||
for ie in ordered_cls:
|
||||
name = ie.__name__
|
||||
src = build_lazy_ie(ie, name)
|
||||
module_contents.append(src)
|
||||
if ie in _ALL_CLASSES:
|
||||
names.append(name)
|
||||
|
||||
module_contents.append(
|
||||
|
@@ -1,13 +1,46 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import optparse
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||
|
||||
'''
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 1:
|
||||
parser.error('Expected an output filename')
|
||||
|
||||
outfile, = args
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
readme = filter_options(readme)
|
||||
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(readme)
|
||||
|
||||
|
||||
def filter_options(readme):
|
||||
ret = ''
|
||||
@@ -37,27 +70,5 @@ def filter_options(readme):
|
||||
|
||||
return ret
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||
|
||||
'''
|
||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
readme = filter_options(readme)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
print(readme.encode('utf-8'))
|
||||
else:
|
||||
print(readme)
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -6,7 +6,7 @@
|
||||
# * the git config user.signingkey is properly set
|
||||
|
||||
# You will need
|
||||
# pip install coverage nose rsa
|
||||
# pip install coverage nose rsa wheel
|
||||
|
||||
# TODO
|
||||
# release notes
|
||||
@@ -15,10 +15,33 @@
|
||||
set -e
|
||||
|
||||
skip_tests=true
|
||||
if [ "$1" = '--run-tests' ]; then
|
||||
gpg_sign_commits=""
|
||||
buildserver='localhost:8142'
|
||||
|
||||
while true
|
||||
do
|
||||
case "$1" in
|
||||
--run-tests)
|
||||
skip_tests=false
|
||||
shift
|
||||
fi
|
||||
;;
|
||||
--gpg-sign-commits|-S)
|
||||
gpg_sign_commits="-S"
|
||||
shift
|
||||
;;
|
||||
--buildserver)
|
||||
buildserver="$2"
|
||||
shift 2
|
||||
;;
|
||||
--*)
|
||||
echo "ERROR: unknown option $1"
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
||||
version="$1"
|
||||
@@ -33,7 +56,9 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th
|
||||
useless_files=$(find youtube_dl -type f -not -name '*.py')
|
||||
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
|
||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
||||
if ! type pandoc 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
|
||||
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
|
||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
||||
|
||||
/bin/echo -e "\n### First of all, testing..."
|
||||
make clean
|
||||
@@ -49,7 +74,7 @@ sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
|
||||
make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
|
||||
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py
|
||||
git commit -m "release $version"
|
||||
git commit $gpg_sign_commits -m "release $version"
|
||||
|
||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||
git tag -s -m "Release $version" "$version"
|
||||
@@ -65,7 +90,7 @@ git push origin "$version"
|
||||
REV=$(git rev-parse HEAD)
|
||||
make youtube-dl youtube-dl.tar.gz
|
||||
read -p "VM running? (y/n) " -n 1
|
||||
wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||
wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||
mkdir -p "build/$version"
|
||||
mv youtube-dl youtube-dl.exe "build/$version"
|
||||
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
|
||||
@@ -75,15 +100,16 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
|
||||
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
|
||||
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
|
||||
|
||||
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
|
||||
/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
|
||||
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
||||
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
|
||||
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
|
||||
|
||||
ROOT=$(pwd)
|
||||
python devscripts/create-github-release.py $version "$ROOT/build/$version"
|
||||
|
||||
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
||||
|
||||
/bin/echo -e "\n### Now switching to gh-pages..."
|
||||
git clone --branch gh-pages --single-branch . build/gh-pages
|
||||
ROOT=$(pwd)
|
||||
(
|
||||
set -e
|
||||
ORIGIN_URL=$(git config --get remote.origin.url)
|
||||
@@ -95,7 +121,7 @@ ROOT=$(pwd)
|
||||
"$ROOT/devscripts/gh-pages/update-copyright.py"
|
||||
"$ROOT/devscripts/gh-pages/update-sites.py"
|
||||
git add *.html *.html.in update
|
||||
git commit -m "release $version"
|
||||
git commit $gpg_sign_commits -m "release $version"
|
||||
git push "$ROOT" gh-pages
|
||||
git push "$ORIGIN_URL" gh-pages
|
||||
)
|
||||
|
41
devscripts/show-downloads-statistics.py
Normal file
41
devscripts/show-downloads-statistics.py
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.compat import (
|
||||
compat_print,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from youtube_dl.utils import format_bytes
|
||||
|
||||
|
||||
def format_size(bytes):
|
||||
return '%s (%d bytes)' % (format_bytes(bytes), bytes)
|
||||
|
||||
|
||||
total_bytes = 0
|
||||
|
||||
releases = json.loads(compat_urllib_request.urlopen(
|
||||
'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8'))
|
||||
|
||||
for release in releases:
|
||||
compat_print(release['name'])
|
||||
for asset in release['assets']:
|
||||
asset_name = asset['name']
|
||||
total_bytes += asset['download_count'] * asset['size']
|
||||
if all(not re.match(p, asset_name) for p in (
|
||||
r'^youtube-dl$',
|
||||
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
|
||||
r'^youtube-dl\.exe$')):
|
||||
continue
|
||||
compat_print(
|
||||
' %s size: %s downloads: %d'
|
||||
% (asset_name, format_size(asset['size']), asset['download_count']))
|
||||
|
||||
compat_print('total downloads traffic: %s' % format_size(total_bytes))
|
@@ -14,6 +14,7 @@
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9gag**
|
||||
- **9now.com.au**
|
||||
- **abc.net.au**
|
||||
- **Abc7News**
|
||||
- **abcnews**
|
||||
@@ -28,6 +29,7 @@
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **AfreecaTV**: afreecatv.com
|
||||
- **Aftonbladet**
|
||||
- **AirMozilla**
|
||||
- **AlJazeera**
|
||||
@@ -43,8 +45,8 @@
|
||||
- **appletrailers:section**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||
- **ARD:mediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
@@ -55,6 +57,7 @@
|
||||
- **arte.tv:future**
|
||||
- **arte.tv:info**
|
||||
- **arte.tv:magazine**
|
||||
- **arte.tv:playlist**
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
- **AudiMedia**
|
||||
@@ -72,6 +75,8 @@
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **BeatportPro**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
@@ -102,6 +107,8 @@
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canvas**
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CBC**
|
||||
- **CBCPlayer**
|
||||
- **CBS**
|
||||
@@ -122,6 +129,7 @@
|
||||
- **cliphunter**
|
||||
- **ClipRs**
|
||||
- **Clipsyndicate**
|
||||
- **CloserToTruth**
|
||||
- **cloudtime**: CloudTime
|
||||
- **Cloudy**
|
||||
- **Clubic**
|
||||
@@ -134,8 +142,9 @@
|
||||
- **CollegeRama**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
@@ -145,6 +154,8 @@
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **CultureUnplugged**
|
||||
- **CWTV**
|
||||
@@ -205,6 +216,7 @@
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
@@ -214,6 +226,7 @@
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Flipagram**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
@@ -232,6 +245,7 @@
|
||||
- **FreeVideo**
|
||||
- **Funimation**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **GameInformer**
|
||||
- **Gamekings**
|
||||
- **GameOne**
|
||||
@@ -239,7 +253,6 @@
|
||||
- **Gamersyde**
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gametrailers**
|
||||
- **Gazeta**
|
||||
- **GDCVault**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
@@ -250,6 +263,7 @@
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **GoldenMoustache**
|
||||
- **Golem**
|
||||
- **GoogleDrive**
|
||||
@@ -264,6 +278,7 @@
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HornBunny**
|
||||
@@ -271,6 +286,8 @@
|
||||
- **HotStar**
|
||||
- **Howcast**
|
||||
- **HowStuffWorks**
|
||||
- **HRTi**
|
||||
- **HRTiPlaylist**
|
||||
- **HuffPost**: Huffington Post
|
||||
- **Hypem**
|
||||
- **Iconosquare**
|
||||
@@ -298,6 +315,7 @@
|
||||
- **jpopsuki.tv**
|
||||
- **JWPlatform**
|
||||
- **Kaltura**
|
||||
- **Kamcord**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
@@ -317,8 +335,10 @@
|
||||
- **kuwo:mv**: 酷我音乐 - MV
|
||||
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.tv**
|
||||
- **la7.it**
|
||||
- **Laola1Tv**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
@@ -326,8 +346,8 @@
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
- **Libsyn**
|
||||
- **life**: Life.ru
|
||||
- **life:embed**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
@@ -336,6 +356,7 @@
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **LnkGo**
|
||||
- **loc**: Library of Congress
|
||||
- **LocalNews8**
|
||||
- **LoveHomePorn**
|
||||
- **lrt.lt**
|
||||
@@ -349,6 +370,7 @@
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **META**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
- **Mgoon**
|
||||
@@ -375,10 +397,9 @@
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **MPORA**
|
||||
- **MSNBC**
|
||||
- **MSN**
|
||||
- **MTV**
|
||||
- **mtv.de**
|
||||
- **mtviggy.com**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
@@ -418,7 +439,6 @@
|
||||
- **Newstube**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **nextmovie.com**
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **nhl.com**
|
||||
@@ -426,8 +446,11 @@
|
||||
- **nhl.com:videocenter**
|
||||
- **nhl.com:videocenter:category**: NHL videocenter category
|
||||
- **nick.com**
|
||||
- **nick.de**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **NineCNineMedia**
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **Noco**
|
||||
@@ -455,9 +478,12 @@
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **ocw.mit.edu**
|
||||
- **OdaTV**
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **onet.tv**
|
||||
- **onet.tv:channel**
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
@@ -491,8 +517,9 @@
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
- **PolskieRadio**
|
||||
- **PornHd**
|
||||
- **PornHub**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
- **PornHubPlaylist**
|
||||
- **PornHubUserVideos**
|
||||
- **Pornotube**
|
||||
@@ -510,8 +537,11 @@
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **R7**
|
||||
- **R7Article**
|
||||
- **radio.de**
|
||||
- **radiobremen**
|
||||
- **radiocanada**
|
||||
- **RadioCanadaAudioVideo**
|
||||
- **radiofrance**
|
||||
- **RadioJavan**
|
||||
- **Rai**
|
||||
@@ -521,10 +551,14 @@
|
||||
- **RedTube**
|
||||
- **RegioTV**
|
||||
- **Restudy**
|
||||
- **Reuters**
|
||||
- **ReverbNation**
|
||||
- **Revision3**
|
||||
- **revision**
|
||||
- **revision3:embed**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RockstarGames**
|
||||
- **RoosterTeeth**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **RTBF**
|
||||
@@ -537,7 +571,9 @@
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **rtve.es:television**
|
||||
- **RTVNH**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
- **RulePorn**
|
||||
- **rutube**: Rutube videos
|
||||
@@ -561,6 +597,7 @@
|
||||
- **ScreencastOMatic**
|
||||
- **ScreenJunkies**
|
||||
- **ScreenwaveMedia**
|
||||
- **Seeker**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
- **ServingSys**
|
||||
@@ -569,8 +606,10 @@
|
||||
- **Shared**: shared.sx and vivo.sx
|
||||
- **ShareSix**
|
||||
- **Sina**
|
||||
- **SixPlay**
|
||||
- **skynewsarabia:article**
|
||||
- **skynewsarabia:video**
|
||||
- **skynewsarabia:video**
|
||||
- **SkySports**
|
||||
- **Slideshare**
|
||||
- **Slutload**
|
||||
- **smotri**: Smotri.com
|
||||
@@ -602,12 +641,14 @@
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **Sportschau**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
- **SSA**
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **Streamable**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
@@ -636,6 +677,7 @@
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TF1**
|
||||
- **TheIntercept**
|
||||
- **ThePlatform**
|
||||
@@ -654,6 +696,7 @@
|
||||
- **TNAFlix**
|
||||
- **TNAFlixNetworkEmbed**
|
||||
- **toggle**
|
||||
- **Tosh**: Tosh.0
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
@@ -682,11 +725,12 @@
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **tvp.pl**
|
||||
- **tvp.pl:Series**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
- **twitch:past_broadcasts**
|
||||
- **twitch:profile**
|
||||
- **twitch:stream**
|
||||
@@ -700,6 +744,7 @@
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USAToday**
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
@@ -717,6 +762,7 @@
|
||||
- **vh1.com**
|
||||
- **Vice**
|
||||
- **ViceShow**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.mit.edu**
|
||||
@@ -729,6 +775,7 @@
|
||||
- **VideoPremium**
|
||||
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **vidme**
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
@@ -755,6 +802,7 @@
|
||||
- **vine:user**
|
||||
- **vk**: VK
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
@@ -764,17 +812,15 @@
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **vulture.com**
|
||||
- **Walla**
|
||||
- **WashingtonPost**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
- **wat.tv**
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **WDRMaus**: Sendung mit der Maus
|
||||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **Weibo**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
@@ -782,10 +828,11 @@
|
||||
- **WNL**
|
||||
- **WorldStarHipHop**
|
||||
- **wrzuta.pl**
|
||||
- **wrzuta.pl:playlist**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **xiami:album**: 虾米音乐 - 专辑
|
||||
@@ -810,6 +857,7 @@
|
||||
- **Ynet**
|
||||
- **YouJizz**
|
||||
- **youku**: 优酷
|
||||
- **youku:show**
|
||||
- **YouPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
@@ -823,6 +871,7 @@
|
||||
- **youtube:search**: YouTube.com searches
|
||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||
- **youtube:search_url**: YouTube.com search URLs
|
||||
- **youtube:shared**
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
|
62
setup.py
62
setup.py
@@ -21,25 +21,37 @@ try:
|
||||
import py2exe
|
||||
except ImportError:
|
||||
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
||||
print("Cannot import py2exe", file=sys.stderr)
|
||||
print('Cannot import py2exe', file=sys.stderr)
|
||||
exit(1)
|
||||
|
||||
py2exe_options = {
|
||||
"bundle_files": 1,
|
||||
"compressed": 1,
|
||||
"optimize": 2,
|
||||
"dist_dir": '.',
|
||||
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
|
||||
'bundle_files': 1,
|
||||
'compressed': 1,
|
||||
'optimize': 2,
|
||||
'dist_dir': '.',
|
||||
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
|
||||
}
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
'youtube_dl/version.py', 'exec'))
|
||||
|
||||
DESCRIPTION = 'YouTube video downloader'
|
||||
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
|
||||
|
||||
py2exe_console = [{
|
||||
"script": "./youtube_dl/__main__.py",
|
||||
"dest_base": "youtube-dl",
|
||||
'script': './youtube_dl/__main__.py',
|
||||
'dest_base': 'youtube-dl',
|
||||
'version': __version__,
|
||||
'description': DESCRIPTION,
|
||||
'comments': LONG_DESCRIPTION,
|
||||
'product_name': 'youtube-dl',
|
||||
'product_version': __version__,
|
||||
}]
|
||||
|
||||
py2exe_params = {
|
||||
'console': py2exe_console,
|
||||
'options': {"py2exe": py2exe_options},
|
||||
'options': {'py2exe': py2exe_options},
|
||||
'zipfile': None
|
||||
}
|
||||
|
||||
@@ -72,7 +84,7 @@ else:
|
||||
params['scripts'] = ['bin/youtube-dl']
|
||||
|
||||
class build_lazy_extractors(Command):
|
||||
description = "Build the extractor lazy loading module"
|
||||
description = 'Build the extractor lazy loading module'
|
||||
user_options = []
|
||||
|
||||
def initialize_options(self):
|
||||
@@ -87,16 +99,11 @@ class build_lazy_extractors(Command):
|
||||
dry_run=self.dry_run,
|
||||
)
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
'youtube_dl/version.py', 'exec'))
|
||||
|
||||
setup(
|
||||
name='youtube_dl',
|
||||
version=__version__,
|
||||
description='YouTube video downloader',
|
||||
long_description='Small command-line program to download videos from'
|
||||
' YouTube.com and other video sites.',
|
||||
description=DESCRIPTION,
|
||||
long_description=LONG_DESCRIPTION,
|
||||
url='https://github.com/rg3/youtube-dl',
|
||||
author='Ricardo Garcia',
|
||||
author_email='ytdl@yt-dl.org',
|
||||
@@ -112,16 +119,17 @@ setup(
|
||||
# test_requires = ['nosetest'],
|
||||
|
||||
classifiers=[
|
||||
"Topic :: Multimedia :: Video",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Environment :: Console",
|
||||
"License :: Public Domain",
|
||||
"Programming Language :: Python :: 2.6",
|
||||
"Programming Language :: Python :: 2.7",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.2",
|
||||
"Programming Language :: Python :: 3.3",
|
||||
"Programming Language :: Python :: 3.4",
|
||||
'Topic :: Multimedia :: Video',
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
'Environment :: Console',
|
||||
'License :: Public Domain',
|
||||
'Programming Language :: Python :: 2.6',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.2',
|
||||
'Programming Language :: Python :: 3.3',
|
||||
'Programming Language :: Python :: 3.4',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
],
|
||||
|
||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||
|
@@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
@@ -66,6 +66,11 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._html_search_meta('d', html), '4')
|
||||
self.assertEqual(ie._html_search_meta('e', html), '5')
|
||||
self.assertEqual(ie._html_search_meta('f', html), '6')
|
||||
self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
|
||||
self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
|
||||
self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
|
||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
||||
|
||||
def test_download_json(self):
|
||||
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
|
||||
|
@@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||
|
||||
def test_audio_only_extractor_format_selection(self):
|
||||
# For extractors with incomplete formats (all formats are audio-only or
|
||||
# video-only) best and worst should fallback to corresponding best/worst
|
||||
# video-only or audio-only formats (as per
|
||||
# https://github.com/rg3/youtube-dl/pull/5556)
|
||||
formats = [
|
||||
{'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': 'best'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'high')
|
||||
|
||||
ydl = YDL({'format': 'worst'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'low')
|
||||
|
||||
def test_format_not_available(self):
|
||||
formats = [
|
||||
{'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
|
||||
{'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
# This must fail since complete video-audio format does not match filter
|
||||
# and extractor does not provide incomplete only formats (i.e. only
|
||||
# video-only or audio-only).
|
||||
ydl = YDL({'format': 'best[height>360]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
def test_invalid_format_specs(self):
|
||||
def assert_syntax_error(format_spec):
|
||||
ydl = YDL({'format': format_spec})
|
||||
|
@@ -6,6 +6,7 @@ from __future__ import unicode_literals
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
import collections
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
@@ -100,8 +101,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ythistory', ['youtube:history'])
|
||||
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||
|
||||
def test_vimeo_matching(self):
|
||||
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
@@ -130,6 +129,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
|
||||
['Yahoo'])
|
||||
|
||||
def test_no_duplicated_ie_names(self):
|
||||
name_accu = collections.defaultdict(list)
|
||||
for ie in self.ies:
|
||||
name_accu[ie.IE_NAME.lower()].append(type(ie).__name__)
|
||||
for (ie_name, ie_list) in name_accu.items():
|
||||
self.assertEqual(
|
||||
len(ie_list), 1,
|
||||
'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -87,6 +87,8 @@ class TestCompat(unittest.TestCase):
|
||||
|
||||
def test_compat_shlex_split(self):
|
||||
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
|
||||
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
|
||||
|
||||
def test_compat_etree_fromstring(self):
|
||||
xml = '''
|
||||
@@ -103,6 +105,12 @@ class TestCompat(unittest.TestCase):
|
||||
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
|
||||
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
|
||||
|
||||
def test_compat_etree_fromstring_doctype(self):
|
||||
xml = '''<?xml version="1.0"?>
|
||||
<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
|
||||
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
|
||||
compat_etree_fromstring(xml)
|
||||
|
||||
def test_struct_unpack(self):
|
||||
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
|
||||
|
||||
|
@@ -16,6 +16,15 @@ import threading
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
@@ -31,6 +40,22 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
self.send_header('Content-Type', 'video/mp4')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
|
||||
elif self.path == '/302':
|
||||
if sys.version_info[0] == 3:
|
||||
# XXX: Python 3 http server does not allow non-ASCII header values
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
return
|
||||
|
||||
new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
|
||||
self.send_response(302)
|
||||
self.send_header(b'Location', new_url.encode('utf-8'))
|
||||
self.end_headers()
|
||||
elif self.path == '/%E4%B8%AD%E6%96%87.html':
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
||||
else:
|
||||
assert False
|
||||
|
||||
@@ -47,18 +72,32 @@ class FakeLogger(object):
|
||||
|
||||
|
||||
class TestHTTP(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
self.port = http_server_port(self.httpd)
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
self.server_thread.start()
|
||||
|
||||
def test_unicode_path_redirection(self):
|
||||
# XXX: Python 3 http server does not allow non-ASCII header values
|
||||
if sys.version_info[0] == 3:
|
||||
return
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
r = ydl.extract_info('http://localhost:%d/302' % self.port)
|
||||
self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
class TestHTTPS(unittest.TestCase):
|
||||
def setUp(self):
|
||||
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
self.httpd.socket = ssl.wrap_socket(
|
||||
self.httpd.socket, certfile=certfn, server_side=True)
|
||||
if os.name == 'java':
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = self.httpd.socket.sock
|
||||
else:
|
||||
sock = self.httpd.socket
|
||||
self.port = sock.getsockname()[1]
|
||||
self.port = http_server_port(self.httpd)
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
self.server_thread.start()
|
||||
@@ -94,32 +133,32 @@ class TestProxy(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('normal'))
|
||||
self.port = self.proxy.socket.getsockname()[1]
|
||||
self.port = http_server_port(self.proxy)
|
||||
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
|
||||
self.proxy_thread.daemon = True
|
||||
self.proxy_thread.start()
|
||||
|
||||
self.cn_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('cn'))
|
||||
self.cn_port = self.cn_proxy.socket.getsockname()[1]
|
||||
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
|
||||
self.cn_proxy_thread.daemon = True
|
||||
self.cn_proxy_thread.start()
|
||||
self.geo_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('geo'))
|
||||
self.geo_port = http_server_port(self.geo_proxy)
|
||||
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
|
||||
self.geo_proxy_thread.daemon = True
|
||||
self.geo_proxy_thread.start()
|
||||
|
||||
def test_proxy(self):
|
||||
cn_proxy = 'localhost:{0}'.format(self.cn_port)
|
||||
geo_proxy = 'localhost:{0}'.format(self.geo_port)
|
||||
ydl = YoutubeDL({
|
||||
'proxy': 'localhost:{0}'.format(self.port),
|
||||
'cn_verification_proxy': cn_proxy,
|
||||
'geo_verification_proxy': geo_proxy,
|
||||
})
|
||||
url = 'http://foo.com/bar'
|
||||
response = ydl.urlopen(url).read().decode('utf-8')
|
||||
self.assertEqual(response, 'normal: {0}'.format(url))
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Ytdl-request-proxy', cn_proxy)
|
||||
req.add_header('Ytdl-request-proxy', geo_proxy)
|
||||
response = ydl.urlopen(req).read().decode('utf-8')
|
||||
self.assertEqual(response, 'cn: {0}'.format(url))
|
||||
self.assertEqual(response, 'geo: {0}'.format(url))
|
||||
|
||||
def test_proxy_with_idn(self):
|
||||
ydl = YoutubeDL({
|
||||
|
@@ -33,6 +33,7 @@ from youtube_dl.utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
get_element_by_class,
|
||||
InAdvancePagedList,
|
||||
intlist_to_bytes,
|
||||
is_html,
|
||||
@@ -60,11 +61,13 @@ from youtube_dl.utils import (
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
urshift,
|
||||
update_url_query,
|
||||
version_tuple,
|
||||
xpath_with_ns,
|
||||
@@ -78,6 +81,7 @@ from youtube_dl.utils import (
|
||||
cli_option,
|
||||
cli_valueless_option,
|
||||
cli_bool_option,
|
||||
parse_codecs,
|
||||
)
|
||||
from youtube_dl.compat import (
|
||||
compat_chr,
|
||||
@@ -157,8 +161,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertTrue(sanitize_filename(':', restricted=True) != '')
|
||||
|
||||
self.assertEqual(sanitize_filename(
|
||||
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True),
|
||||
'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy')
|
||||
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
|
||||
'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
|
||||
|
||||
def test_sanitize_ids(self):
|
||||
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
|
||||
@@ -249,6 +253,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unescapeHTML('/'), '/')
|
||||
self.assertEqual(unescapeHTML('é'), 'é')
|
||||
self.assertEqual(unescapeHTML('�'), '�')
|
||||
# HTML5 entities
|
||||
self.assertEqual(unescapeHTML('.''), '.\'')
|
||||
|
||||
def test_date_from_str(self):
|
||||
self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
|
||||
@@ -281,8 +287,28 @@ class TestUtil(unittest.TestCase):
|
||||
'20150202')
|
||||
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
|
||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||
|
||||
def test_unified_timestamps(self):
|
||||
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||
self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
|
||||
self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
|
||||
self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
|
||||
self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
|
||||
self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
|
||||
self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
|
||||
self.assertEqual(
|
||||
unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
|
||||
1417001400)
|
||||
self.assertEqual(
|
||||
unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
|
||||
1422902860)
|
||||
self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
|
||||
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
|
||||
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
|
||||
@@ -381,6 +407,12 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(res_url, url)
|
||||
self.assertEqual(res_data, None)
|
||||
|
||||
smug_url = smuggle_url(url, {'a': 'b'})
|
||||
smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
|
||||
res_url, res_data = unsmuggle_url(smug_smug_url)
|
||||
self.assertEqual(res_url, url)
|
||||
self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
|
||||
|
||||
def test_shell_quote(self):
|
||||
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
|
||||
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
||||
@@ -577,6 +609,29 @@ class TestUtil(unittest.TestCase):
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_parse_codecs(self):
|
||||
self.assertEqual(parse_codecs(''), {})
|
||||
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||
'vcodec': 'avc1.77.30',
|
||||
'acodec': 'mp4a.40.2',
|
||||
})
|
||||
self.assertEqual(parse_codecs('mp4a.40.2'), {
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp4a.40.2',
|
||||
})
|
||||
self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
|
||||
'vcodec': 'avc1.42001e',
|
||||
'acodec': 'mp4a.40.5',
|
||||
})
|
||||
self.assertEqual(parse_codecs('avc3.640028'), {
|
||||
'vcodec': 'avc3.640028',
|
||||
'acodec': 'none',
|
||||
})
|
||||
self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
|
||||
'vcodec': 'h264',
|
||||
'acodec': 'aac',
|
||||
})
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||
@@ -638,6 +693,9 @@ class TestUtil(unittest.TestCase):
|
||||
"1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"}
|
||||
}''')
|
||||
|
||||
inp = '''{"foo":101}'''
|
||||
self.assertEqual(js_to_json(inp), '''{"foo":101}''')
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
@@ -954,5 +1012,17 @@ The first line
|
||||
self.assertRaises(ValueError, encode_base_n, 0, 70)
|
||||
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
|
||||
|
||||
def test_urshift(self):
|
||||
self.assertEqual(urshift(3, 1), 1)
|
||||
self.assertEqual(urshift(-3, 1), 2147483646)
|
||||
|
||||
def test_get_element_by_class(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import collections
|
||||
import contextlib
|
||||
import copy
|
||||
import datetime
|
||||
import errno
|
||||
import fileinput
|
||||
@@ -196,8 +197,8 @@ class YoutubeDL(object):
|
||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||
At the moment, this is only supported by YouTube.
|
||||
proxy: URL of the proxy server to use
|
||||
cn_verification_proxy: URL of the proxy to use for IP address verification
|
||||
on Chinese sites. (Experimental)
|
||||
geo_verification_proxy: URL of the proxy to use for IP address verification
|
||||
on geo-restricted sites. (Experimental)
|
||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||
support, using fridibi
|
||||
@@ -304,6 +305,11 @@ class YoutubeDL(object):
|
||||
self.params.update(params)
|
||||
self.cache = Cache(self)
|
||||
|
||||
if self.params.get('cn_verification_proxy') is not None:
|
||||
self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
|
||||
if self.params.get('geo_verification_proxy') is None:
|
||||
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
|
||||
|
||||
if params.get('bidi_workaround', False):
|
||||
try:
|
||||
import pty
|
||||
@@ -1046,9 +1052,9 @@ class YoutubeDL(object):
|
||||
if isinstance(selector, list):
|
||||
fs = [_build_selector_function(s) for s in selector]
|
||||
|
||||
def selector_function(formats):
|
||||
def selector_function(ctx):
|
||||
for f in fs:
|
||||
for format in f(formats):
|
||||
for format in f(ctx):
|
||||
yield format
|
||||
return selector_function
|
||||
elif selector.type == GROUP:
|
||||
@@ -1056,17 +1062,17 @@ class YoutubeDL(object):
|
||||
elif selector.type == PICKFIRST:
|
||||
fs = [_build_selector_function(s) for s in selector.selector]
|
||||
|
||||
def selector_function(formats):
|
||||
def selector_function(ctx):
|
||||
for f in fs:
|
||||
picked_formats = list(f(formats))
|
||||
picked_formats = list(f(ctx))
|
||||
if picked_formats:
|
||||
return picked_formats
|
||||
return []
|
||||
elif selector.type == SINGLE:
|
||||
format_spec = selector.selector
|
||||
|
||||
def selector_function(formats):
|
||||
formats = list(formats)
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if not formats:
|
||||
return
|
||||
if format_spec == 'all':
|
||||
@@ -1079,9 +1085,10 @@ class YoutubeDL(object):
|
||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||
if audiovideo_formats:
|
||||
yield audiovideo_formats[format_idx]
|
||||
# for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
|
||||
elif (all(f.get('acodec') != 'none' for f in formats) or
|
||||
all(f.get('vcodec') != 'none' for f in formats)):
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) we will fallback to best/worst
|
||||
# {video,audio}-only format
|
||||
elif ctx['incomplete_formats']:
|
||||
yield formats[format_idx]
|
||||
elif format_spec == 'bestaudio':
|
||||
audio_formats = [
|
||||
@@ -1155,17 +1162,18 @@ class YoutubeDL(object):
|
||||
}
|
||||
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
||||
|
||||
def selector_function(formats):
|
||||
formats = list(formats)
|
||||
for pair in itertools.product(video_selector(formats), audio_selector(formats)):
|
||||
def selector_function(ctx):
|
||||
for pair in itertools.product(
|
||||
video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
|
||||
yield _merge(pair)
|
||||
|
||||
filters = [self._build_format_filter(f) for f in selector.filters]
|
||||
|
||||
def final_selector(formats):
|
||||
def final_selector(ctx):
|
||||
ctx_copy = copy.deepcopy(ctx)
|
||||
for _filter in filters:
|
||||
formats = list(filter(_filter, formats))
|
||||
return selector_function(formats)
|
||||
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
||||
return selector_function(ctx_copy)
|
||||
return final_selector
|
||||
|
||||
stream = io.BytesIO(format_spec.encode('utf-8'))
|
||||
@@ -1223,6 +1231,10 @@ class YoutubeDL(object):
|
||||
if 'title' not in info_dict:
|
||||
raise ExtractorError('Missing "title" field in extractor result')
|
||||
|
||||
if not isinstance(info_dict['id'], compat_str):
|
||||
self.report_warning('"id" field is not a string - forcing string conversion')
|
||||
info_dict['id'] = compat_str(info_dict['id'])
|
||||
|
||||
if 'playlist' not in info_dict:
|
||||
# It isn't part of a playlist
|
||||
info_dict['playlist'] = None
|
||||
@@ -1368,7 +1380,34 @@ class YoutubeDL(object):
|
||||
req_format_list.append('best')
|
||||
req_format = '/'.join(req_format_list)
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
formats_to_download = list(format_selector(formats))
|
||||
|
||||
# While in format selection we may need to have an access to the original
|
||||
# format set in order to calculate some metrics or do some processing.
|
||||
# For now we need to be able to guess whether original formats provided
|
||||
# by extractor are incomplete or not (i.e. whether extractor provides only
|
||||
# video-only or audio-only formats) for proper formats selection for
|
||||
# extractors with such incomplete formats (see
|
||||
# https://github.com/rg3/youtube-dl/pull/5556).
|
||||
# Since formats may be filtered during format selection and may not match
|
||||
# the original formats the results may be incorrect. Thus original formats
|
||||
# or pre-calculated metrics should be passed to format selection routines
|
||||
# as well.
|
||||
# We will pass a context object containing all necessary additional data
|
||||
# instead of just formats.
|
||||
# This fixes incorrect format selection issue (see
|
||||
# https://github.com/rg3/youtube-dl/issues/10083).
|
||||
incomplete_formats = (
|
||||
# All formats are video-only or
|
||||
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
|
||||
# all formats are audio-only
|
||||
all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
|
||||
|
||||
ctx = {
|
||||
'formats': formats,
|
||||
'incomplete_formats': incomplete_formats,
|
||||
}
|
||||
|
||||
formats_to_download = list(format_selector(ctx))
|
||||
if not formats_to_download:
|
||||
raise ExtractorError('requested format not available',
|
||||
expected=True)
|
||||
|
@@ -18,7 +18,6 @@ from .options import (
|
||||
from .compat import (
|
||||
compat_expanduser,
|
||||
compat_getpass,
|
||||
compat_print,
|
||||
compat_shlex_split,
|
||||
workaround_optparse_bug9161,
|
||||
)
|
||||
@@ -76,7 +75,7 @@ def _real_main(argv=None):
|
||||
|
||||
# Dump user agent
|
||||
if opts.dump_user_agent:
|
||||
compat_print(std_headers['User-Agent'])
|
||||
write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
# Batch file verification
|
||||
@@ -101,10 +100,10 @@ def _real_main(argv=None):
|
||||
|
||||
if opts.list_extractors:
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
||||
write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
|
||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||
for mu in matchedUrls:
|
||||
compat_print(' ' + mu)
|
||||
write_string(' ' + mu + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
if opts.list_extractor_descriptions:
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
@@ -117,7 +116,7 @@ def _real_main(argv=None):
|
||||
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
||||
_COUNTS = ('', '5', '10', 'all')
|
||||
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||
compat_print(desc)
|
||||
write_string(desc + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
# Conflicting, missing and erroneous options
|
||||
@@ -383,6 +382,8 @@ def _real_main(argv=None):
|
||||
'external_downloader_args': external_downloader_args,
|
||||
'postprocessor_args': postprocessor_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
'geo_verification_proxy': opts.geo_verification_proxy,
|
||||
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
|
2274
youtube_dl/compat.py
2274
youtube_dl/compat.py
File diff suppressed because it is too large
Load Diff
@@ -85,7 +85,7 @@ class ExternalFD(FileDownloader):
|
||||
cmd, stderr=subprocess.PIPE)
|
||||
_, stderr = p.communicate()
|
||||
if p.returncode != 0:
|
||||
self.to_stderr(stderr)
|
||||
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||
return p.returncode
|
||||
|
||||
|
||||
@@ -210,6 +210,7 @@ class FFmpegFD(ExternalFD):
|
||||
# args += ['-http_proxy', proxy]
|
||||
env = os.environ.copy()
|
||||
compat_setenv('HTTP_PROXY', proxy, env=env)
|
||||
compat_setenv('http_proxy', proxy, env=env)
|
||||
|
||||
protocol = info_dict.get('protocol')
|
||||
|
||||
|
@@ -196,6 +196,11 @@ def build_fragments_list(boot_info):
|
||||
first_frag_number = fragment_run_entry_table[0]['first']
|
||||
fragments_counter = itertools.count(first_frag_number)
|
||||
for segment, fragments_count in segment_run_table['segment_run']:
|
||||
# In some live HDS streams (for example Rai), `fragments_count` is
|
||||
# abnormal and causing out-of-memory errors. It's OK to change the
|
||||
# number of fragments for live streams as they are updated periodically
|
||||
if fragments_count == 4294967295 and boot_info['live']:
|
||||
fragments_count = 2
|
||||
for _ in range(fragments_count):
|
||||
res.append((segment, next(fragments_counter)))
|
||||
|
||||
@@ -319,7 +324,7 @@ class F4mFD(FragmentFD):
|
||||
doc = compat_etree_fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
for f in self._get_unencrypted_media(doc)]
|
||||
if requested_bitrate is None:
|
||||
if requested_bitrate is None or len(formats) == 1:
|
||||
# get the best format
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
@@ -329,7 +334,11 @@ class F4mFD(FragmentFD):
|
||||
|
||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
||||
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
|
||||
# From Adobe F4M 3.0 spec:
|
||||
# The <baseURL> element SHALL be the base URL for all relative
|
||||
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
|
||||
# URLs should be relative to the location of the containing document.
|
||||
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
|
||||
live = boot_info['live']
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
|
@@ -2,14 +2,24 @@ from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import binascii
|
||||
try:
|
||||
from Crypto.Cipher import AES
|
||||
can_decrypt_frag = True
|
||||
except ImportError:
|
||||
can_decrypt_frag = False
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from .external import FFmpegFD
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_struct_pack,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
parse_m3u8_attributes,
|
||||
)
|
||||
|
||||
|
||||
@@ -21,19 +31,27 @@ class HlsFD(FragmentFD):
|
||||
@staticmethod
|
||||
def can_download(manifest):
|
||||
UNSUPPORTED_FEATURES = (
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1]
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
|
||||
# Live streams heuristic does not always work (e.g. geo restricted to Germany
|
||||
# http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
|
||||
# r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
|
||||
r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||
# event media playlists [4]
|
||||
|
||||
# This heuristic also is not correct since segments may not be appended as well.
|
||||
# Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
|
||||
# no segments will definitely be appended to the end of the playlist.
|
||||
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||
# # event media playlists [4]
|
||||
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||
)
|
||||
return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
|
||||
return all(check_results)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
@@ -51,36 +69,60 @@ class HlsFD(FragmentFD):
|
||||
fd.add_progress_hook(ph)
|
||||
return fd.real_download(filename, info_dict)
|
||||
|
||||
fragment_urls = []
|
||||
total_frags = 0
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#'):
|
||||
segment_url = (
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
fragment_urls.append(segment_url)
|
||||
# We only download the first fragment during the test
|
||||
if self.params.get('test', False):
|
||||
break
|
||||
total_frags += 1
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(fragment_urls),
|
||||
'total_frags': total_frags,
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
frags_filenames = []
|
||||
for i, frag_url in enumerate(fragment_urls):
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
frag_url = (
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
ctx['dest_stream'].write(down.read())
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
ctx['dest_stream'].write(frag_content)
|
||||
frags_filenames.append(frag_sanitized)
|
||||
# We only download the first fragment during the test
|
||||
if self.params.get('test', False):
|
||||
break
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
elif line.startswith('#EXT-X-KEY'):
|
||||
decrypt_info = parse_m3u8_attributes(line[11:])
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
if 'IV' in decrypt_info:
|
||||
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@@ -156,7 +156,10 @@ class AdobeTVVideoIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(url + '?format=json', video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
|
||||
|
@@ -2,23 +2,137 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
unescapeHTML,
|
||||
extract_attributes,
|
||||
get_element_by_attribute,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class AENetworksIE(InfoExtractor):
|
||||
class AENetworksBaseIE(ThePlatformIE):
|
||||
_THEPLATFORM_KEY = 'crazyjava'
|
||||
_THEPLATFORM_SECRET = 's3cr3t'
|
||||
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
|
||||
'info_dict': {
|
||||
'id': '22253814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
|
||||
'info_dict': {
|
||||
'id': '71889446852',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
|
||||
'info_dict': {
|
||||
'id': 'SERIES4317',
|
||||
'title': 'Atlanta Plastic',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
'aetv.com': 'AETV',
|
||||
'mylifetime.com': 'LIFETIME',
|
||||
'fyi.tv': 'FYI',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
url_parts_len = len(url_parts)
|
||||
if url_parts_len == 1:
|
||||
entries = []
|
||||
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||
elif url_parts_len == 2:
|
||||
entries = []
|
||||
for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
|
||||
episode_attributes = extract_attributes(episode_item)
|
||||
episode_url = compat_urlparse.urljoin(
|
||||
url, episode_attributes['data-canonical'])
|
||||
entries.append(self.url_result(
|
||||
episode_url, 'AENetworks',
|
||||
episode_attributes['data-videoid']))
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
||||
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': 'medium_video_s3'
|
||||
}
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating'])
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||
media_url = update_url_query(media_url, query)
|
||||
media_url = self._sign_url(media_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
|
||||
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class HistoryTopicIE(AENetworksBaseIE):
|
||||
IE_NAME = 'history:topic'
|
||||
IE_DESC = 'History.com Topic'
|
||||
_VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)(?:/[^/]+(?:/(?P<video_display_id>[^/?#]+))?)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
||||
'info_dict': {
|
||||
'id': 'g12m5Gyt3fdR',
|
||||
'id': '40700995724',
|
||||
'ext': 'mp4',
|
||||
'title': "Bet You Didn't Know: Valentine's Day",
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
@@ -31,57 +145,61 @@ class AENetworksIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'expected_warnings': ['JSON-LD'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
|
||||
'info_dict': {
|
||||
'id': 'eg47EERs_JsZ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
|
||||
'info_dict':
|
||||
{
|
||||
'id': 'world-war-i-history',
|
||||
'title': 'World War I History',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'playlist_mincount': 24,
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
|
||||
'only_matching': True
|
||||
'url': 'http://www.history.com/topics/world-war-i-history/videos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
|
||||
'only_matching': True
|
||||
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
|
||||
'only_matching': True
|
||||
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/speeches',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url_re = [
|
||||
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
|
||||
r"media_url\s*=\s*'([^']+)'"
|
||||
]
|
||||
video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
|
||||
query = {'mbr': 'true'}
|
||||
if page_type == 'shows':
|
||||
query['assetTypes'] = 'medium_video_s3'
|
||||
if 'switch=hds' in video_url:
|
||||
query['switch'] = 'hls'
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
info.update({
|
||||
def theplatform_url_result(self, theplatform_url, video_id, query):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': smuggle_url(
|
||||
update_url_query(video_url, query),
|
||||
update_url_query(theplatform_url, query),
|
||||
{
|
||||
'sig': {
|
||||
'key': 'crazyjava',
|
||||
'secret': 's3cr3t'},
|
||||
'key': self._THEPLATFORM_KEY,
|
||||
'secret': self._THEPLATFORM_SECRET,
|
||||
},
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
topic_id, video_display_id = re.match(self._VALID_URL, url).groups()
|
||||
if video_display_id:
|
||||
webpage = self._download_webpage(url, video_display_id)
|
||||
release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
|
||||
release_url = unescapeHTML(release_url)
|
||||
|
||||
return self.theplatform_url_result(
|
||||
release_url, video_id, {
|
||||
'mbr': 'true',
|
||||
'switch': 'hls'
|
||||
})
|
||||
return info
|
||||
else:
|
||||
webpage = self._download_webpage(url, topic_id)
|
||||
entries = []
|
||||
for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage):
|
||||
video_attributes = extract_attributes(episode_item)
|
||||
entries.append(self.theplatform_url_result(
|
||||
video_attributes['data-release-url'], video_attributes['data-id'], {
|
||||
'mbr': 'true',
|
||||
'switch': 'hls'
|
||||
}))
|
||||
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))
|
||||
|
133
youtube_dl/extractor/afreecatv.py
Normal file
133
youtube_dl/extractor/afreecatv.py
Normal file
@@ -0,0 +1,133 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class AfreecaTVIE(InfoExtractor):
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)^
|
||||
https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html))
|
||||
\?.*?\bnTitleNo=(?P<id>\d+)'''
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
'info_dict': {
|
||||
'id': '36164052',
|
||||
'ext': 'mp4',
|
||||
'title': '데일리 에이프릴 요정들의 시상식!',
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
'upload_date': '20160503',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
|
||||
'info_dict': {
|
||||
'id': '36153164',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
|
||||
'uploader': 'dailyapril',
|
||||
'uploader_id': 'dailyapril',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'md5': 'd8b7c174568da61d774ef0203159bf97',
|
||||
'info_dict': {
|
||||
'id': '36153164_1',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}, {
|
||||
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
|
||||
'info_dict': {
|
||||
'id': '36153164_2',
|
||||
'ext': 'mp4',
|
||||
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
|
||||
'upload_date': '20160502',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def parse_video_key(key):
|
||||
video_key = {}
|
||||
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
|
||||
if m:
|
||||
video_key['upload_date'] = m.group('upload_date')
|
||||
video_key['part'] = m.group('part')
|
||||
return video_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
parsed_url = compat_urllib_parse_urlparse(url)
|
||||
info_url = compat_urlparse.urlunparse(parsed_url._replace(
|
||||
netloc='afbbs.afreecatv.com:8080',
|
||||
path='/api/video/get_video_info.php'))
|
||||
video_xml = self._download_xml(info_url, video_id)
|
||||
|
||||
if xpath_element(video_xml, './track/video/file') is None:
|
||||
raise ExtractorError('Specified AfreecaTV video does not exist',
|
||||
expected=True)
|
||||
|
||||
title = xpath_text(video_xml, './track/title', 'title')
|
||||
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
|
||||
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
|
||||
duration = int_or_none(xpath_text(video_xml, './track/duration',
|
||||
'duration'))
|
||||
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
|
||||
|
||||
entries = []
|
||||
for i, video_file in enumerate(video_xml.findall('./track/video/file')):
|
||||
video_key = self.parse_video_key(video_file.get('key', ''))
|
||||
if not video_key:
|
||||
continue
|
||||
entries.append({
|
||||
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
|
||||
'title': title,
|
||||
'upload_date': video_key.get('upload_date'),
|
||||
'duration': int_or_none(video_file.get('duration')),
|
||||
'url': video_file.text,
|
||||
})
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
if len(entries) > 1:
|
||||
info['_type'] = 'multi_video'
|
||||
info['entries'] = entries
|
||||
elif len(entries) == 1:
|
||||
info['url'] = entries[0]['url']
|
||||
info['upload_date'] = entries[0].get('upload_date')
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'No files found for the specified AfreecaTV video, either'
|
||||
' the URL is incorrect or the video has been made private.',
|
||||
expected=True)
|
||||
|
||||
return info
|
@@ -24,10 +24,10 @@ class AftonbladetIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# find internal video meta data
|
||||
meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
||||
meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
|
||||
player_config = self._parse_json(self._html_search_regex(
|
||||
r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
|
||||
internal_meta_id = player_config['videoId']
|
||||
internal_meta_id = player_config['aptomaVideoId']
|
||||
internal_meta_url = meta_url % internal_meta_id
|
||||
internal_meta_json = self._download_json(
|
||||
internal_meta_url, video_id, 'Downloading video meta data')
|
||||
|
@@ -5,6 +5,8 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -50,21 +52,25 @@ class AMPIE(InfoExtractor):
|
||||
if isinstance(media_content, dict):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data['@attributes']
|
||||
media_type = media['type']
|
||||
if media_type in ('video/f4m', 'application/f4m+xml'):
|
||||
media = media_data.get('@attributes', {})
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif media_type == 'application/x-mpegURL':
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
@@ -22,6 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
_TESTS = [{
|
||||
# jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
'info_dict': {
|
||||
'id': '161',
|
||||
@@ -30,17 +31,21 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
# Film wording is used instead of Episode
|
||||
# Film wording is used instead of Episode, ger/jap, Dub/OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Episodes without titles
|
||||
# Episodes without titles, jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -110,35 +115,12 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
def extract_info(html, video_id, num=None):
|
||||
title, description = [None] * 2
|
||||
formats = []
|
||||
|
||||
for input_ in re.findall(
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
||||
attributes = extract_attributes(input_)
|
||||
playlist_urls = []
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||
@@ -161,7 +143,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
format_id_list.append(lang)
|
||||
if kind:
|
||||
format_id_list.append(kind)
|
||||
if not format_id_list:
|
||||
if not format_id_list and num is not None:
|
||||
format_id_list.append(compat_str(num))
|
||||
format_id = '-'.join(format_id_list)
|
||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||
@@ -215,28 +197,74 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
})
|
||||
formats.extend(file_formats)
|
||||
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
return {
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
})
|
||||
}
|
||||
|
||||
def extract_entries(html, video_id, common_info, num=None):
|
||||
info = extract_info(html, video_id, num)
|
||||
|
||||
if info['formats']:
|
||||
self._sort_formats(info['formats'])
|
||||
f = common_info.copy()
|
||||
f.update(info)
|
||||
entries.append(f)
|
||||
|
||||
# Extract teaser only when full episode is not available
|
||||
if not formats:
|
||||
# Extract teaser/trailer only when full episode is not available
|
||||
if not info['formats']:
|
||||
m = re.search(
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
|
||||
episode_html)
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
|
||||
html)
|
||||
if m:
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'id': '%s-teaser' % f['id'],
|
||||
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||
'title': m.group('title'),
|
||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
|
||||
def extract_episodes(html):
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
extract_entries(episode_html, video_id, common_info)
|
||||
|
||||
def extract_film(html, video_id):
|
||||
common_info = {
|
||||
'id': anime_id,
|
||||
'title': anime_title,
|
||||
'description': anime_description,
|
||||
}
|
||||
extract_entries(html, video_id, common_info)
|
||||
|
||||
extract_episodes(webpage)
|
||||
|
||||
if not entries:
|
||||
extract_film(webpage, anime_id)
|
||||
|
||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||
|
@@ -7,6 +7,8 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
|
||||
'info_dict': {
|
||||
'id': 'manofsteel',
|
||||
'id': '5111',
|
||||
'title': 'Man of Steel',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
@@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'id': 'blackthorn',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
}, {
|
||||
# json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
|
||||
'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
|
||||
'info_dict': {
|
||||
'id': '15881',
|
||||
'title': 'Kung Fu Panda 3',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||
'only_matching': True,
|
||||
@@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
|
||||
movie = mobj.group('movie')
|
||||
uploader_id = mobj.group('company')
|
||||
|
||||
webpage = self._download_webpage(url, movie)
|
||||
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
|
||||
film_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
|
||||
film_id, fatal=False)
|
||||
|
||||
if film_data:
|
||||
entries = []
|
||||
for clip in film_data.get('clips', []):
|
||||
clip_title = clip['title']
|
||||
|
||||
formats = []
|
||||
for version, version_data in clip.get('versions', {}).items():
|
||||
for size, size_data in version_data.get('sizes', {}).items():
|
||||
src = size_data.get('src')
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
'language': version[:2],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
|
||||
'formats': formats,
|
||||
'title': clip_title,
|
||||
'thumbnail': clip.get('screen') or clip.get('thumb'),
|
||||
'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
|
||||
'upload_date': unified_strdate(clip.get('posted')),
|
||||
'uploader_id': uploader_id,
|
||||
})
|
||||
|
||||
page_data = film_data.get('page', {})
|
||||
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
|
@@ -8,19 +8,19 @@ from .generic import GenericIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
qualities,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||
@@ -35,6 +35,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||
@@ -45,6 +46,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||
'duration': 5252,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
@@ -56,9 +58,21 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||
'duration': 3240,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'md5': '4e8f00631aac0395fee17368ac0e9867',
|
||||
'info_dict': {
|
||||
'id': '30796318',
|
||||
'ext': 'mp3',
|
||||
'title': 'Vor dem Fest',
|
||||
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||
'duration': 3287,
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
@@ -114,11 +128,14 @@ class ARDMediathekIE(InfoExtractor):
|
||||
continue
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
||||
video_id, preference=-1, f4m_id='hds', fatal=False))
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False))
|
||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
if server and server.startswith('rtmp'):
|
||||
f = {
|
||||
@@ -232,7 +249,8 @@ class ARDIE(InfoExtractor):
|
||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||
'upload_date': '20140804',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -274,41 +292,3 @@ class ARDIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class SportschauIE(ARDMediathekIE):
|
||||
IE_NAME = 'Sportschau'
|
||||
_VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
|
||||
'info_dict': {
|
||||
'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
|
||||
'ext': 'mp4',
|
||||
'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
base_url = mobj.group('baseurl')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = get_element_by_attribute('class', 'headline', webpage)
|
||||
description = self._html_search_meta('description', webpage, 'description')
|
||||
|
||||
info = self._extract_media_info(
|
||||
base_url + '-mc_defaultQuality-h.json', webpage, video_id)
|
||||
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
|
||||
return info
|
||||
|
115
youtube_dl/extractor/arkena.py
Normal file
115
youtube_dl/extractor/arkena.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class ArkenaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
'info_dict': {
|
||||
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': 'Royalty free test video',
|
||||
'timestamp': 1432816365,
|
||||
'upload_date': '20150528',
|
||||
'is_live': False,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
# See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
account_id = mobj.group('account_id')
|
||||
|
||||
playlist = self._download_json(
|
||||
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
|
||||
% (video_id, account_id),
|
||||
video_id, transform_source=strip_jsonp)['Playlist'][0]
|
||||
|
||||
media_info = playlist['MediaInfo']
|
||||
title = media_info['Title']
|
||||
media_files = playlist['MediaFiles']
|
||||
|
||||
is_live = False
|
||||
formats = []
|
||||
for kind_case, kind_formats in media_files.items():
|
||||
kind = kind_case.lower()
|
||||
for f in kind_formats:
|
||||
f_url = f.get('Url')
|
||||
if not f_url:
|
||||
continue
|
||||
is_live = f.get('Live') == 'true'
|
||||
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
||||
if kind == 'm3u8' or 'm3u8' in exts:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id=kind, fatal=False, live=is_live))
|
||||
elif kind == 'flash' or 'f4m' in exts:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f_url, video_id, f4m_id=kind, fatal=False))
|
||||
elif kind == 'dash' or 'mpd' in exts:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
f_url, video_id, mpd_id=kind, fatal=False))
|
||||
elif kind == 'silverlight':
|
||||
# TODO: process when ism is supported (see
|
||||
# https://github.com/rg3/youtube-dl/issues/8118)
|
||||
continue
|
||||
else:
|
||||
tbr = float_or_none(f.get('Bitrate'), 1000)
|
||||
formats.append({
|
||||
'url': f_url,
|
||||
'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media_info.get('Description')
|
||||
video_id = media_info.get('VideoId') or video_id
|
||||
timestamp = parse_iso8601(media_info.get('PublishDate'))
|
||||
thumbnails = [{
|
||||
'url': thumbnail['Url'],
|
||||
'width': int_or_none(thumbnail.get('Size')),
|
||||
} for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'is_live': is_live,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
@@ -61,10 +61,7 @@ class ArteTvIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class ArteTVPlus7IE(InfoExtractor):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
|
||||
|
||||
class ArteTVBaseIE(InfoExtractor):
|
||||
@classmethod
|
||||
def _extract_url_info(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url)
|
||||
@@ -78,60 +75,6 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
return video_id, lang
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self._extract_from_webpage(webpage, video_id, lang)
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
|
||||
ids = (video_id, '')
|
||||
# some pages contain multiple videos (like
|
||||
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
|
||||
# so we first try to look for json URLs that contain the video id from
|
||||
# the 'vid' parameter.
|
||||
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
|
||||
if embed_url:
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
# en and es URLs produce react-based pages with different layout (e.g.
|
||||
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
|
||||
if not iframe_url:
|
||||
program = self._search_regex(
|
||||
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
|
||||
webpage, 'program', default=None)
|
||||
if program:
|
||||
embed_html = self._parse_json(program, video_id)
|
||||
if embed_html:
|
||||
iframe_url = find_iframe_url(embed_html['embed_html'])
|
||||
if iframe_url:
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
if json_url:
|
||||
title = self._search_regex(
|
||||
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
webpage, 'title', default=None, group='title')
|
||||
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||
# Different kind of embed URL (e.g.
|
||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'embed url', group='url')
|
||||
return self.url_result(embed_url)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
info = self._download_json(json_url, video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
@@ -235,28 +178,94 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
return info_dict
|
||||
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self._extract_from_webpage(webpage, video_id, lang)
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
|
||||
ids = (video_id, '')
|
||||
# some pages contain multiple videos (like
|
||||
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
|
||||
# so we first try to look for json URLs that contain the video id from
|
||||
# the 'vid' parameter.
|
||||
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
|
||||
if embed_url:
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
# en and es URLs produce react-based pages with different layout (e.g.
|
||||
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
|
||||
if not iframe_url:
|
||||
program = self._search_regex(
|
||||
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
|
||||
webpage, 'program', default=None)
|
||||
if program:
|
||||
embed_html = self._parse_json(program, video_id)
|
||||
if embed_html:
|
||||
iframe_url = find_iframe_url(embed_html['embed_html'])
|
||||
if iframe_url:
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
if json_url:
|
||||
title = self._search_regex(
|
||||
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
webpage, 'title', default=None, group='title')
|
||||
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||
# Different kind of embed URL (e.g.
|
||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||
entries = [
|
||||
self.url_result(url)
|
||||
for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:creative'
|
||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
||||
'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1',
|
||||
'info_dict': {
|
||||
'id': '72176',
|
||||
'id': '057405-001-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Folge 2 - Corporate Design',
|
||||
'upload_date': '20131004',
|
||||
'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)',
|
||||
'upload_date': '20150716',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
|
||||
'info_dict': {
|
||||
'id': '160676',
|
||||
'ext': 'mp4',
|
||||
'title': 'Monty Python live (mostly)',
|
||||
'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
|
||||
'upload_date': '20140805',
|
||||
}
|
||||
'playlist_count': 11,
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
|
||||
'only_matching': True,
|
||||
@@ -267,7 +276,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:info'
|
||||
_VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
|
||||
'info_dict': {
|
||||
'id': '067528-000-A',
|
||||
@@ -275,7 +284,7 @@ class ArteTVInfoIE(ArteTVPlus7IE):
|
||||
'title': 'Service civique, un cache misère ?',
|
||||
'upload_date': '20160403',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
@@ -300,6 +309,8 @@ class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:ddc'
|
||||
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
if lang == 'folge':
|
||||
@@ -318,7 +329,7 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:concert'
|
||||
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
||||
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
|
||||
'info_dict': {
|
||||
@@ -328,24 +339,23 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
'upload_date': '20140128',
|
||||
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:cinema'
|
||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://cinema.arte.tv/de/node/38291',
|
||||
'md5': '6b275511a5107c60bacbeeda368c3aa1',
|
||||
_TESTS = [{
|
||||
'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck',
|
||||
'md5': 'a5b9dd5575a11d93daf0e3f404f45438',
|
||||
'info_dict': {
|
||||
'id': '055876-000_PWA12025-D',
|
||||
'id': '062494-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tod auf dem Nil',
|
||||
'upload_date': '20160122',
|
||||
'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
|
||||
'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck',
|
||||
'upload_date': '20150807',
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVMagazineIE(ArteTVPlus7IE):
|
||||
@@ -390,9 +400,42 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang')
|
||||
json_url = mobj.group('json_url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
|
||||
'info_dict': {
|
||||
'id': 'PL-013263',
|
||||
'title': 'Areva & Uramin',
|
||||
'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, lang = self._extract_url_info(url)
|
||||
collection = self._download_json(
|
||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
||||
% (lang, playlist_id), playlist_id)
|
||||
title = collection.get('title')
|
||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||
entries = [
|
||||
self._extract_from_json_url(
|
||||
video['jsonUrl'], video.get('programId') or playlist_id, lang)
|
||||
for video in collection['videos'] if video.get('jsonUrl')]
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
@@ -6,6 +6,7 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .soundcloud import SoundcloudIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
@@ -136,7 +137,7 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
result[resultkey] = api_response[apikey]
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': api_response.get('id', song_id),
|
||||
'id': compat_str(api_response.get('id', song_id)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title', song_id),
|
||||
'url': api_response['url'],
|
||||
|
@@ -46,6 +46,7 @@ class AzubuIE(InfoExtractor):
|
||||
'uploader_id': 272749,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Channel offline',
|
||||
},
|
||||
]
|
||||
|
||||
@@ -56,22 +57,26 @@ class AzubuIE(InfoExtractor):
|
||||
'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
|
||||
|
||||
title = data['title'].strip()
|
||||
description = data['description']
|
||||
thumbnail = data['thumbnail']
|
||||
view_count = data['view_count']
|
||||
uploader = data['user']['username']
|
||||
uploader_id = data['user']['id']
|
||||
description = data.get('description')
|
||||
thumbnail = data.get('thumbnail')
|
||||
view_count = data.get('view_count')
|
||||
user = data.get('user', {})
|
||||
uploader = user.get('username')
|
||||
uploader_id = user.get('id')
|
||||
|
||||
stream_params = json.loads(data['stream_params'])
|
||||
|
||||
timestamp = float_or_none(stream_params['creationDate'], 1000)
|
||||
duration = float_or_none(stream_params['length'], 1000)
|
||||
timestamp = float_or_none(stream_params.get('creationDate'), 1000)
|
||||
duration = float_or_none(stream_params.get('length'), 1000)
|
||||
|
||||
renditions = stream_params.get('renditions') or []
|
||||
video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
|
||||
if video:
|
||||
renditions.append(video)
|
||||
|
||||
if not renditions and not user.get('channel', {}).get('is_live', True):
|
||||
raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True)
|
||||
|
||||
formats = [{
|
||||
'url': fmt['url'],
|
||||
'width': fmt['frameWidth'],
|
||||
|
@@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '2b68e5851514c20efdff2afc5603b8b4',
|
||||
'md5': '73d0b3171568232574e45652f8720b5c',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'mp3',
|
||||
@@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)[0]
|
||||
track_id = compat_str(data['id'])
|
||||
|
||||
if not data.get('file'):
|
||||
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data['file'].items():
|
||||
@@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(data['id']),
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
|
@@ -31,7 +31,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
music/clips[/#]|
|
||||
radio/player/
|
||||
)
|
||||
(?P<id>%s)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
''' % _ID_REGEX
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
@@ -192,6 +192,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Now it\'s really geo-restricted',
|
||||
}, {
|
||||
# compact player (https://github.com/rg3/youtube-dl/issues/8147)
|
||||
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
||||
@@ -588,7 +589,8 @@ class BBCIE(BBCCoUkIE):
|
||||
'info_dict': {
|
||||
'id': '150615_telabyad_kentin_cogu',
|
||||
'ext': 'mp4',
|
||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||
'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
|
||||
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
||||
'timestamp': 1434397334,
|
||||
'upload_date': '20150615',
|
||||
},
|
||||
@@ -602,6 +604,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
},
|
||||
@@ -698,7 +701,9 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url)
|
||||
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
|
||||
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
|
||||
else super(BBCIE, cls).suitable(url))
|
||||
|
||||
def _extract_from_media_meta(self, media_meta, video_id):
|
||||
# Direct links to media in media metadata (e.g.
|
||||
@@ -815,8 +820,20 @@ class BBCIE(BBCCoUkIE):
|
||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||
if playlist:
|
||||
for key in ('progressiveDownload', 'streaming'):
|
||||
playlist_url = playlist.get('%sUrl' % key)
|
||||
if not playlist_url:
|
||||
continue
|
||||
try:
|
||||
entries.append(self._extract_from_playlist_sxml(
|
||||
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
|
||||
playlist_url, playlist_id, timestamp))
|
||||
except Exception as e:
|
||||
# Some playlist URL may fail with 500, at the same time
|
||||
# the other one may work fine (e.g.
|
||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
continue
|
||||
raise
|
||||
|
||||
if entries:
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
@@ -975,3 +992,82 @@ class BBCCoUkArticleIE(InfoExtractor):
|
||||
r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
||||
|
||||
class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
|
||||
|
||||
title, description = self._extract_title_and_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
||||
|
||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
|
||||
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
|
||||
'info_dict': {
|
||||
'id': 'b05rcz9v',
|
||||
'title': 'The Disappearance',
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
'skip': 'This programme is not currently available on BBC iPlayer',
|
||||
}, {
|
||||
# Available for over a year unlike 30 days for most other programmes
|
||||
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
|
||||
'info_dict': {
|
||||
'id': 'p02tcc32',
|
||||
'title': 'Bohemian Icons',
|
||||
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}]
|
||||
|
||||
def _extract_title_and_description(self, webpage):
|
||||
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
description = self._search_regex(
|
||||
r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
|
||||
webpage, 'description', fatal=False, group='value')
|
||||
return title, description
|
||||
|
||||
|
||||
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
|
||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
|
||||
_VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
|
||||
'info_dict': {
|
||||
'id': 'b05rcz9v',
|
||||
'title': 'The Disappearance - Clips - BBC Four',
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_title_and_description(self, webpage):
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
return title, description
|
||||
|
@@ -1,31 +1,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class BetIE(InfoExtractor):
|
||||
class BetIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
||||
'info_dict': {
|
||||
'id': 'news/national/2014/a-conversation-with-president-obama',
|
||||
'id': '07e96bd3-8850-3051-b856-271b457f0ab8',
|
||||
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
||||
'ext': 'flv',
|
||||
'title': 'A Conversation With President Obama',
|
||||
'description': 'md5:699d0652a350cf3e491cd15cc745b5da',
|
||||
'description': 'President Obama urges persistence in confronting racism and bias.',
|
||||
'duration': 1534,
|
||||
'timestamp': 1418075340,
|
||||
'upload_date': '20141208',
|
||||
'uploader': 'admin',
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
'subtitles': {
|
||||
'en': 'mincount:2',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -35,16 +31,17 @@ class BetIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
||||
'info_dict': {
|
||||
'id': 'news/national/2014/justice-for-ferguson-a-community-reacts',
|
||||
'id': '9f516bf1-7543-39c4-8076-dd441b459ba9',
|
||||
'display_id': 'justice-for-ferguson-a-community-reacts',
|
||||
'ext': 'flv',
|
||||
'title': 'Justice for Ferguson: A Community Reacts',
|
||||
'description': 'A BET News special.',
|
||||
'duration': 1696,
|
||||
'timestamp': 1416942360,
|
||||
'upload_date': '20141125',
|
||||
'uploader': 'admin',
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
'subtitles': {
|
||||
'en': 'mincount:2',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -53,57 +50,32 @@ class BetIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return compat_urllib_parse_urlencode({
|
||||
'uuid': uri,
|
||||
})
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
mgid = self._extract_mgid(webpage)
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
|
||||
media_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
||||
webpage, 'media URL'))
|
||||
info_dict = videos_info['entries'][0]
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'/video/(.*)/_jcr_content/', media_url, 'video id')
|
||||
upload_date = unified_strdate(self._html_search_meta('date', webpage))
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
mrss = self._download_xml(media_url, display_id)
|
||||
|
||||
item = mrss.find('./channel/item')
|
||||
|
||||
NS_MAP = {
|
||||
'dc': 'http://purl.org/dc/elements/1.1/',
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'ka': 'http://kickapps.com/karss',
|
||||
}
|
||||
|
||||
title = xpath_text(item, './title', 'title')
|
||||
description = xpath_text(
|
||||
item, './description', 'description', fatal=False)
|
||||
|
||||
timestamp = parse_iso8601(xpath_text(
|
||||
item, xpath_with_ns('./dc:date', NS_MAP),
|
||||
'upload date', fatal=False))
|
||||
uploader = xpath_text(
|
||||
item, xpath_with_ns('./dc:creator', NS_MAP),
|
||||
'uploader', fatal=False)
|
||||
|
||||
media_content = item.find(
|
||||
xpath_with_ns('./media:content', NS_MAP))
|
||||
duration = int_or_none(media_content.get('duration'))
|
||||
smil_url = media_content.get('url')
|
||||
|
||||
thumbnail = media_content.find(
|
||||
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
|
||||
|
||||
formats = self._extract_smil_formats(smil_url, display_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
info_dict.update({
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
'upload_date': upload_date,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@@ -12,7 +12,7 @@ class BigflixIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
|
||||
'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
|
||||
'md5': 'dc1b4aebb46e3a7077ecc0d9f43f61e3',
|
||||
'info_dict': {
|
||||
'id': '16537',
|
||||
'ext': 'mp4',
|
||||
@@ -26,7 +26,7 @@ class BigflixIE(InfoExtractor):
|
||||
'id': '16070',
|
||||
'ext': 'mp4',
|
||||
'title': 'Madarasapatinam',
|
||||
'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca',
|
||||
'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
|
||||
'formats': 'mincount:2',
|
||||
},
|
||||
'params': {
|
||||
|
@@ -1,34 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '2c301e4dab317596e837c3e7633e7d86',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'info_dict': {
|
||||
'id': '1554319',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308313,
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.067,
|
||||
'timestamp': 1398012660,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'timestamp': 1397983878,
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
@@ -36,75 +44,186 @@ class BiliBiliIE(InfoExtractor):
|
||||
'id': '1041170',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'uploader': '枫叶逝去',
|
||||
'timestamp': 1396501299,
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||
'info_dict': {
|
||||
'id': '4808130',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
|
||||
'info_dict': {
|
||||
'id': '4808130_part1',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '926f9f67d0c482091872fbd8eca7ea3d',
|
||||
'info_dict': {
|
||||
'id': '4808130_part2',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '4b7b225b968402d7c32348c646f1fd83',
|
||||
'info_dict': {
|
||||
'id': '4808130_part3',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
'md5': '7b795e214166501e9141139eea236e91',
|
||||
'info_dict': {
|
||||
'id': '4808130_part4',
|
||||
'ext': 'flv',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
# Missing upload time
|
||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||
'info_dict': {
|
||||
'id': '2880301',
|
||||
'ext': 'flv',
|
||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||
'uploader': '黑夜为猫',
|
||||
'uploader_id': '610729',
|
||||
},
|
||||
'params': {
|
||||
# Just to test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['upload time'],
|
||||
}]
|
||||
|
||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||
# the Android client
|
||||
# TODO: find the sign algorithm used in the flash player
|
||||
_APP_KEY = '86385cdc024c0f6c'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
page_num = mobj.group('page_num') or '1'
|
||||
|
||||
view_data = self._download_json(
|
||||
'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
|
||||
video_id)
|
||||
if 'error' in view_data:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cid = view_data['cid']
|
||||
title = unescapeHTML(view_data['title'])
|
||||
params = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))
|
||||
cid = params['cid'][0]
|
||||
|
||||
doc = self._download_xml(
|
||||
'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
|
||||
cid,
|
||||
'Downloading page %s/%s' % (page_num, view_data['pages'])
|
||||
)
|
||||
info_xml_str = self._download_webpage(
|
||||
'http://interface.bilibili.com/v_cdn_play',
|
||||
cid, query={'appkey': self._APP_KEY, 'cid': cid},
|
||||
note='Downloading video info page')
|
||||
|
||||
if xpath_text(doc, './result') == 'error':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
|
||||
err_msg = None
|
||||
durls = None
|
||||
info_xml = None
|
||||
try:
|
||||
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
|
||||
err_msg = (info_json or {}).get('error_text')
|
||||
else:
|
||||
err_msg = xpath_text(info_xml, './message')
|
||||
|
||||
if info_xml is not None:
|
||||
durls = info_xml.findall('./durl')
|
||||
if not durls:
|
||||
if err_msg:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
|
||||
else:
|
||||
raise ExtractorError('No videos found!')
|
||||
|
||||
entries = []
|
||||
|
||||
for durl in doc.findall('./durl'):
|
||||
for durl in durls:
|
||||
size = xpath_text(durl, ['./filesize', './size'])
|
||||
formats = [{
|
||||
'url': durl.find('./url').text,
|
||||
'filesize': int_or_none(size),
|
||||
'ext': 'flv',
|
||||
}]
|
||||
backup_urls = durl.find('./backup_url')
|
||||
if backup_urls is not None:
|
||||
for backup_url in backup_urls.findall('./url'):
|
||||
formats.append({'url': backup_url.text})
|
||||
formats.reverse()
|
||||
for backup_url in durl.findall('./backup_url/url'):
|
||||
formats.append({
|
||||
'url': backup_url.text,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
|
||||
'title': title,
|
||||
'duration': int_or_none(xpath_text(durl, './length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
datetime_str = self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
||||
timestamp = None
|
||||
if datetime_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': compat_str(cid),
|
||||
'title': title,
|
||||
'description': view_data.get('description'),
|
||||
'thumbnail': view_data.get('pic'),
|
||||
'uploader': view_data.get('author'),
|
||||
'timestamp': int_or_none(view_data.get('created')),
|
||||
'view_count': int_or_none(view_data.get('play')),
|
||||
'duration': int_or_none(xpath_text(doc, './timelength')),
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
|
||||
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
|
||||
for entry in entries:
|
||||
entry.update(info)
|
||||
|
||||
if len(entries) == 1:
|
||||
entries[0].update(info)
|
||||
return entries[0]
|
||||
else:
|
||||
info.update({
|
||||
for idx, entry in enumerate(entries):
|
||||
entry['id'] = '%s_part%d' % (video_id, (idx + 1))
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
})
|
||||
return info
|
||||
}
|
||||
|
@@ -2,11 +2,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
from .rudo import RudoIE
|
||||
|
||||
|
||||
class BioBioChileTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
_VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
||||
@@ -18,6 +22,7 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Fernando Atria',
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
# different uploader layout
|
||||
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
||||
@@ -32,6 +37,16 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||
'info_dict': {
|
||||
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
||||
'ext': 'mp4',
|
||||
'uploader': '(none)',
|
||||
'upload_date': '20160708',
|
||||
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||
'only_matching': True,
|
||||
@@ -45,42 +60,22 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
rudo_url = RudoIE._extract_url(webpage)
|
||||
if not rudo_url:
|
||||
raise ExtractorError('No videos found')
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
||||
|
||||
file_url = self._search_regex(
|
||||
r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'file url', group='url')
|
||||
|
||||
base_url = self._search_regex(
|
||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
|
||||
'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
|
||||
group='url')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
f = {
|
||||
'url': '%s%s' % (base_url, file_url),
|
||||
'format_id': 'http',
|
||||
'protocol': 'http',
|
||||
'preference': 1,
|
||||
}
|
||||
if formats:
|
||||
f_copy = formats[-1].copy()
|
||||
f_copy.update(f)
|
||||
f = f_copy
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
|
||||
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': rudo_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -29,7 +29,8 @@ class BRIE(InfoExtractor):
|
||||
'duration': 180,
|
||||
'uploader': 'Reinhard Weber',
|
||||
'upload_date': '20150422',
|
||||
}
|
||||
},
|
||||
'skip': '404 not found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
|
||||
@@ -40,7 +41,8 @@ class BRIE(InfoExtractor):
|
||||
'title': 'Manfred Schreiber ist tot',
|
||||
'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
|
||||
'duration': 26,
|
||||
}
|
||||
},
|
||||
'skip': '404 not found',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
|
||||
@@ -51,7 +53,8 @@ class BRIE(InfoExtractor):
|
||||
'title': 'Kurzweilig und sehr bewegend',
|
||||
'description': 'md5:0351996e3283d64adeb38ede91fac54e',
|
||||
'duration': 296,
|
||||
}
|
||||
},
|
||||
'skip': '404 not found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
|
||||
|
@@ -26,6 +26,8 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
clean_html,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -90,6 +92,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'description': 'md5:363109c02998fee92ec02211bd8000df',
|
||||
'uploader': 'National Ballet of Canada',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
},
|
||||
{
|
||||
# test flv videos served by akamaihd.net
|
||||
@@ -108,7 +111,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
# playlist test
|
||||
# playlist with 'videoList'
|
||||
# from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||
'info_dict': {
|
||||
@@ -117,6 +120,15 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
{
|
||||
# playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965)
|
||||
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
|
||||
'info_dict': {
|
||||
'id': '1522758701001',
|
||||
'title': 'Lesson 08',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
},
|
||||
]
|
||||
FLV_VCODECS = {
|
||||
1: 'SORENSON',
|
||||
@@ -298,13 +310,19 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
info_url, player_key, 'Downloading playlist information')
|
||||
|
||||
json_data = json.loads(playlist_info)
|
||||
if 'videoList' not in json_data:
|
||||
raise ExtractorError('Empty playlist')
|
||||
if 'videoList' in json_data:
|
||||
playlist_info = json_data['videoList']
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||
playlist_dto = playlist_info['mediaCollectionDTO']
|
||||
elif 'playlistTabs' in json_data:
|
||||
playlist_info = json_data['playlistTabs']
|
||||
playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
|
||||
else:
|
||||
raise ExtractorError('Empty playlist')
|
||||
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
|
||||
|
||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||
playlist_title=playlist_dto['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
video_id = compat_str(video_info['id'])
|
||||
@@ -528,14 +546,16 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
container = source.get('container')
|
||||
source_type = source.get('type')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
if source_type == 'application/x-mpegURL' or container == 'M2TS':
|
||||
if ext == 'ism':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'application/dash+xml':
|
||||
elif ext == 'mpd':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
|
||||
@@ -551,7 +571,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'tbr': tbr,
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'container': container,
|
||||
'ext': container.lower(),
|
||||
'ext': ext or container.lower(),
|
||||
}
|
||||
if width == 0 and height == 0:
|
||||
f.update({
|
||||
@@ -585,6 +605,13 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
@@ -597,7 +624,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': json_data.get('description'),
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': float_or_none(json_data.get('duration'), 1000),
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
|
@@ -5,6 +5,7 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .facebook import FacebookIE
|
||||
|
||||
|
||||
class BuzzFeedIE(InfoExtractor):
|
||||
@@ -20,11 +21,11 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'aVCR29aE_OQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Angry Ram destroys a punching bag..',
|
||||
'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
|
||||
'upload_date': '20141024',
|
||||
'uploader_id': 'Buddhanz1',
|
||||
'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
|
||||
'uploader': 'Buddhanz',
|
||||
'title': 'Angry Ram destroys a punching bag',
|
||||
'uploader': 'Angry Ram',
|
||||
}
|
||||
}]
|
||||
}, {
|
||||
@@ -41,13 +42,30 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'mVmBL8B-In0',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||
'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
|
||||
'upload_date': '20141124',
|
||||
'uploader_id': 'CindysMunchkin',
|
||||
'description': 're:© 2014 Munchkin the',
|
||||
'uploader': 're:^Munchkin the',
|
||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
|
||||
'info_dict': {
|
||||
'id': 'the-most-adorable-crash-landing-ever',
|
||||
'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
|
||||
'description': 'This gosling knows how to stick a landing.',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '763ca415512f91ca62e4621086900a23',
|
||||
'info_dict': {
|
||||
'id': '971793786185728',
|
||||
'ext': 'mp4',
|
||||
'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
|
||||
'uploader': 'Calgary Outdoor Centre-University of Calgary',
|
||||
},
|
||||
}],
|
||||
'add_ie': ['Facebook'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -66,6 +84,10 @@ class BuzzFeedIE(InfoExtractor):
|
||||
continue
|
||||
entries.append(self.url_result(video['url']))
|
||||
|
||||
facebook_url = FacebookIE._extract_url(webpage)
|
||||
if facebook_url:
|
||||
entries.append(self.url_result(facebook_url))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
|
@@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
||||
'info_dict': {
|
||||
'id': 'studio-c-season-5-episode-5',
|
||||
'ext': 'mp4',
|
||||
@@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,8 +9,10 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
clean_html,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -26,14 +27,14 @@ class CamdemyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': '',
|
||||
'creator': 'ss11spring',
|
||||
'duration': 1591,
|
||||
'upload_date': '20130114',
|
||||
'timestamp': 1358154556,
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# With non-empty description
|
||||
# webpage returns "No permission or not login"
|
||||
'url': 'http://www.camdemy.com/media/13885',
|
||||
'md5': '4576a3bb2581f86c61044822adbd1249',
|
||||
'info_dict': {
|
||||
@@ -41,64 +42,71 @@ class CamdemyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'EverCam + Camdemy QuickStart',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
|
||||
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
|
||||
'creator': 'evercam',
|
||||
'upload_date': '20140620',
|
||||
'timestamp': 1403271569,
|
||||
'duration': 318,
|
||||
}
|
||||
}, {
|
||||
# External source
|
||||
# External source (YouTube)
|
||||
'url': 'http://www.camdemy.com/media/14842',
|
||||
'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
|
||||
'info_dict': {
|
||||
'id': '2vsYQzNIsJo',
|
||||
'ext': 'mp4',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'upload_date': '20130211',
|
||||
'uploader': 'Hun Kim',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'uploader_id': 'hunkimtutorials',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
src_from = self._html_search_regex(
|
||||
r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
|
||||
'external source', default=None)
|
||||
r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
|
||||
webpage, 'external source', default=None, group='url')
|
||||
if src_from:
|
||||
return self.url_result(src_from)
|
||||
|
||||
oembed_obj = self._download_json(
|
||||
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
||||
|
||||
title = oembed_obj['title']
|
||||
thumb_url = oembed_obj['thumbnail_url']
|
||||
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
||||
file_list_doc = self._download_xml(
|
||||
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
||||
video_id, 'Filelist XML')
|
||||
video_id, 'Downloading filelist XML')
|
||||
file_name = file_list_doc.find('./video/item/fileName').text
|
||||
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
||||
|
||||
timestamp = parse_iso8601(self._html_search_regex(
|
||||
r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'creation time', fatal=False),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=8))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'view count', fatal=False))
|
||||
# Some URLs return "No permission or not login" in a webpage despite being
|
||||
# freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'>published on ([^<]+)<', webpage,
|
||||
'upload date', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
|
||||
webpage, 'view count', default=None))
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, default=None) or clean_html(
|
||||
oembed_obj.get('description'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': oembed_obj['title'],
|
||||
'title': title,
|
||||
'thumbnail': thumb_url,
|
||||
'description': self._html_search_meta('description', page),
|
||||
'creator': oembed_obj['author_name'],
|
||||
'duration': oembed_obj['duration'],
|
||||
'timestamp': timestamp,
|
||||
'description': description,
|
||||
'creator': oembed_obj.get('author_name'),
|
||||
'duration': parse_duration(oembed_obj.get('duration')),
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
|
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
qualities,
|
||||
int_or_none,
|
||||
)
|
||||
@@ -16,24 +16,38 @@ from ..utils import (
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
|
||||
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:
|
||||
(?:(?:www|m)\.)?canalplus\.fr|
|
||||
(?:www\.)?piwiplus\.fr|
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||
)
|
||||
|
||||
'''
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
|
||||
_SITE_ID_MAP = {
|
||||
'canalplus.fr': 'cplus',
|
||||
'piwiplus.fr': 'teletoon',
|
||||
'd8.tv': 'd8',
|
||||
'itele.fr': 'itele',
|
||||
'canalplus': 'cplus',
|
||||
'piwiplus': 'teletoon',
|
||||
'd8': 'd8',
|
||||
'd17': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092',
|
||||
'md5': '12164a6f14ff6df8bd628e8ba9b10b78',
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
||||
'md5': '41f438a4904f7664b91b4ed0dec969dc',
|
||||
'info_dict': {
|
||||
'id': '1263092',
|
||||
'id': '1192814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Zapping - 13/05/15',
|
||||
'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',
|
||||
'upload_date': '20150513',
|
||||
'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
|
||||
'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
|
||||
'upload_date': '20150105',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||
@@ -46,35 +60,45 @@ class CanalplusIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Only works from France',
|
||||
}, {
|
||||
'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
|
||||
'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
|
||||
'info_dict': {
|
||||
'id': '966289',
|
||||
'ext': 'flv',
|
||||
'title': 'Campagne intime - Documentaire exceptionnel',
|
||||
'description': 'md5:d2643b799fb190846ae09c61e59a859f',
|
||||
'upload_date': '20131108',
|
||||
},
|
||||
'skip': 'videos get deleted after a while',
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
|
||||
'md5': '38b8f7934def74f0d6f3ba6c036a5f82',
|
||||
'info_dict': {
|
||||
'id': '1213714',
|
||||
'id': '1390231',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
|
||||
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
|
||||
'upload_date': '20150211',
|
||||
'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
|
||||
'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
|
||||
'upload_date': '20160512',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
|
||||
'info_dict': {
|
||||
'id': '1398334',
|
||||
'ext': 'mp4',
|
||||
'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
|
||||
'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
|
||||
'upload_date': '20160607',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.groupdict().get('id')
|
||||
video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
|
||||
|
||||
site_id = self._SITE_ID_MAP[mobj.group('site') or 'canal']
|
||||
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
|
||||
|
||||
# Beware, some subclasses do not define an id group
|
||||
display_id = url_basename(mobj.group('path'))
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
88
youtube_dl/extractor/carambatv.py
Normal file
88
youtube_dl/extractor/carambatv.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class CarambaTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://video1.carambatv.ru/v/191910501',
|
||||
'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
|
||||
'info_dict': {
|
||||
'id': '191910501',
|
||||
'ext': 'mp4',
|
||||
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 2678.31,
|
||||
},
|
||||
}, {
|
||||
'url': 'carambatv:191910501',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id,
|
||||
video_id)
|
||||
|
||||
title = video['title']
|
||||
|
||||
base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id
|
||||
|
||||
formats = [{
|
||||
'url': base_url + f['fn'],
|
||||
'height': int_or_none(f.get('height')),
|
||||
'format_id': '%sp' % f['height'] if f.get('height') else None,
|
||||
} for f in video['qualities'] if f.get('fn')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = video.get('splash')
|
||||
duration = float_or_none(try_get(
|
||||
video, lambda x: x['annotations'][0]['end_time'], compat_str))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class CarambaTVPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '191910501',
|
||||
'ext': 'mp4',
|
||||
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 2678.31,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._og_search_property('video:iframe', webpage, default=None)
|
||||
|
||||
if not video_url:
|
||||
video_id = self._search_regex(
|
||||
r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
|
||||
webpage, 'video id')
|
||||
video_url = 'carambatv:%s' % video_id
|
||||
|
||||
return self.url_result(video_url, CarambaTVIE.ie_key())
|
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,8 +27,22 @@ class CBCIE(InfoExtractor):
|
||||
'upload_date': '20160203',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# with clipId
|
||||
# with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
|
||||
'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
|
||||
'md5': '162adfa070274b144f4fdc3c3b8207db',
|
||||
'info_dict': {
|
||||
'id': '2414435309',
|
||||
'ext': 'mp4',
|
||||
'title': '22 Minutes Update: What Not To Wear Quebec',
|
||||
'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
|
||||
'upload_date': '20131025',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'timestamp': 1382717907,
|
||||
},
|
||||
}, {
|
||||
# with clipId, feed only available via tpfeed.cbc.ca
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
||||
'info_dict': {
|
||||
@@ -64,6 +80,7 @@ class CBCIE(InfoExtractor):
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}],
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -81,6 +98,12 @@ class CBCIE(InfoExtractor):
|
||||
media_id = player_info.get('mediaId')
|
||||
if not media_id:
|
||||
clip_id = player_info['clipId']
|
||||
feed = self._download_json(
|
||||
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
|
||||
clip_id, fatal=False)
|
||||
if feed:
|
||||
media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
|
||||
if not media_id:
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
@@ -104,6 +127,7 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'upload_date': '20160210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||
|
@@ -1,15 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class CBSBaseIE(ThePlatformIE):
|
||||
class CBSBaseIE(ThePlatformFeedIE):
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
|
||||
return {
|
||||
@@ -19,9 +17,22 @@ class CBSBaseIE(ThePlatformIE):
|
||||
}]
|
||||
} if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
|
||||
|
||||
def _extract_video_info(self, filter_query, video_id):
|
||||
return self._extract_feed_info(
|
||||
'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: {
|
||||
'series': entry.get('cbs$SeriesTitle'),
|
||||
'season_number': int_or_none(entry.get('cbs$SeasonNumber')),
|
||||
'episode': entry.get('cbs$EpisodeTitle'),
|
||||
'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')),
|
||||
}, {
|
||||
'StreamPack': {
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
@@ -36,25 +47,7 @@ class CBSIE(CBSBaseIE):
|
||||
'upload_date': '20131127',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
|
||||
'info_dict': {
|
||||
'id': 'WWF_5KqY3PK1',
|
||||
'display_id': 'st-vincent',
|
||||
'ext': 'flv',
|
||||
'title': 'Live on Letterman - St. Vincent',
|
||||
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
|
||||
'duration': 3221,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'_skip': 'Blocked outside the US',
|
||||
}, {
|
||||
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
|
||||
@@ -66,43 +59,5 @@ class CBSIE(CBSBaseIE):
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content_id = self._search_regex(
|
||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||
webpage, 'content id')
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for item in items_data.findall('.//item'):
|
||||
pid = xpath_text(item, 'pid')
|
||||
if not pid:
|
||||
continue
|
||||
tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid
|
||||
if '.m3u8' in xpath_text(item, 'contentUrl', default=''):
|
||||
tp_release_url += '&manifest=m3u'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
tp_release_url, content_id, 'Downloading %s SMIL data' % pid)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id)
|
||||
info.update({
|
||||
'id': content_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': xpath_text(video_data, 'previewImageURL'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
content_id = self._match_id(url)
|
||||
return self._extract_video_info('byGuid=%s' % content_id, content_id)
|
||||
|
@@ -80,9 +80,6 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
|
||||
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
|
||||
formats, subtitles = [], {}
|
||||
if site == 'cnet':
|
||||
formats, subtitles = self._extract_theplatform_smil(
|
||||
self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id)
|
||||
for (fkey, vid) in vdata['files'].items():
|
||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
||||
continue
|
||||
@@ -94,7 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self.get_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
||||
info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
@@ -26,13 +26,17 @@ class CBSNewsIE(CBSBaseIE):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Subscribers only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
'info_dict': {
|
||||
'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
|
||||
'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
||||
'description': 'md5:4a6983e480542d8b333a947bfc64ddc7',
|
||||
'upload_date': '19700101',
|
||||
'uploader': 'CBSI-NEW',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 205,
|
||||
'subtitles': {
|
||||
@@ -58,37 +62,15 @@ class CBSNewsIE(CBSBaseIE):
|
||||
webpage, 'video JSON info'), video_id)
|
||||
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
title = item.get('articleTitle') or item.get('hed')
|
||||
duration = item.get('duration')
|
||||
thumbnail = item.get('mediaImage') or item.get('thumbnail')
|
||||
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
|
||||
pid = item.get('media' + format_id)
|
||||
if not pid:
|
||||
continue
|
||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
guid = item['mpxRefId']
|
||||
return self._extract_video_info('byGuid=%s' % guid, guid)
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
@@ -96,7 +78,15 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
}
|
||||
'skip': 'Video gone, redirected to http://www.cbsnews.com/live/',
|
||||
}, {
|
||||
'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/',
|
||||
'info_dict': {
|
||||
'id': 'video-shows-intense-paragliding-accident',
|
||||
'ext': 'flv',
|
||||
'title': 'Video Shows Intense Paragliding Accident',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -1,30 +1,28 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .cbs import CBSBaseIE
|
||||
|
||||
|
||||
class CBSSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
|
||||
class CBSSportsIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast',
|
||||
'info_dict': {
|
||||
'id': '_d5_GbO8p1sT',
|
||||
'ext': 'flv',
|
||||
'title': 'US Open flashbacks: 1990s',
|
||||
'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
|
||||
'id': '708337219968',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ben Simmons the next LeBron? Not so fast',
|
||||
'description': 'md5:854294f627921baba1f4b9a990d87197',
|
||||
'timestamp': 1466293740,
|
||||
'upload_date': '20160618',
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
section = mobj.group('section')
|
||||
video_id = mobj.group('id')
|
||||
all_videos = self._download_json(
|
||||
'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
|
||||
video_id)
|
||||
# The json file contains the info of all the videos in the section
|
||||
video_info = next(v for v in all_videos if v['pcid'] == video_id)
|
||||
return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
||||
|
@@ -58,7 +58,8 @@ class CDAIE(InfoExtractor):
|
||||
def extract_format(page, version):
|
||||
unpacked = decode_packed_codes(page)
|
||||
format_url = self._search_regex(
|
||||
r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False)
|
||||
r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
|
||||
'%s url' % version, fatal=False, group='url')
|
||||
if not format_url:
|
||||
return
|
||||
f = {
|
||||
@@ -75,7 +76,8 @@ class CDAIE(InfoExtractor):
|
||||
info_dict['formats'].append(f)
|
||||
if not info_dict['duration']:
|
||||
info_dict['duration'] = parse_duration(self._search_regex(
|
||||
r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False))
|
||||
r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
|
||||
unpacked, 'duration', fatal=False, group='duration'))
|
||||
|
||||
extract_format(webpage, 'default')
|
||||
|
||||
|
@@ -20,10 +20,9 @@ class Channel9IE(InfoExtractor):
|
||||
'''
|
||||
IE_DESC = 'Channel 9'
|
||||
IE_NAME = 'channel9'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'
|
||||
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
_TESTS = [{
|
||||
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
|
||||
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
|
||||
'info_dict': {
|
||||
@@ -36,10 +35,10 @@ class Channel9IE(InfoExtractor):
|
||||
'session_code': 'KOS002',
|
||||
'session_day': 'Day 1',
|
||||
'session_room': 'Arena 1A',
|
||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
|
||||
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
|
||||
'Mads Kristensen'],
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
|
||||
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
|
||||
'info_dict': {
|
||||
@@ -51,8 +50,7 @@ class Channel9IE(InfoExtractor):
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'authors': ['Mike Wilmot'],
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
# low quality mp4 is best
|
||||
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
|
||||
'info_dict': {
|
||||
@@ -66,8 +64,20 @@ class Channel9IE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
|
||||
'info_dict': {
|
||||
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
|
||||
'title': 'Channel 9',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
|
||||
|
||||
@@ -254,22 +264,30 @@ class Channel9IE(InfoExtractor):
|
||||
|
||||
return self.playlist_result(contents)
|
||||
|
||||
def _extract_list(self, content_path):
|
||||
rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
|
||||
def _extract_list(self, video_id, rss_url=None):
|
||||
if not rss_url:
|
||||
rss_url = self._RSS_URL % video_id
|
||||
rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
|
||||
entries = [self.url_result(session_url.text, 'Channel9')
|
||||
for session_url in rss.findall('./channel/item/link')]
|
||||
title_text = rss.find('./channel/title').text
|
||||
return self.playlist_result(entries, content_path, title_text)
|
||||
return self.playlist_result(entries, video_id, title_text)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
content_path = mobj.group('contentpath')
|
||||
rss = mobj.group('rss')
|
||||
|
||||
webpage = self._download_webpage(url, content_path, 'Downloading web page')
|
||||
if rss:
|
||||
return self._extract_list(content_path, url)
|
||||
|
||||
page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
|
||||
if page_type_m is not None:
|
||||
page_type = page_type_m.group('pagetype')
|
||||
webpage = self._download_webpage(
|
||||
url, content_path, 'Downloading web page')
|
||||
|
||||
page_type = self._search_regex(
|
||||
r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
|
||||
webpage, 'page type', default=None, group='pagetype')
|
||||
if page_type:
|
||||
if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
|
||||
return self._extract_entry_item(webpage, content_path)
|
||||
elif page_type == 'Session': # Event session page, may contain downloadable content
|
||||
@@ -278,6 +296,5 @@ class Channel9IE(InfoExtractor):
|
||||
return self._extract_list(content_path)
|
||||
else:
|
||||
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
|
||||
|
||||
else: # Assuming list
|
||||
return self._extract_list(content_path)
|
||||
|
@@ -23,7 +23,7 @@ class CliphunterIE(InfoExtractor):
|
||||
(?P<id>[0-9]+)/
|
||||
(?P<seo>.+?)(?:$|[#\?])
|
||||
'''
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||
'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
|
||||
'info_dict': {
|
||||
@@ -32,8 +32,19 @@ class CliphunterIE(InfoExtractor):
|
||||
'title': 'Fun Jynx Maze solo',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
|
||||
'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
|
||||
'info_dict': {
|
||||
'id': '2019449',
|
||||
'ext': 'mp4',
|
||||
'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -1,16 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
from .onet import OnetBaseIE
|
||||
|
||||
|
||||
class ClipRsIE(InfoExtractor):
|
||||
class ClipRsIE(OnetBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
|
||||
_TEST = {
|
||||
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
|
||||
@@ -27,64 +21,13 @@ class ClipRsIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
|
||||
mvp_id = self._search_mvp_id(webpage)
|
||||
|
||||
response = self._download_json(
|
||||
'http://qi.ckm.onetapi.pl/', video_id,
|
||||
query={
|
||||
'body[id]': video_id,
|
||||
'body[jsonrpc]': '2.0',
|
||||
'body[method]': 'get_asset_detail',
|
||||
'body[params][ID_Publikacji]': video_id,
|
||||
'body[params][Service]': 'www.onet.pl',
|
||||
'content-type': 'application/jsonp',
|
||||
'x-onet-app': 'player.front.onetapi.pl',
|
||||
})
|
||||
info_dict = self._extract_from_id(mvp_id, webpage)
|
||||
info_dict['display_id'] = display_id
|
||||
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error['message']), expected=True)
|
||||
|
||||
video = response['result'].get('0')
|
||||
|
||||
formats = []
|
||||
for _, formats_dict in video['formats'].items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
for format_id, format_list in formats_dict.items():
|
||||
if not isinstance(format_list, list):
|
||||
continue
|
||||
for f in format_list:
|
||||
if not f.get('url'):
|
||||
continue
|
||||
formats.append({
|
||||
'url': f['url'],
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(f.get('vertical_resolution')),
|
||||
'width': int_or_none(f.get('horizontal_resolution')),
|
||||
'abr': float_or_none(f.get('audio_bitrate')),
|
||||
'vbr': float_or_none(f.get('video_bitrate')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
meta = video.get('meta', {})
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or meta['title']
|
||||
description = self._og_search_description(webpage, default=None) or meta.get('description')
|
||||
duration = meta.get('length') or meta.get('lenght')
|
||||
timestamp = parse_iso8601(meta.get('addDate'), ' ')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
||||
return info_dict
|
||||
|
92
youtube_dl/extractor/closertotruth.py
Normal file
92
youtube_dl/extractor/closertotruth.py
Normal file
@@ -0,0 +1,92 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CloserToTruthIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
|
||||
'info_dict': {
|
||||
'id': '0_zof1ktre',
|
||||
'display_id': 'solutions-the-mind-body-problem',
|
||||
'ext': 'mov',
|
||||
'title': 'Solutions to the Mind-Body Problem?',
|
||||
'upload_date': '20140221',
|
||||
'timestamp': 1392956007,
|
||||
'uploader_id': 'CTTXML'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://closertotruth.com/episodes/how-do-brains-work',
|
||||
'info_dict': {
|
||||
'id': '0_iuxai6g6',
|
||||
'display_id': 'how-do-brains-work',
|
||||
'ext': 'mov',
|
||||
'title': 'How do Brains Work?',
|
||||
'upload_date': '20140221',
|
||||
'timestamp': 1392956024,
|
||||
'uploader_id': 'CTTXML'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://closertotruth.com/interviews/1725',
|
||||
'info_dict': {
|
||||
'id': '1725',
|
||||
'title': 'AyaFr-002',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
|
||||
webpage, 'kaltura partner_id')
|
||||
|
||||
title = self._search_regex(
|
||||
r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
|
||||
|
||||
select = self._search_regex(
|
||||
r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
|
||||
webpage, 'select version', default=None)
|
||||
if select:
|
||||
entry_ids = set()
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
|
||||
webpage):
|
||||
entry_id = mobj.group('id')
|
||||
if entry_id in entry_ids:
|
||||
continue
|
||||
entry_ids.add(entry_id)
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'title': mobj.group('title'),
|
||||
})
|
||||
if entries:
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
|
||||
entry_id = self._search_regex(
|
||||
r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
|
||||
webpage, 'kaltura entry_id', group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'display_id': display_id,
|
||||
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'title': title
|
||||
}
|
@@ -6,7 +6,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,17 +16,16 @@ from ..utils import (
|
||||
|
||||
|
||||
class CloudyIE(InfoExtractor):
|
||||
_IE_DESC = 'cloudy.ec and videoraj.ch'
|
||||
_IE_DESC = 'cloudy.ec'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
|
||||
https?://(?:www\.)?cloudy\.ec/
|
||||
(?:v/|embed\.php\?id=)
|
||||
(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_EMBED_URL = 'http://www.%s/embed.php?id=%s'
|
||||
_API_URL = 'http://www.%s/api/player.api.php?%s'
|
||||
_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
|
||||
_API_URL = 'http://www.cloudy.ec/api/player.api.php'
|
||||
_MAX_TRIES = 2
|
||||
_TESTS = [
|
||||
{
|
||||
_TEST = {
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
||||
'info_dict': {
|
||||
@@ -35,19 +33,9 @@ class CloudyIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.videoraj.to/v/47f399fd8bb60',
|
||||
'md5': '7d0f8799d91efd4eda26587421c3c3b0',
|
||||
'info_dict': {
|
||||
'id': '47f399fd8bb60',
|
||||
'ext': 'flv',
|
||||
'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
|
||||
def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
|
||||
|
||||
if try_num > self._MAX_TRIES - 1:
|
||||
raise ExtractorError('Unable to extract video URL', expected=True)
|
||||
@@ -64,9 +52,8 @@ class CloudyIE(InfoExtractor):
|
||||
'errorUrl': error_url,
|
||||
})
|
||||
|
||||
data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form))
|
||||
player_data = self._download_webpage(
|
||||
data_url, video_id, 'Downloading player data')
|
||||
self._API_URL, video_id, 'Downloading player data', query=form)
|
||||
data = compat_parse_qs(player_data)
|
||||
|
||||
try_num += 1
|
||||
@@ -88,7 +75,7 @@ class CloudyIE(InfoExtractor):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
||||
self.report_warning('Invalid video URL, requesting another', video_id)
|
||||
return self._extract_video(video_host, video_id, file_key, video_url, try_num)
|
||||
return self._extract_video(video_id, file_key, video_url, try_num)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -98,14 +85,13 @@ class CloudyIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = self._EMBED_URL % (video_host, video_id)
|
||||
url = self._EMBED_URL % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
file_key = self._search_regex(
|
||||
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
||||
webpage, 'file_key')
|
||||
|
||||
return self._extract_video(video_host, video_id, file_key)
|
||||
return self._extract_video(video_id, file_key)
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
@@ -16,7 +18,27 @@ class CMTIE(MTVIE):
|
||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
'description': 'Blame It All On My Roots',
|
||||
},
|
||||
'skip': 'Video not available',
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908',
|
||||
'md5': 'e61a801ca4a183a466c08bd98dccbb1c',
|
||||
'info_dict': {
|
||||
'id': '1504699',
|
||||
'ext': 'mp4',
|
||||
'title': 'Still The King Ep. 109 in 3 Minutes',
|
||||
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
|
||||
'timestamp': 1469421000.0,
|
||||
'upload_date': '20160725',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
if 'error_not_available.swf' in rtmp_video_url:
|
||||
raise ExtractorError(
|
||||
'%s said: video is not available' % cls.IE_NAME, expected=True)
|
||||
|
||||
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||
|
@@ -1,17 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
@@ -26,8 +15,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||
'ext': 'mp4',
|
||||
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
|
||||
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||
'timestamp': 1376798400,
|
||||
'upload_date': '20130818',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||
@@ -35,238 +26,73 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'The Daily Show / The Colbert Report'
|
||||
# urls can be abbreviations like :thedailyshow
|
||||
# urls for episodes like:
|
||||
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
||||
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/
|
||||
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
|
||||
(?:/[^/?#]?|[?#]|$))))
|
||||
'''
|
||||
class ToshIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'Tosh.0'
|
||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'info_dict': {
|
||||
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'title': 'Twitter Users Share Summer Plans',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
||||
'info_dict': {
|
||||
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121213',
|
||||
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow kristen-stewart part 1',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
|
||||
'info_dict': {
|
||||
'id': 'sarah-chayes-extended-interview',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'title': 'thedailyshow Sarah Chayes Extended Interview',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
|
||||
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
# It's really reported to be published on year 2077
|
||||
'upload_date': '20770610',
|
||||
'timestamp': 3390510600,
|
||||
'subtitles': {
|
||||
'en': 'mincount:3',
|
||||
},
|
||||
},
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
|
||||
},
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
||||
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||
new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm')
|
||||
return new_urls
|
||||
|
||||
_video_extensions = {
|
||||
'3500': 'mp4',
|
||||
'2200': 'mp4',
|
||||
'1700': 'mp4',
|
||||
'1200': 'mp4',
|
||||
'750': 'mp4',
|
||||
'400': 'mp4',
|
||||
}
|
||||
_video_dimensions = {
|
||||
'3500': (1280, 720),
|
||||
'2200': (960, 540),
|
||||
'1700': (768, 432),
|
||||
'1200': (640, 360),
|
||||
'750': (512, 288),
|
||||
'400': (384, 216),
|
||||
}
|
||||
|
||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-f99b626bdfe13568579a',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if mobj.group('shortname'):
|
||||
return self.url_result('http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if mobj.group('clip'):
|
||||
if mobj.group('videotitle'):
|
||||
epTitle = mobj.group('videotitle')
|
||||
elif mobj.group('showname') == 'thedailyshow':
|
||||
epTitle = mobj.group('tdstitle')
|
||||
else:
|
||||
epTitle = mobj.group('cntitle')
|
||||
dlNewest = False
|
||||
elif mobj.group('interview'):
|
||||
epTitle = mobj.group('interview_title')
|
||||
dlNewest = False
|
||||
else:
|
||||
dlNewest = not mobj.group('episode')
|
||||
if dlNewest:
|
||||
epTitle = mobj.group('showname')
|
||||
else:
|
||||
epTitle = mobj.group('episode')
|
||||
show_name = mobj.group('showname')
|
||||
mrss_url = self._search_regex(
|
||||
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'mrss url', group='url')
|
||||
|
||||
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
|
||||
if dlNewest:
|
||||
url = htmlHandle.geturl()
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
||||
if mobj.group('episode') == '':
|
||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
||||
epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
|
||||
|
||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
||||
if len(mMovieParams) == 0:
|
||||
# The Colbert Report embeds the information in a without
|
||||
# a URL prefix; so extract the alternate reference
|
||||
# and then add the URL prefix manually.
|
||||
|
||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
|
||||
if len(altMovieParams) == 0:
|
||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
||||
else:
|
||||
mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])]
|
||||
|
||||
uri = mMovieParams[0][1]
|
||||
# Correct cc.com in uri
|
||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
|
||||
|
||||
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri}))
|
||||
idoc = self._download_xml(
|
||||
index_url, epTitle,
|
||||
'Downloading show index', 'Unable to download episode index')
|
||||
|
||||
title = idoc.find('./channel/title').text
|
||||
description = idoc.find('./channel/description').text
|
||||
|
||||
entries = []
|
||||
item_els = idoc.findall('.//item')
|
||||
for part_num, itemEl in enumerate(item_els):
|
||||
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
|
||||
|
||||
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
duration = float_or_none(content.attrib.get('duration'))
|
||||
mediagen_url = content.attrib['url']
|
||||
guid = itemEl.find('./guid').text.rpartition(':')[-1]
|
||||
|
||||
cdoc = self._download_xml(
|
||||
mediagen_url, epTitle,
|
||||
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
|
||||
|
||||
turls = []
|
||||
for rendition in cdoc.findall('.//rendition'):
|
||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||
turls.append(finfo)
|
||||
|
||||
formats = []
|
||||
for format, rtmp_video_url in turls:
|
||||
w, h = self._video_dimensions.get(format, (None, None))
|
||||
formats.append({
|
||||
'format_id': 'vhttp-%s' % format,
|
||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format,
|
||||
'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = self._extract_subtitles(cdoc, guid)
|
||||
|
||||
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
||||
entries.append({
|
||||
'id': guid,
|
||||
'title': virtual_id,
|
||||
'formats': formats,
|
||||
'uploader': show_name,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': epTitle,
|
||||
'entries': entries,
|
||||
'title': show_name + ' ' + title,
|
||||
'description': description,
|
||||
}
|
||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||
|
@@ -44,7 +44,9 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
determine_protocol,
|
||||
@@ -52,6 +54,9 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
parse_m3u8_attributes,
|
||||
extract_attributes,
|
||||
parse_codecs,
|
||||
)
|
||||
|
||||
|
||||
@@ -159,6 +164,7 @@ class InfoExtractor(object):
|
||||
* "height" (optional, int)
|
||||
* "resolution" (optional, string "{width}x{height"},
|
||||
deprecated)
|
||||
* "filesize" (optional, int)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@@ -747,10 +753,12 @@ class InfoExtractor(object):
|
||||
return self._og_search_property('url', html, **kargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
|
||||
if not isinstance(name, (list, tuple)):
|
||||
name = [name]
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
display_name = name[0]
|
||||
return self._html_search_regex(
|
||||
self._meta_regex(name),
|
||||
[self._meta_regex(n) for n in name],
|
||||
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||
|
||||
def _dc_search_uploader(self, html):
|
||||
@@ -799,15 +807,17 @@ class InfoExtractor(object):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
|
||||
def _search_json_ld(self, html, video_id, **kwargs):
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_ld = self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
if not json_ld:
|
||||
return {}
|
||||
return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
|
||||
return self._json_ld(
|
||||
json_ld, video_id, fatal=kwargs.get('fatal', True),
|
||||
expected_type=expected_type)
|
||||
|
||||
def _json_ld(self, json_ld, video_id, fatal=True):
|
||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
if isinstance(json_ld, compat_str):
|
||||
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
||||
if not json_ld:
|
||||
@@ -815,6 +825,8 @@ class InfoExtractor(object):
|
||||
info = {}
|
||||
if json_ld.get('@context') == 'http://schema.org':
|
||||
item_type = json_ld.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type == 'TVEpisode':
|
||||
info.update({
|
||||
'episode': unescapeHTML(json_ld.get('name')),
|
||||
@@ -833,6 +845,19 @@ class InfoExtractor(object):
|
||||
'title': unescapeHTML(json_ld.get('headline')),
|
||||
'description': unescapeHTML(json_ld.get('articleBody')),
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
info.update({
|
||||
'url': json_ld.get('contentUrl'),
|
||||
'title': unescapeHTML(json_ld.get('name')),
|
||||
'description': unescapeHTML(json_ld.get('description')),
|
||||
'thumbnail': json_ld.get('thumbnailUrl'),
|
||||
'duration': parse_duration(json_ld.get('duration')),
|
||||
'timestamp': unified_timestamp(json_ld.get('uploadDate')),
|
||||
'filesize': float_or_none(json_ld.get('contentSize')),
|
||||
'tbr': int_or_none(json_ld.get('bitrate')),
|
||||
'width': int_or_none(json_ld.get('width')),
|
||||
'height': int_or_none(json_ld.get('height')),
|
||||
})
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
@staticmethod
|
||||
@@ -874,7 +899,11 @@ class InfoExtractor(object):
|
||||
f['ext'] = determine_ext(f['url'])
|
||||
|
||||
if isinstance(field_preference, (list, tuple)):
|
||||
return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
|
||||
return tuple(
|
||||
f.get(field)
|
||||
if f.get(field) is not None
|
||||
else ('' if field == 'format_id' else -1)
|
||||
for field in field_preference)
|
||||
|
||||
preference = f.get('preference')
|
||||
if preference is None:
|
||||
@@ -987,7 +1016,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
fatal=True, m3u8_id=None):
|
||||
manifest = self._download_xml(
|
||||
manifest_url, video_id, 'Downloading f4m manifest',
|
||||
'Unable to download f4m manifest',
|
||||
@@ -1001,11 +1030,11 @@ class InfoExtractor(object):
|
||||
|
||||
return self._parse_f4m_formats(
|
||||
manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal)
|
||||
transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
|
||||
|
||||
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
fatal=True, m3u8_id=None):
|
||||
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
|
||||
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
|
||||
if akamai_pv is not None and ';' in akamai_pv.text:
|
||||
@@ -1029,9 +1058,26 @@ class InfoExtractor(object):
|
||||
'base URL', default=None)
|
||||
if base_url:
|
||||
base_url = base_url.strip()
|
||||
|
||||
bootstrap_info = xpath_element(
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
||||
'bootstrap info', default=None)
|
||||
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
width = int_or_none(media_el.attrib.get('width'))
|
||||
height = int_or_none(media_el.attrib.get('height'))
|
||||
format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
|
||||
# If <bootstrapInfo> is present, the specified f4m is a
|
||||
# stream-level manifest, and only set-level manifests may refer to
|
||||
# external resources. See section 11.4 and section 4 of F4M spec
|
||||
if bootstrap_info is None:
|
||||
media_url = None
|
||||
# @href is introduced in 2.0, see section 11.6 of F4M spec
|
||||
if manifest_version == '2.0':
|
||||
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
|
||||
media_url = media_el.attrib.get('href')
|
||||
if media_url is None:
|
||||
media_url = media_el.attrib.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
manifest_url = (
|
||||
@@ -1041,19 +1087,37 @@ class InfoExtractor(object):
|
||||
# since bitrates in parent manifest (this one) and media_url manifest
|
||||
# may differ leading to inability to resolve the format by requested
|
||||
# bitrate in f4m downloader
|
||||
if determine_ext(manifest_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
ext = determine_ext(manifest_url)
|
||||
if ext == 'f4m':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal))
|
||||
transform_source=transform_source, fatal=fatal)
|
||||
# Sometimes stream-level manifest contains single media entry that
|
||||
# does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
|
||||
# At the same time parent's media entry in set-level manifest may
|
||||
# contain it. We will copy it from parent in such cases.
|
||||
if len(f4m_formats) == 1:
|
||||
f = f4m_formats[0]
|
||||
f.update({
|
||||
'tbr': f.get('tbr') or tbr,
|
||||
'width': f.get('width') or width,
|
||||
'height': f.get('height') or height,
|
||||
'format_id': f.get('format_id') if not tbr else format_id,
|
||||
})
|
||||
formats.extend(f4m_formats)
|
||||
continue
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', preference=preference,
|
||||
m3u8_id=m3u8_id, fatal=fatal))
|
||||
continue
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
||||
'format_id': format_id,
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'ext': 'flv' if bootstrap_info is not None else None,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(media_el.attrib.get('width')),
|
||||
'height': int_or_none(media_el.attrib.get('height')),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'preference': preference,
|
||||
})
|
||||
return formats
|
||||
@@ -1114,23 +1178,11 @@ class InfoExtractor(object):
|
||||
}]
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_info = {}
|
||||
for m in kv_rex.finditer(line):
|
||||
v = m.group('val')
|
||||
if v.startswith('"'):
|
||||
v = v[1:-1]
|
||||
last_info[m.group('key')] = v
|
||||
last_info = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
last_media = {}
|
||||
for m in kv_rex.finditer(line):
|
||||
v = m.group('val')
|
||||
if v.startswith('"'):
|
||||
v = v[1:-1]
|
||||
last_media[m.group('key')] = v
|
||||
last_media = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
@@ -1155,6 +1207,7 @@ class InfoExtractor(object):
|
||||
'url': format_url(line.strip()),
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
@@ -1163,24 +1216,17 @@ class InfoExtractor(object):
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
vcodec, acodec = [None] * 2
|
||||
va_codecs = codecs.split(',')
|
||||
if len(va_codecs) == 1:
|
||||
# Audio only entries usually come with single codec and
|
||||
# no resolution. For more robustness we also check it to
|
||||
# be mp4 audio.
|
||||
if not resolution and va_codecs[0].startswith('mp4a'):
|
||||
vcodec, acodec = 'none', va_codecs[0]
|
||||
else:
|
||||
vcodec = va_codecs[0]
|
||||
else:
|
||||
vcodec, acodec = va_codecs[:2]
|
||||
# Unified Streaming Platform
|
||||
mobj = re.search(
|
||||
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
|
||||
if mobj:
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
f.update({
|
||||
'acodec': acodec,
|
||||
'vcodec': vcodec,
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
@@ -1435,6 +1481,13 @@ class InfoExtractor(object):
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
|
||||
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
||||
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||
"""
|
||||
if mpd_doc.get('type') == 'dynamic':
|
||||
return []
|
||||
|
||||
@@ -1467,8 +1520,16 @@ class InfoExtractor(object):
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
ms_info['total_number'] += 1 + int(s.get('r', '0'))
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
else:
|
||||
timescale = segment_template.get('timescale')
|
||||
if timescale:
|
||||
@@ -1505,7 +1566,7 @@ class InfoExtractor(object):
|
||||
continue
|
||||
representation_attrib = adaptation_set.attrib.copy()
|
||||
representation_attrib.update(representation.attrib)
|
||||
# According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory
|
||||
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
||||
mime_type = representation_attrib['mimeType']
|
||||
content_type = mime_type.split('/')[0]
|
||||
if content_type == 'text':
|
||||
@@ -1549,16 +1610,40 @@ class InfoExtractor(object):
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template:
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth')}
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
representation_ms_info['segment_urls'] = []
|
||||
segment_time = 0
|
||||
|
||||
def add_segment_url():
|
||||
representation_ms_info['segment_urls'].append(
|
||||
media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
)
|
||||
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
add_segment_url()
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += s['d']
|
||||
add_segment_url()
|
||||
segment_time += s['d']
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
@@ -1585,6 +1670,62 @@ class InfoExtractor(object):
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
|
||||
def parse_content_type(content_type):
|
||||
if not content_type:
|
||||
return {}
|
||||
ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
|
||||
if ctr:
|
||||
mimetype, codecs = ctr.groups()
|
||||
f = parse_codecs(codecs)
|
||||
f['ext'] = mimetype2ext(mimetype)
|
||||
return f
|
||||
return {}
|
||||
|
||||
entries = []
|
||||
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||
media_info = {
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
}
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = media_attributes.get('src')
|
||||
if src:
|
||||
media_info['formats'].append({
|
||||
'url': absolute_url(src),
|
||||
'vcodec': 'none' if media_type == 'audio' else None,
|
||||
})
|
||||
media_info['thumbnail'] = media_attributes.get('poster')
|
||||
if media_content:
|
||||
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||
source_attributes = extract_attributes(source_tag)
|
||||
src = source_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
f = parse_content_type(source_attributes.get('type'))
|
||||
f.update({
|
||||
'url': absolute_url(src),
|
||||
'vcodec': 'none' if media_type == 'audio' else None,
|
||||
})
|
||||
media_info['formats'].append(f)
|
||||
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||
track_attributes = extract_attributes(track_tag)
|
||||
kind = track_attributes.get('kind')
|
||||
if not kind or kind == 'subtitles':
|
||||
src = track_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
if media_info['formats']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
@@ -1698,6 +1839,13 @@ class InfoExtractor(object):
|
||||
def _mark_watched(self, *args, **kwargs):
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def geo_verification_headers(self):
|
||||
headers = {}
|
||||
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
|
||||
if geo_verification_proxy:
|
||||
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
||||
return headers
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
143
youtube_dl/extractor/coub.py
Normal file
143
youtube_dl/extractor/coub.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class CoubIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://coub.com/view/5u5n1',
|
||||
'info_dict': {
|
||||
'id': '5u5n1',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Matrix Moonwalk',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 4.6,
|
||||
'timestamp': 1428527772,
|
||||
'upload_date': '20150408',
|
||||
'uploader': 'Артём Лоскутников',
|
||||
'uploader_id': 'artyom.loskutnikov',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'coub:5u5n1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# longer video id
|
||||
'url': 'http://coub.com/view/237d5l5h',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
coub = self._download_json(
|
||||
'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)
|
||||
|
||||
if coub.get('error'):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, coub['error']), expected=True)
|
||||
|
||||
title = coub['title']
|
||||
|
||||
file_versions = coub['file_versions']
|
||||
|
||||
QUALITIES = ('low', 'med', 'high')
|
||||
|
||||
MOBILE = 'mobile'
|
||||
IPHONE = 'iphone'
|
||||
HTML5 = 'html5'
|
||||
|
||||
SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)
|
||||
|
||||
quality_key = qualities(QUALITIES)
|
||||
preference_key = qualities(SOURCE_PREFERENCE)
|
||||
|
||||
formats = []
|
||||
|
||||
for kind, items in file_versions.get(HTML5, {}).items():
|
||||
if kind not in ('video', 'audio'):
|
||||
continue
|
||||
if not isinstance(items, dict):
|
||||
continue
|
||||
for quality, item in items.items():
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
item_url = item.get('url')
|
||||
if not item_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': item_url,
|
||||
'format_id': '%s-%s-%s' % (HTML5, kind, quality),
|
||||
'filesize': int_or_none(item.get('size')),
|
||||
'vcodec': 'none' if kind == 'audio' else None,
|
||||
'quality': quality_key(quality),
|
||||
'preference': preference_key(HTML5),
|
||||
})
|
||||
|
||||
iphone_url = file_versions.get(IPHONE, {}).get('url')
|
||||
if iphone_url:
|
||||
formats.append({
|
||||
'url': iphone_url,
|
||||
'format_id': IPHONE,
|
||||
'preference': preference_key(IPHONE),
|
||||
})
|
||||
|
||||
mobile_url = file_versions.get(MOBILE, {}).get('audio_url')
|
||||
if mobile_url:
|
||||
formats.append({
|
||||
'url': mobile_url,
|
||||
'format_id': '%s-audio' % MOBILE,
|
||||
'preference': preference_key(MOBILE),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = coub.get('picture')
|
||||
duration = float_or_none(coub.get('duration'))
|
||||
timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))
|
||||
uploader = coub.get('channel', {}).get('title')
|
||||
uploader_id = coub.get('channel', {}).get('permalink')
|
||||
|
||||
view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
|
||||
like_count = int_or_none(coub.get('likes_count'))
|
||||
repost_count = int_or_none(coub.get('recoubs_count'))
|
||||
comment_count = int_or_none(coub.get('comments_count'))
|
||||
|
||||
age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
|
||||
if age_restricted is not None:
|
||||
age_limit = 18 if age_restricted is True else 0
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'repost_count': repost_count,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
@@ -51,8 +51,11 @@ class CSpanIE(InfoExtractor):
|
||||
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
}]
|
||||
|
||||
|
30
youtube_dl/extractor/ctv.py
Normal file
30
youtube_dl/extractor/ctv.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
'info_dict': {
|
||||
'id': '706966',
|
||||
'ext': 'mp4',
|
||||
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
|
||||
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
|
||||
'upload_date': '20150919',
|
||||
'timestamp': 1442624700,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:ctv_web:%s' % video_id,
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
65
youtube_dl/extractor/ctvnews.py
Normal file
65
youtube_dl/extractor/ctvnews.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import orderedSet
|
||||
|
||||
|
||||
class CTVNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
|
||||
'info_dict': {
|
||||
'id': '901995',
|
||||
'ext': 'mp4',
|
||||
'title': 'Extended: \'That person cannot be me\' Johnson says',
|
||||
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
|
||||
'timestamp': 1467286284,
|
||||
'upload_date': '20160630',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '1.2966224',
|
||||
},
|
||||
'playlist_mincount': 19,
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '1.2876780',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/1.810401',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
def ninecninemedia_url_result(clip_id):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': clip_id,
|
||||
'url': '9c9media:ctvnews_web:%s' % clip_id,
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
|
||||
if page_id.isdigit():
|
||||
return ninecninemedia_url_result(page_id)
|
||||
else:
|
||||
webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={
|
||||
'ot': 'example.AjaxPageLayout.ot',
|
||||
'maxItemsPerPage': 1000000,
|
||||
})
|
||||
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
|
||||
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
|
||||
return self.playlist_result(entries, page_id)
|
@@ -5,19 +5,20 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_protocol,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DailyMailIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
|
||||
'md5': '2f639d446394f53f3a33658b518b6615',
|
||||
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
|
||||
'md5': 'f6129624562251f628296c3a9ffde124',
|
||||
'info_dict': {
|
||||
'id': '1288527',
|
||||
'id': '1295863',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turn any video into an impressionist masterpiece',
|
||||
'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
|
||||
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
|
||||
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,7 +27,7 @@ class DailyMailIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
||||
title = video_data['title']
|
||||
title = unescapeHTML(video_data['title'])
|
||||
video_sources = self._download_json(video_data.get(
|
||||
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
|
||||
|
||||
@@ -55,7 +56,7 @@ class DailyMailIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('descr'),
|
||||
'description': unescapeHTML(video_data.get('descr')),
|
||||
'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -111,6 +112,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
return list(map(lambda m: unescapeHTML(m[1]), matches))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -153,18 +161,19 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
type_ = media.get('type')
|
||||
if type_ == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
ext = determine_ext(media_url)
|
||||
if type_ == 'application/x-mpegURL' or ext == 'm3u8':
|
||||
ext = mimetype2ext(type_) or determine_ext(media_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', preference=-1,
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif type_ == 'application/f4m' or ext == 'f4m':
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
'format_id': 'http-%s' % quality,
|
||||
'ext': ext,
|
||||
}
|
||||
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
|
||||
if m:
|
||||
|
@@ -66,22 +66,32 @@ class DaumIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# Requires dte_type=WEB (#9972)
|
||||
'url': 'http://tvpot.daum.net/v/s3794Uf1NZeZ1qMpGpeqeRU',
|
||||
'md5': 'a8917742069a4dd442516b86e7d66529',
|
||||
'info_dict': {
|
||||
'id': 's3794Uf1NZeZ1qMpGpeqeRU',
|
||||
'ext': 'mp4',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회',
|
||||
'upload_date': '20160611',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||
query = compat_urllib_parse_urlencode({'vid': video_id})
|
||||
movie_data = self._download_json(
|
||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
|
||||
video_id, 'Downloading video formats info')
|
||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json',
|
||||
video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'})
|
||||
|
||||
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
|
||||
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
|
||||
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
|
||||
|
||||
info = self._download_xml(
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||
'Downloading video info')
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id,
|
||||
'Downloading video info', query={'vid': video_id})
|
||||
|
||||
formats = []
|
||||
for format_el in movie_data['output_list']['output_list']:
|
||||
|
@@ -4,78 +4,47 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:(?:lazyplayer|player)/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
|
||||
'md5': '2e24f67936517b143a234b4cadf792ec',
|
||||
'info_dict': {
|
||||
'id': '33100',
|
||||
'id': '3649835190001',
|
||||
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'ext': 'mp4',
|
||||
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
|
||||
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'timestamp': 1404039863.438,
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'timestamp': 1404039863,
|
||||
'upload_date': '20140629',
|
||||
'duration': 69.544,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
}
|
||||
'uploader_id': '1027729757001',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew']
|
||||
}, {
|
||||
'url': 'http://dbtv.no/3649835190001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/vice/5000634109001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/filmtrailer/3359293614001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
data = self._download_json(
|
||||
'http://api.dbtv.no/discovery/%s' % video_id, display_id)
|
||||
|
||||
video = data['playlist'][0]
|
||||
|
||||
formats = [{
|
||||
'url': f['URL'],
|
||||
'vcodec': f.get('container'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'vbr': float_or_none(f.get('rate'), 1000),
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
} for f in video['renditions'] if 'URL' in f]
|
||||
|
||||
if not formats:
|
||||
for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
|
||||
if url_key in video:
|
||||
formats.append({
|
||||
'url': video[url_key],
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
return {
|
||||
'id': compat_str(video['id']),
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video['title'],
|
||||
'description': clean_html(video['desc']),
|
||||
'thumbnail': video.get('splash') or video.get('thumb'),
|
||||
'timestamp': float_or_none(video.get('publishedAt'), 1000),
|
||||
'duration': float_or_none(video.get('length'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'categories': video.get('tags'),
|
||||
'formats': formats,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class DCNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
@@ -55,30 +55,30 @@ class DCNBaseIE(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_video_formats(self, webpage, video_id, entry_protocol):
|
||||
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
|
||||
formats = []
|
||||
m3u8_url = self._html_search_regex(
|
||||
r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False)
|
||||
if m3u8_url:
|
||||
format_url_base = 'http' + self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url_base + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None))
|
||||
|
||||
rtsp_url = self._search_regex(
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
|
||||
if rtsp_url:
|
||||
formats.append({
|
||||
'url': rtsp_url,
|
||||
'format_id': 'rtsp',
|
||||
})
|
||||
|
||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url_base + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
|
||||
class DCNVideoIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
|
||||
'info_dict':
|
||||
{
|
||||
@@ -94,7 +94,10 @@ class DCNVideoIE(DCNBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -120,7 +123,7 @@ class DCNVideoIE(DCNBaseIE):
|
||||
|
||||
class DCNLiveIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
@@ -147,7 +150,7 @@ class DCNLiveIE(DCNBaseIE):
|
||||
|
||||
class DCNSeasonIE(InfoExtractor):
|
||||
IE_NAME = 'dcn:season'
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
|
||||
_TEST = {
|
||||
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
|
||||
'info_dict':
|
||||
|
@@ -17,8 +17,12 @@ class DreiSatIE(ZDFIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'uploader': '3sat',
|
||||
'uploader': 'SCHWEIZWEIT',
|
||||
'uploader_id': '100000210',
|
||||
'upload_date': '20140913'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@@ -2,13 +2,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class DWIE(InfoExtractor):
|
||||
IE_NAME = 'dw'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
|
||||
@@ -31,6 +34,18 @@ class DWIE(InfoExtractor):
|
||||
'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
|
||||
'upload_date': '20160311',
|
||||
}
|
||||
}, {
|
||||
# DW documentaries, only last for one or two weeks
|
||||
'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
|
||||
'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
|
||||
'info_dict': {
|
||||
'id': '19274438',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome to the 90s – Hip Hop',
|
||||
'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
|
||||
'upload_date': '20160521',
|
||||
},
|
||||
'skip': 'Video removed',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -38,6 +53,7 @@ class DWIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
title = hidden_inputs['media_title']
|
||||
media_id = hidden_inputs.get('media_id') or media_id
|
||||
|
||||
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
|
||||
formats = self._extract_smil_formats(
|
||||
@@ -49,13 +65,20 @@ class DWIE(InfoExtractor):
|
||||
else:
|
||||
formats = [{'url': hidden_inputs['file_name']}]
|
||||
|
||||
upload_date = hidden_inputs.get('display_date')
|
||||
if not upload_date:
|
||||
upload_date = self._html_search_regex(
|
||||
r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
|
||||
'upload date', default=None)
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': hidden_inputs.get('preview_image'),
|
||||
'duration': int_or_none(hidden_inputs.get('file_duration')),
|
||||
'upload_date': hidden_inputs.get('display_date'),
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
@@ -50,6 +50,14 @@ class EaglePlatformIE(InfoExtractor):
|
||||
'skip': 'Georestricted',
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
@staticmethod
|
||||
def _handle_error(response):
|
||||
status = int_or_none(response.get('status', 200))
|
||||
|
@@ -6,12 +6,13 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
)
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
|
||||
'md5': '4294cf98bc165f218aaa0b89e0fd8042',
|
||||
'info_dict': {
|
||||
@@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor):
|
||||
'timestamp': 1428035648,
|
||||
'upload_date': '20150403',
|
||||
'uploader_id': 'batchUser',
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# not available via http://widgets.ellentube.com/
|
||||
'url': 'http://www.ellentv.com/videos/1-szkgu2m2/',
|
||||
'info_dict': {
|
||||
'id': '1_szkgu2m2',
|
||||
'ext': 'flv',
|
||||
'title': "Ellen's Amazingly Talented Audience",
|
||||
'description': 'md5:86ff1e376ff0d717d7171590e273f0a5',
|
||||
'timestamp': 1255140900,
|
||||
'upload_date': '20091010',
|
||||
'uploader_id': 'ellenkaltura@gmail.com',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url)
|
||||
|
||||
for num, url_ in enumerate(URLS, 1):
|
||||
webpage = self._download_webpage(
|
||||
'http://widgets.ellentube.com/videos/%s' % video_id,
|
||||
video_id)
|
||||
url_, video_id, fatal=num == len(URLS))
|
||||
|
||||
default = NO_DEFAULT if num == len(URLS) else None
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id')
|
||||
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id',
|
||||
default=default)
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
[r'id="kaltura_player_([^"]+)"',
|
||||
r"_wb_entry_id\s*:\s*'([^']+)",
|
||||
r'data-kaltura-entry-id="([^"]+)'],
|
||||
webpage, 'kaltura id')
|
||||
webpage, 'kaltura id', default=default)
|
||||
|
||||
if partner_id and kaltura_id:
|
||||
break
|
||||
|
||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
|
||||
|
||||
|
@@ -4,54 +4,100 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
'info_dict': {
|
||||
'id': '95008',
|
||||
'id': 'qlDUmNsj6VS',
|
||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||
'duration': 1838,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?) - EPORNER', webpage, 'title')
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
|
||||
player_code = self._download_webpage(
|
||||
redirect_url, display_id, note='Downloading player config')
|
||||
video_id = self._match_id(compat_str(urlh.geturl()))
|
||||
|
||||
sources = self._search_regex(
|
||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
|
||||
hash = self._search_regex(
|
||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?) - EPORNER', webpage, 'title')
|
||||
|
||||
# Reverse engineered from vjs.js
|
||||
def calc_hash(s):
|
||||
return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
|
||||
|
||||
video = self._download_json(
|
||||
'http://www.eporner.com/xhr/video/%s' % video_id,
|
||||
display_id, note='Downloading video JSON',
|
||||
query={
|
||||
'hash': calc_hash(hash),
|
||||
'device': 'generic',
|
||||
'domain': 'www.eporner.com',
|
||||
'fallback': 'false',
|
||||
})
|
||||
|
||||
if video.get('available') is False:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, video['message']), expected=True)
|
||||
|
||||
sources = video['sources']
|
||||
|
||||
formats = []
|
||||
for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
|
||||
fmt = {
|
||||
'url': video_url,
|
||||
for kind, formats_dict in sources.items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
for format_id, format_dict in formats_dict.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
src = format_dict.get('src')
|
||||
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||
continue
|
||||
if kind == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=kind, fatal=False))
|
||||
else:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', format_id, 'height', default=None))
|
||||
fps = int_or_none(self._search_regex(
|
||||
r'(\d+)fps', format_id, 'fps', default=None))
|
||||
|
||||
formats.append({
|
||||
'url': src,
|
||||
'format_id': format_id,
|
||||
}
|
||||
m = re.search(r'^(\d+)', format_id)
|
||||
if m:
|
||||
fmt['height'] = int(m.group(1))
|
||||
formats.append(fmt)
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
|
@@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||
'info_dict': {
|
||||
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
|
||||
'ext': 'mp4',
|
||||
@@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
|
||||
'description': None,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
|
||||
'url': 'http://espn.go.com/video/clip?id=2743663',
|
||||
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
|
||||
'info_dict': {
|
||||
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must-See Moments: Best of the MLS season',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
|
@@ -20,7 +20,11 @@ from .adobetv import (
|
||||
AdobeTVVideoIE,
|
||||
)
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aenetworks import AENetworksIE
|
||||
from .aenetworks import (
|
||||
AENetworksIE,
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
@@ -40,10 +44,10 @@ from .appletrailers import (
|
||||
AppleTrailersSectionIE,
|
||||
)
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
SportschauIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTvIE,
|
||||
@@ -56,6 +60,7 @@ from .arte import (
|
||||
ArteTVDDCIE,
|
||||
ArteTVMagazineIE,
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
@@ -69,6 +74,8 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCCoUkArticleIE,
|
||||
BBCCoUkIPlayerPlaylistIE,
|
||||
BBCCoUkPlaylistIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
@@ -106,6 +113,10 @@ from .camwithher import CamWithHerIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import CanvasIE
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
)
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
@@ -129,10 +140,11 @@ from .chirbit import (
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
@@ -143,8 +155,13 @@ from .cnn import (
|
||||
CNNBlogsIE,
|
||||
CNNArticleIE,
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralIE,
|
||||
ComedyCentralTVIE,
|
||||
ToshIE,
|
||||
)
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import RtmpIE
|
||||
@@ -159,6 +176,8 @@ from .crunchyroll import (
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .cwtv import CWTVIE
|
||||
from .dailymail import DailyMailIE
|
||||
@@ -231,6 +250,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
@@ -241,6 +261,7 @@ from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .flipagram import FlipagramIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
@@ -266,6 +287,7 @@ from .freespeech import FreespeechIE
|
||||
from .freevideo import FreeVideoIE
|
||||
from .funimation import FunimationIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gameone import (
|
||||
@@ -275,7 +297,6 @@ from .gameone import (
|
||||
from .gamersyde import GamersydeIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
@@ -288,6 +309,7 @@ from .globo import (
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .goldenmoustache import GoldenMoustacheIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
@@ -310,6 +332,10 @@ from .hotnewhiphop import HotNewHipHopIE
|
||||
from .hotstar import HotStarIE
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
from .hrti import (
|
||||
HRTiIE,
|
||||
HRTiPlaylistIE,
|
||||
)
|
||||
from .huffpost import HuffPostIE
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
@@ -348,6 +374,7 @@ from .jove import JoveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kamcord import KamcordIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
@@ -371,6 +398,10 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
LcpIE,
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
@@ -379,6 +410,7 @@ from .leeco import (
|
||||
LePlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .libraryofcongress import LibraryOfCongressIE
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import (
|
||||
LifeNewsIE,
|
||||
@@ -411,6 +443,7 @@ from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .meta import METAIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
@@ -443,10 +476,10 @@ from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVIggyIE,
|
||||
MTVDEIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
@@ -469,7 +502,6 @@ from .nbc import (
|
||||
NBCNewsIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
MSNBCIE,
|
||||
)
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
@@ -497,7 +529,6 @@ from .nextmedia import (
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyIE,
|
||||
)
|
||||
from .nextmovie import NextMovieIE
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import (
|
||||
@@ -506,9 +537,15 @@ from .nhl import (
|
||||
NHLVideocenterCategoryIE,
|
||||
NHLIE,
|
||||
)
|
||||
from .nick import NickIE
|
||||
from .nick import (
|
||||
NickIE,
|
||||
NickDeIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
@@ -553,8 +590,13 @@ from .nytimes import (
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .odatv import OdaTVIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .onet import (
|
||||
OnetIE,
|
||||
OnetChannelIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .ooyala import (
|
||||
OoyalaIE,
|
||||
@@ -593,6 +635,7 @@ from .pluralsight import (
|
||||
PluralsightCourseIE,
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .polskieradio import PolskieRadioIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
@@ -616,7 +659,14 @@ from .qqmusic import (
|
||||
QQMusicToplistIE,
|
||||
QQMusicPlaylistIE,
|
||||
)
|
||||
from .r7 import R7IE
|
||||
from .r7 import (
|
||||
R7IE,
|
||||
R7ArticleIE,
|
||||
)
|
||||
from .radiocanada import (
|
||||
RadioCanadaIE,
|
||||
RadioCanadaAudioVideoIE,
|
||||
)
|
||||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
@@ -630,11 +680,17 @@ from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .revision3 import Revision3IE
|
||||
from .revision3 import (
|
||||
Revision3EmbedIE,
|
||||
Revision3IE,
|
||||
)
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .roosterteeth import RoosterTeethIE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
@@ -643,8 +699,9 @@ from .rtlnl import RtlNlIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
from .ruleporn import RulePornIE
|
||||
from .rutube import (
|
||||
@@ -670,6 +727,7 @@ from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenjunkies import ScreenJunkiesIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .seeker import SeekerIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
@@ -678,10 +736,12 @@ from .shahid import ShahidIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
from .sixplay import SixPlayIE
|
||||
from .skynewsarabia import (
|
||||
SkyNewsArabiaIE,
|
||||
SkyNewsArabiaArticleIE,
|
||||
)
|
||||
from .skysports import SkySportsIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotri import (
|
||||
@@ -722,6 +782,7 @@ from .sportbox import (
|
||||
SportBoxEmbedIE,
|
||||
)
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .sportschau import SportschauIE
|
||||
from .srgssr import (
|
||||
SRGSSRIE,
|
||||
SRGSSRPlayIE,
|
||||
@@ -730,6 +791,7 @@ from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
@@ -762,6 +824,7 @@ from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .theintercept import TheInterceptIE
|
||||
@@ -827,7 +890,10 @@ from .tvc import (
|
||||
)
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvp import (
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
)
|
||||
from .tvplay import TVPlayIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
@@ -843,6 +909,7 @@ from .twitch import (
|
||||
TwitchProfileIE,
|
||||
TwitchPastBroadcastsIE,
|
||||
TwitchStreamIE,
|
||||
TwitchClipsIE,
|
||||
)
|
||||
from .twitter import (
|
||||
TwitterCardIE,
|
||||
@@ -857,6 +924,7 @@ from .udn import UDNEmbedIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usatoday import USATodayIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .ustudio import (
|
||||
@@ -883,6 +951,7 @@ from .vice import (
|
||||
ViceIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
@@ -894,6 +963,7 @@ from .videomore import (
|
||||
)
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .vidio import VidioIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
VidmeUserIE,
|
||||
@@ -930,6 +1000,7 @@ from .viki import (
|
||||
from .vk import (
|
||||
VKIE,
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
)
|
||||
from .vlive import VLiveIE
|
||||
from .vodlocker import VodlockerIE
|
||||
@@ -939,26 +1010,29 @@ from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
WashingtonPostArticleIE,
|
||||
)
|
||||
from .wat import WatIE
|
||||
from .watchindianporn import WatchIndianPornIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
WDRMausIE,
|
||||
)
|
||||
from .webofstories import (
|
||||
WebOfStoriesIE,
|
||||
WebOfStoriesPlaylistIE,
|
||||
)
|
||||
from .weibo import WeiboIE
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wrzuta import WrzutaIE
|
||||
from .wrzuta import (
|
||||
WrzutaIE,
|
||||
WrzutaPlaylistIE,
|
||||
)
|
||||
from .wsj import WSJIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
@@ -994,7 +1068,10 @@ from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
from .youku import (
|
||||
YoukuIE,
|
||||
YoukuShowIE,
|
||||
)
|
||||
from .youporn import YouPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
@@ -1009,6 +1086,7 @@ from .youtube import (
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeSharedVideoIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
|
64
youtube_dl/extractor/eyedotv.py
Normal file
64
youtube_dl/extractor/eyedotv.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class EyedoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
|
||||
'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
|
||||
'info_dict': {
|
||||
'id': '16301',
|
||||
'ext': 'mp4',
|
||||
'title': 'Journée du conseil scientifique de l\'Afnic 2015',
|
||||
'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
|
||||
'uploader': 'Afnic Live',
|
||||
'uploader_id': '8023',
|
||||
}
|
||||
}
|
||||
_ROOT_URL = 'http://live.eyedo.net:1935/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)
|
||||
|
||||
def _add_ns(path):
|
||||
return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')
|
||||
|
||||
title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
|
||||
state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True)
|
||||
if state_live_code == 'avenir':
|
||||
raise ExtractorError(
|
||||
'%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
|
||||
expected=True)
|
||||
|
||||
is_live = state_live_code == 'live'
|
||||
m3u8_url = None
|
||||
# http://eyedo.tv/Content/Html5/Scripts/html5view.js
|
||||
if is_live:
|
||||
if xpath_text(video_data, 'Cdn') == 'true':
|
||||
m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
|
||||
else:
|
||||
m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
|
||||
else:
|
||||
m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
|
||||
'description': xpath_text(video_data, _add_ns('Description')),
|
||||
'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
|
||||
'uploader': xpath_text(video_data, _add_ns('Createur')),
|
||||
'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
|
||||
'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
|
||||
'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
|
||||
}
|
@@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://
|
||||
(?:\w+\.)?facebook\.com/
|
||||
(?:[\w-]+\.)?facebook\.com/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
@@ -127,8 +127,26 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
# Facebook API embed
|
||||
# see https://developers.facebook.com/docs/plugins/embedded-video-player
|
||||
mobj = re.search(r'''(?x)<div[^>]+
|
||||
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
|
||||
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
def _login(self):
|
||||
(useremail, password) = self._get_login_info()
|
||||
if useremail is None:
|
||||
@@ -204,12 +222,25 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
|
||||
if m:
|
||||
swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
|
||||
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
|
||||
|
||||
for m in re.findall(PATTERN, webpage):
|
||||
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
|
||||
data = dict(json.loads(swf_params))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data = json.loads(params_raw)['video_data']
|
||||
video_data_candidate = json.loads(params_raw)['video_data']
|
||||
for _, f in video_data_candidate.items():
|
||||
if not f:
|
||||
continue
|
||||
if isinstance(f, dict):
|
||||
f = [f]
|
||||
if not isinstance(f, list):
|
||||
continue
|
||||
if f[0].get('video_id') == video_id:
|
||||
video_data = video_data_candidate
|
||||
break
|
||||
if video_data:
|
||||
break
|
||||
|
||||
def video_data_list2dict(video_data):
|
||||
ret = {}
|
||||
@@ -239,6 +270,8 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, f in video_data.items():
|
||||
if f and isinstance(f, dict):
|
||||
f = [f]
|
||||
if not f or not isinstance(f, list):
|
||||
continue
|
||||
for quality in ('sd', 'hd'):
|
||||
|
115
youtube_dl/extractor/flipagram.py
Normal file
115
youtube_dl/extractor/flipagram.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class FlipagramIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||
'md5': '888dcf08b7ea671381f00fab74692755',
|
||||
'info_dict': {
|
||||
'id': 'nyvTSJMKId',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||
'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
|
||||
'duration': 35.571,
|
||||
'timestamp': 1461244995,
|
||||
'upload_date': '20160421',
|
||||
'uploader': 'kitty juria',
|
||||
'uploader_id': 'sjuria101',
|
||||
'creator': 'kitty juria',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
'formats': 'mincount:2',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
|
||||
video_id)
|
||||
|
||||
flipagram = video_data['flipagram']
|
||||
video = flipagram['video']
|
||||
|
||||
json_ld = self._search_json_ld(webpage, video_id, default=False)
|
||||
title = json_ld.get('title') or flipagram['captionText']
|
||||
description = json_ld.get('description') or flipagram.get('captionText')
|
||||
|
||||
formats = [{
|
||||
'url': video['url'],
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': int_or_none(video_data.get('size')),
|
||||
}]
|
||||
|
||||
preview_url = try_get(
|
||||
flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
|
||||
if preview_url:
|
||||
formats.append({
|
||||
'url': preview_url,
|
||||
'ext': 'm4a',
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
counts = flipagram.get('counts', {})
|
||||
user = flipagram.get('user', {})
|
||||
video_data = flipagram.get('video', {})
|
||||
|
||||
thumbnails = [{
|
||||
'url': self._proto_relative_url(cover['url']),
|
||||
'width': int_or_none(cover.get('width')),
|
||||
'height': int_or_none(cover.get('height')),
|
||||
'filesize': int_or_none(cover.get('size')),
|
||||
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
||||
|
||||
# Note that this only retrieves comments that are initally loaded.
|
||||
# For videos with large amounts of comments, most won't be retrieved.
|
||||
comments = []
|
||||
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
||||
text = comment.get('comment')
|
||||
if not text or not isinstance(text, list):
|
||||
continue
|
||||
comments.append({
|
||||
'author': comment.get('user', {}).get('name'),
|
||||
'author_id': comment.get('user', {}).get('username'),
|
||||
'id': comment.get('id'),
|
||||
'text': text[0],
|
||||
'timestamp': unified_timestamp(comment.get('created')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': float_or_none(flipagram.get('duration'), 1000),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
|
||||
'uploader': user.get('name'),
|
||||
'uploader_id': user.get('username'),
|
||||
'creator': user.get('name'),
|
||||
'view_count': int_or_none(counts.get('plays')),
|
||||
'like_count': int_or_none(counts.get('likes')),
|
||||
'repost_count': int_or_none(counts.get('reflips')),
|
||||
'comment_count': int_or_none(counts.get('comments')),
|
||||
'comments': comments,
|
||||
'formats': formats,
|
||||
}
|
@@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor):
|
||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||
'ext': 'flv',
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
|
||||
@@ -9,11 +12,15 @@ class FoxSportsIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/video?vid=432609859715',
|
||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||
'info_dict': {
|
||||
'id': 'gA0bHB3Ladz3',
|
||||
'ext': 'flv',
|
||||
'id': 'i0qKWsk3qJaM',
|
||||
'ext': 'mp4',
|
||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||
'upload_date': '20150423',
|
||||
'timestamp': 1429761109,
|
||||
'uploader': 'NEWA-FNG-FOXSPORTS',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
@@ -28,5 +35,8 @@ class FoxSportsIE(InfoExtractor):
|
||||
r"data-player-config='([^']+)'", webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True}))
|
||||
return self.url_result(smuggle_url(update_url_query(
|
||||
config['releaseURL'], {
|
||||
'mbr': 'true',
|
||||
'switch': 'http',
|
||||
}), {'force_smil_url': True}))
|
||||
|
@@ -14,7 +14,10 @@ from ..utils import (
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
)
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
@@ -188,6 +191,21 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Dailymotion embed
|
||||
'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
|
||||
'md5': 'ee7f1828f25a648addc90cb2687b1f12',
|
||||
'info_dict': {
|
||||
'id': 'x4iiko0',
|
||||
'ext': 'mp4',
|
||||
'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
|
||||
'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
|
||||
'timestamp': 1467011958,
|
||||
'upload_date': '20160627',
|
||||
'uploader': 'France Inter',
|
||||
'uploader_id': 'x2q2ez',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -197,7 +215,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
|
||||
if dmcloud_url:
|
||||
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||
return self.url_result(dmcloud_url, DailymotionCloudIE.ie_key())
|
||||
|
||||
dailymotion_urls = DailymotionIE._extract_urls(webpage)
|
||||
if dailymotion_urls:
|
||||
return self.playlist_result([
|
||||
self.url_result(dailymotion_url, DailymotionIE.ie_key())
|
||||
for dailymotion_url in dailymotion_urls])
|
||||
|
||||
video_id, catalogue = self._search_regex(
|
||||
(r'id-video=([^@]+@[^"]+)',
|
||||
|
35
youtube_dl/extractor/fusion.py
Normal file
35
youtube_dl/extractor/fusion.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class FusionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
|
||||
'info_dict': {
|
||||
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
|
||||
'ext': 'mp4',
|
||||
'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
|
||||
'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
|
||||
'duration': 140.0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://fusion.net/video/201781',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
ooyala_code = self._search_regex(
|
||||
r'data-video-id=(["\'])(?P<code>.+?)\1',
|
||||
webpage, 'ooyala code', group='code')
|
||||
|
||||
return OoyalaIE._build_url_result(ooyala_code)
|
@@ -1,19 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .once import OnceIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
dict_get,
|
||||
)
|
||||
|
||||
|
||||
class GameSpotIE(InfoExtractor):
|
||||
class GameSpotIE(OnceIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||
@@ -28,10 +28,13 @@ class GameSpotIE(InfoExtractor):
|
||||
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
|
||||
'info_dict': {
|
||||
'id': 'gs-2300-6424837',
|
||||
'ext': 'flv',
|
||||
'title': 'The Witcher 3: Wild Hunt [Xbox ONE] - Now Playing',
|
||||
'ext': 'mp4',
|
||||
'title': 'Now Playing - The Witcher 3: Wild Hunt',
|
||||
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -39,29 +42,73 @@ class GameSpotIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
data_video_json = self._search_regex(
|
||||
r'data-video=["\'](.*?)["\']', webpage, 'data video')
|
||||
data_video = json.loads(unescapeHTML(data_video_json))
|
||||
data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
|
||||
streams = data_video['videoStreams']
|
||||
|
||||
manifest_url = None
|
||||
formats = []
|
||||
f4m_url = streams.get('f4m_stream')
|
||||
if f4m_url is not None:
|
||||
# Transform the manifest url to a link to the mp4 files
|
||||
# they are used in mobile devices.
|
||||
f4m_path = compat_urlparse.urlparse(f4m_url).path
|
||||
if f4m_url:
|
||||
manifest_url = f4m_url
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
|
||||
m3u8_url = streams.get('m3u8_stream')
|
||||
if m3u8_url:
|
||||
manifest_url = m3u8_url
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, page_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
progressive_url = dict_get(
|
||||
streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
|
||||
if progressive_url and manifest_url:
|
||||
qualities_basename = self._search_regex(
|
||||
'/([^/]+)\.csmil/',
|
||||
manifest_url, 'qualities basename', default=None)
|
||||
if qualities_basename:
|
||||
QUALITIES_RE = r'((,\d+)+,?)'
|
||||
qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',')
|
||||
http_path = f4m_path[1:].split('/', 1)[1]
|
||||
http_template = re.sub(QUALITIES_RE, r'%s', http_path)
|
||||
http_template = http_template.replace('.csmil/manifest.f4m', '')
|
||||
http_template = compat_urlparse.urljoin(
|
||||
'http://video.gamespotcdn.com/', http_template)
|
||||
qualities = self._search_regex(
|
||||
QUALITIES_RE, qualities_basename,
|
||||
'qualities', default=None)
|
||||
if qualities:
|
||||
qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
|
||||
qualities.sort()
|
||||
http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
|
||||
http_url_basename = url_basename(progressive_url)
|
||||
if m3u8_formats:
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
if len(qualities) == len(m3u8_formats):
|
||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': progressive_url.replace(
|
||||
http_url_basename, http_template % q),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
for q in qualities:
|
||||
formats.append({
|
||||
'url': http_template % q,
|
||||
'url': progressive_url.replace(
|
||||
http_url_basename, http_template % q),
|
||||
'ext': 'mp4',
|
||||
'format_id': q,
|
||||
'format_id': 'http-%d' % q,
|
||||
'tbr': q,
|
||||
})
|
||||
else:
|
||||
|
||||
onceux_json = self._search_regex(
|
||||
r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
|
||||
if onceux_json:
|
||||
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
|
||||
if onceux_url:
|
||||
formats.extend(self._extract_once_formats(re.sub(
|
||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
|
||||
|
||||
if not formats:
|
||||
for quality in ['sd', 'hd']:
|
||||
# It's actually a link to a flv file
|
||||
flv_url = streams.get('f4m_{0}'.format(quality))
|
||||
@@ -71,6 +118,7 @@ class GameSpotIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'format_id': quality,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': data_video['guid'],
|
||||
|
@@ -1,62 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class GametrailersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
|
||||
'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a',
|
||||
'info_dict': {
|
||||
'id': '2983958',
|
||||
'ext': 'mp4',
|
||||
'display_id': '116437-Just-Cause-3-Review',
|
||||
'title': 'Just Cause 3 - Review',
|
||||
'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)\|', webpage, 'title').strip()
|
||||
embed_url = self._proto_relative_url(
|
||||
self._search_regex(
|
||||
r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage,
|
||||
'embed url'),
|
||||
scheme='http:')
|
||||
video_id = url_basename(embed_url)
|
||||
embed_page = self._download_webpage(embed_url, video_id)
|
||||
embed_vars_json = self._search_regex(
|
||||
r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page,
|
||||
'embed vars')
|
||||
info = self._parse_json(embed_vars_json, video_id)
|
||||
|
||||
formats = []
|
||||
for media in info['media']:
|
||||
if media['mediaPurpose'] == 'play':
|
||||
formats.append({
|
||||
'url': media['uri'],
|
||||
'height': media['height'],
|
||||
'width:': media['width'],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('thumbUri'),
|
||||
'description': self._og_search_description(webpage),
|
||||
'duration': int_or_none(info.get('videoLengthInSeconds')),
|
||||
'age_limit': parse_age_limit(info.get('audienceRating')),
|
||||
}
|
@@ -49,7 +49,10 @@ from .pornhub import PornHubIE
|
||||
from .xhamster import XHamsterEmbedIE
|
||||
from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .viewlift import ViewLiftEmbedIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
@@ -59,9 +62,15 @@ from .videomore import VideomoreIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .arkena import ArkenaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .vessel import VesselIE
|
||||
from .kaltura import KalturaIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .facebook import FacebookIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -465,7 +474,7 @@ class GenericIE(InfoExtractor):
|
||||
'url': 'http://www.vestifinance.ru/articles/25753',
|
||||
'info_dict': {
|
||||
'id': '25753',
|
||||
'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
|
||||
'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
@@ -625,13 +634,15 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
# MTVSercices embed
|
||||
{
|
||||
'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
|
||||
'md5': '35727f82f58c76d996fc188f9755b0d5',
|
||||
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
||||
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
||||
'info_dict': {
|
||||
'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
|
||||
'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Review',
|
||||
'description': 'Mario\'s life in the fast lane has never looked so good.',
|
||||
'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
|
||||
'description': 'Two valets share their love for movie star Liam Neesons.',
|
||||
'timestamp': 1349922600,
|
||||
'upload_date': '20121011',
|
||||
},
|
||||
},
|
||||
# YouTube embed via <data-embed-url="">
|
||||
@@ -783,6 +794,19 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Rosetta #CometLanding webcast HL 10',
|
||||
}
|
||||
},
|
||||
# Another Livestream embed, without 'new.' in URL
|
||||
{
|
||||
'url': 'https://www.freespeech.org/',
|
||||
'info_dict': {
|
||||
'id': '123537347',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
# Live stream
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# LazyYT
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
@@ -835,6 +859,7 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
},
|
||||
'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
|
||||
},
|
||||
# jwplayer YouTube
|
||||
{
|
||||
@@ -867,18 +892,6 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||
}
|
||||
},
|
||||
# Kaltura embed
|
||||
{
|
||||
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
|
||||
'info_dict': {
|
||||
'id': '1_eergr3h1',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150226',
|
||||
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
|
||||
'timestamp': int,
|
||||
'title': 'John Carlson Postgame 2/25/15',
|
||||
},
|
||||
},
|
||||
# Kaltura embed (different embed code)
|
||||
{
|
||||
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
|
||||
@@ -904,6 +917,37 @@ class GenericIE(InfoExtractor):
|
||||
'uploader_id': 'echojecka',
|
||||
},
|
||||
},
|
||||
# Kaltura embed with single quotes
|
||||
{
|
||||
'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
|
||||
'info_dict': {
|
||||
'id': '0_izeg5utt',
|
||||
'ext': 'mp4',
|
||||
'title': '35871',
|
||||
'timestamp': 1355743100,
|
||||
'upload_date': '20121217',
|
||||
'uploader_id': 'batchUser',
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
{
|
||||
# Kaltura embedded via quoted entry_id
|
||||
'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
|
||||
'info_dict': {
|
||||
'id': '0_utuok90b',
|
||||
'ext': 'mp4',
|
||||
'title': '06_matthew_brender_raj_dutt',
|
||||
'timestamp': 1466638791,
|
||||
'upload_date': '20160622',
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
'expected_warnings': [
|
||||
'Could not send HEAD request'
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
@@ -1016,16 +1060,31 @@ class GenericIE(InfoExtractor):
|
||||
'timestamp': 1389118457,
|
||||
},
|
||||
},
|
||||
# NBC News embed
|
||||
{
|
||||
'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
|
||||
'md5': '1aa589c675898ae6d37a17913cf68d66',
|
||||
'info_dict': {
|
||||
'id': '701714499682',
|
||||
'ext': 'mp4',
|
||||
'title': 'PREVIEW: On Assignment: David Letterman',
|
||||
'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
|
||||
},
|
||||
},
|
||||
# UDN embed
|
||||
{
|
||||
'url': 'http://www.udn.com/news/story/7314/822787',
|
||||
'url': 'https://video.udn.com/news/300346',
|
||||
'md5': 'fd2060e988c326991037b9aff9df21a6',
|
||||
'info_dict': {
|
||||
'id': '300346',
|
||||
'ext': 'mp4',
|
||||
'title': '中一中男師變性 全校師生力挺',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Ooyala embed
|
||||
{
|
||||
@@ -1042,20 +1101,6 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Contains a SMIL manifest
|
||||
{
|
||||
'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
|
||||
'info_dict': {
|
||||
'id': 'file',
|
||||
'ext': 'flv',
|
||||
'title': '+ Football: Lottery Champions League Europe',
|
||||
'uploader': 'www.telewebion.com',
|
||||
},
|
||||
'params': {
|
||||
# rtmpe downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Brightcove URL in single quotes
|
||||
{
|
||||
'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
|
||||
@@ -1074,12 +1119,17 @@ class GenericIE(InfoExtractor):
|
||||
# Dailymotion Cloud video
|
||||
{
|
||||
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
|
||||
'md5': '49444254273501a64675a7e68c502681',
|
||||
'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
|
||||
'info_dict': {
|
||||
'id': '5585de919473990de4bee11b',
|
||||
'id': 'x2uy8t3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le débat',
|
||||
'title': 'Sauvons les abeilles ! - Le débat',
|
||||
'description': 'md5:d9082128b1c5277987825d684939ca26',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
'timestamp': 1434970506,
|
||||
'upload_date': '20150622',
|
||||
'uploader': 'Public Sénat',
|
||||
'uploader_id': 'xa9gza',
|
||||
}
|
||||
},
|
||||
# OnionStudios embed
|
||||
@@ -1193,6 +1243,143 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'Lake8737',
|
||||
}
|
||||
},
|
||||
# Duplicated embedded video URLs
|
||||
{
|
||||
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||
'info_dict': {
|
||||
'id': '149298443_480_16c25b74_2',
|
||||
'ext': 'mp4',
|
||||
'title': 'vs. Blue Orange Spring Game',
|
||||
'uploader': 'www.hudl.com',
|
||||
},
|
||||
},
|
||||
# twitter:player:stream embed
|
||||
{
|
||||
'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
|
||||
'info_dict': {
|
||||
'id': 'master',
|
||||
'ext': 'mp4',
|
||||
'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
|
||||
'uploader': 'www.rtl.be',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# twitter:player embed
|
||||
{
|
||||
'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
|
||||
'md5': 'a3e0df96369831de324f0778e126653c',
|
||||
'info_dict': {
|
||||
'id': '4909620399001',
|
||||
'ext': 'mp4',
|
||||
'title': 'What Do Black Holes Sound Like?',
|
||||
'description': 'what do black holes sound like',
|
||||
'upload_date': '20160524',
|
||||
'uploader_id': '29913724001',
|
||||
'timestamp': 1464107587,
|
||||
'uploader': 'TheAtlantic',
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
},
|
||||
# Facebook <iframe> embed
|
||||
{
|
||||
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
|
||||
'md5': 'fbcde74f534176ecb015849146dd3aee',
|
||||
'info_dict': {
|
||||
'id': '599637780109885',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #599637780109885',
|
||||
},
|
||||
},
|
||||
# Facebook API embed
|
||||
{
|
||||
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
|
||||
'md5': 'a47372ee61b39a7b90287094d447d94e',
|
||||
'info_dict': {
|
||||
'id': '10153467542406923',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #10153467542406923',
|
||||
},
|
||||
},
|
||||
# Wordpress "YouTube Video Importer" plugin
|
||||
{
|
||||
'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
|
||||
'md5': 'd16797741b560b485194eddda8121b48',
|
||||
'info_dict': {
|
||||
'id': 'HNTXWDXV9Is',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blue Devils Drumline Stanford lot 2016',
|
||||
'upload_date': '20160627',
|
||||
'uploader_id': 'GENOCIDE8GENERAL10',
|
||||
'uploader': 'cylus cyrus',
|
||||
},
|
||||
},
|
||||
{
|
||||
# video stored on custom kaltura server
|
||||
'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
|
||||
'md5': '537617d06e64dfed891fa1593c4b30cc',
|
||||
'info_dict': {
|
||||
'id': '0_1iotm5bh',
|
||||
'ext': 'mp4',
|
||||
'title': 'Elecciones británicas: 5 lecciones para Rajoy',
|
||||
'description': 'md5:435a89d68b9760b92ce67ed227055f16',
|
||||
'uploader_id': 'videos.expansion@el-mundo.net',
|
||||
'upload_date': '20150429',
|
||||
'timestamp': 1430303472,
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
{
|
||||
# Non-standard Vimeo embed
|
||||
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
||||
'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
|
||||
'info_dict': {
|
||||
'id': '148867247',
|
||||
'ext': 'mp4',
|
||||
'title': 'Understanding the web - Teaser',
|
||||
'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
|
||||
'upload_date': '20151214',
|
||||
'uploader': 'OpenClassrooms',
|
||||
'uploader_id': 'openclassrooms',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
},
|
||||
{
|
||||
'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
'info_dict': {
|
||||
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': 'Royalty free test video',
|
||||
'timestamp': 1432816365,
|
||||
'upload_date': '20150528',
|
||||
'is_live': False,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [ArkenaIE.ie_key()],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
# 'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||
# 'md5': '888dcf08b7ea671381f00fab74692755',
|
||||
# 'info_dict': {
|
||||
# 'id': 'nyvTSJMKId',
|
||||
# 'ext': 'mp4',
|
||||
# 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||
# 'description': '#love for cats.',
|
||||
# 'timestamp': 1461244995,
|
||||
# 'upload_date': '20160421',
|
||||
# },
|
||||
# 'params': {
|
||||
# 'force_generic_extractor': True,
|
||||
# },
|
||||
# }
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -1499,6 +1686,16 @@ class GenericIE(InfoExtractor):
|
||||
if bc_urls:
|
||||
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
|
||||
|
||||
# Look for ThePlatform embeds
|
||||
tp_urls = ThePlatformIE._extract_urls(webpage)
|
||||
if tp_urls:
|
||||
return _playlist_from_matches(tp_urls, ie='ThePlatform')
|
||||
|
||||
# Look for Vessel embeds
|
||||
vessel_urls = VesselIE._extract_urls(webpage)
|
||||
if vessel_urls:
|
||||
return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||
@@ -1539,12 +1736,16 @@ class GenericIE(InfoExtractor):
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
# Look for Wordpress "YouTube Video Importer" plugin
|
||||
matches = re.findall(r'''(?x)<div[^>]+
|
||||
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
||||
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
return _playlist_from_matches(matches, lambda m: m[-1])
|
||||
|
||||
matches = DailymotionIE._extract_urls(webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches)
|
||||
|
||||
# Look for embedded Dailymotion playlist player (#3822)
|
||||
m = re.search(
|
||||
@@ -1681,10 +1882,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded Facebook player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Facebook')
|
||||
facebook_url = FacebookIE._extract_url(webpage)
|
||||
if facebook_url is not None:
|
||||
return self.url_result(facebook_url, 'Facebook')
|
||||
|
||||
# Look for embedded VK player
|
||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
||||
@@ -1806,14 +2006,6 @@ class GenericIE(InfoExtractor):
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url)
|
||||
|
||||
# Look for embedded vulture.com player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url, ie='Vulture')
|
||||
|
||||
# Look for embedded mtvservices player
|
||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||
if mtvservices_url:
|
||||
@@ -1862,7 +2054,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
|
||||
r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Livestream')
|
||||
@@ -1874,18 +2066,14 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
|
||||
if mobj is not None:
|
||||
return self.url_result(smuggle_url(
|
||||
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
|
||||
{'source_url': url}), 'Kaltura')
|
||||
kaltura_url = KalturaIE._extract_url(webpage)
|
||||
if kaltura_url:
|
||||
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
|
||||
|
||||
# Look for Eagle.Platform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'EaglePlatform')
|
||||
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
||||
if eagleplatform_url:
|
||||
return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
|
||||
|
||||
# Look for ClipYou (uses Eagle.Platform) embeds
|
||||
mobj = re.search(
|
||||
@@ -1926,6 +2114,12 @@ class GenericIE(InfoExtractor):
|
||||
if nbc_sports_url:
|
||||
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||
|
||||
# Look for NBC News embeds
|
||||
nbc_news_embed_url = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
|
||||
if nbc_news_embed_url:
|
||||
return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
|
||||
|
||||
# Look for Google Drive embeds
|
||||
google_drive_url = GoogleDriveIE._extract_url(webpage)
|
||||
if google_drive_url:
|
||||
@@ -1973,6 +2167,11 @@ class GenericIE(InfoExtractor):
|
||||
if digiteka_url:
|
||||
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
||||
|
||||
# Look for Arkena embeds
|
||||
arkena_url = ArkenaIE._extract_url(webpage)
|
||||
if arkena_url:
|
||||
return self.url_result(arkena_url, ArkenaIE.ie_key())
|
||||
|
||||
# Look for Limelight embeds
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
@@ -2025,6 +2224,19 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default=None, expected_type='VideoObject')
|
||||
if json_ld and json_ld.get('url'):
|
||||
info_dict.update({
|
||||
'title': video_title or info_dict['title'],
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': age_limit
|
||||
})
|
||||
info_dict.update(json_ld)
|
||||
return info_dict
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
@@ -2068,6 +2280,9 @@ class GenericIE(InfoExtractor):
|
||||
r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
|
||||
if not found:
|
||||
# Try to find twitter cards info
|
||||
# twitter:player:stream should be checked before twitter:player since
|
||||
# it is expected to contain a raw stream (see
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
|
||||
found = filter_video(re.findall(
|
||||
r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
|
||||
if not found:
|
||||
@@ -2101,11 +2316,20 @@ class GenericIE(InfoExtractor):
|
||||
'_type': 'url',
|
||||
'url': new_url,
|
||||
}
|
||||
|
||||
if not found:
|
||||
# twitter:player is a https URL to iframe player that may or may not
|
||||
# be supported by youtube-dl thus this is checked the very last (see
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
|
||||
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
|
||||
if embed_url:
|
||||
return self.url_result(embed_url)
|
||||
|
||||
if not found:
|
||||
raise UnsupportedError(url)
|
||||
|
||||
entries = []
|
||||
for video_url in found:
|
||||
for video_url in orderedSet(found):
|
||||
video_url = unescapeHTML(video_url)
|
||||
video_url = video_url.replace('\\/', '/')
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
|
66
youtube_dl/extractor/godtv.py
Normal file
66
youtube_dl/extractor/godtv.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class GodTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
|
||||
'info_dict': {
|
||||
'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Randy Needham',
|
||||
'duration': 3615.08,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://god.tv/playlist/bible-study',
|
||||
'info_dict': {
|
||||
'id': 'bible-study',
|
||||
},
|
||||
'playlist_mincount': 37,
|
||||
}, {
|
||||
'url': 'http://god.tv/node/15097',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://god.tv/live/africa',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://god.tv/liveevents',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
settings = self._parse_json(
|
||||
self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'settings', default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
ooyala_id = None
|
||||
|
||||
if settings:
|
||||
playlist = settings.get('playlist')
|
||||
if playlist and isinstance(playlist, list):
|
||||
entries = [
|
||||
OoyalaIE._build_url_result(video['content_id'])
|
||||
for video in playlist if video.get('content_id')]
|
||||
if entries:
|
||||
return self.playlist_result(entries, display_id)
|
||||
ooyala_id = settings.get('ooyala', {}).get('content_id')
|
||||
|
||||
if not ooyala_id:
|
||||
ooyala_id = self._search_regex(
|
||||
r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
|
||||
webpage, 'ooyala id', group='id')
|
||||
|
||||
return OoyalaIE._build_url_result(ooyala_id)
|
@@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor):
|
||||
'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '42428ce8a00585f9bc36e49226eae7a1',
|
||||
'info_dict': {
|
||||
'id': 'fk6OhWpXgIQ',
|
||||
'ext': 'mp4',
|
||||
@@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor):
|
||||
'uploader_id': 'groupon',
|
||||
'uploader': 'Groupon',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
_PROVIDERS = {
|
||||
|
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||
'md5': '8b743df908c42f60cf6496586c7f12c3',
|
||||
'md5': '7d45932269a288149483144f01b99789',
|
||||
'info_dict': {
|
||||
'id': '390161',
|
||||
'ext': 'mp4',
|
||||
@@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
|
||||
'duration': 56.823,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
202
youtube_dl/extractor/hrti.py
Normal file
202
youtube_dl/extractor/hrti.py
Normal file
@@ -0,0 +1,202 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class HRTiBaseIE(InfoExtractor):
|
||||
"""
|
||||
Base Information Extractor for Croatian Radiotelevision
|
||||
video on demand site https://hrti.hrt.hr
|
||||
Reverse engineered from the JavaScript app in app.min.js
|
||||
"""
|
||||
_NETRC_MACHINE = 'hrti'
|
||||
|
||||
_APP_LANGUAGE = 'hr'
|
||||
_APP_VERSION = '1.1'
|
||||
_APP_PUBLICATION_ID = 'all_in_one'
|
||||
_API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json'
|
||||
|
||||
def _initialize_api(self):
|
||||
init_data = {
|
||||
'application_publication_id': self._APP_PUBLICATION_ID
|
||||
}
|
||||
|
||||
uuid = self._download_json(
|
||||
self._API_URL, None, note='Downloading uuid',
|
||||
errnote='Unable to download uuid',
|
||||
data=json.dumps(init_data).encode('utf-8'))['uuid']
|
||||
|
||||
app_data = {
|
||||
'uuid': uuid,
|
||||
'application_publication_id': self._APP_PUBLICATION_ID,
|
||||
'application_version': self._APP_VERSION
|
||||
}
|
||||
|
||||
req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
|
||||
req.get_method = lambda: 'PUT'
|
||||
|
||||
resources = self._download_json(
|
||||
req, None, note='Downloading session information',
|
||||
errnote='Unable to download session information')
|
||||
|
||||
self._session_id = resources['session_id']
|
||||
|
||||
modules = resources['modules']
|
||||
|
||||
self._search_url = modules['vod_catalog']['resources']['search']['uri'].format(
|
||||
language=self._APP_LANGUAGE,
|
||||
application_id=self._APP_PUBLICATION_ID)
|
||||
|
||||
self._login_url = (modules['user']['resources']['login']['uri'] +
|
||||
'/format/json').format(session_id=self._session_id)
|
||||
|
||||
self._logout_url = modules['user']['resources']['logout']['uri']
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
# TODO: figure out authentication with cookies
|
||||
if username is None or password is None:
|
||||
self.raise_login_required()
|
||||
|
||||
auth_data = {
|
||||
'username': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
try:
|
||||
auth_info = self._download_json(
|
||||
self._login_url, None, note='Logging in', errnote='Unable to log in',
|
||||
data=json.dumps(auth_data).encode('utf-8'))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406:
|
||||
auth_info = self._parse_json(e.cause.read().encode('utf-8'), None)
|
||||
else:
|
||||
raise
|
||||
|
||||
error_message = auth_info.get('error', {}).get('message')
|
||||
if error_message:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error_message),
|
||||
expected=True)
|
||||
|
||||
self._token = auth_info['secure_streaming_token']
|
||||
|
||||
def _real_initialize(self):
|
||||
self._initialize_api()
|
||||
self._login()
|
||||
|
||||
|
||||
class HRTiIE(HRTiBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
hrti:(?P<short_id>[0-9]+)|
|
||||
https?://
|
||||
hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd',
|
||||
'info_dict': {
|
||||
'id': '2181385',
|
||||
'display_id': 'republika-dokumentarna-serija-16-hd',
|
||||
'ext': 'mp4',
|
||||
'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)',
|
||||
'description': 'md5:48af85f620e8e0e1df4096270568544f',
|
||||
'duration': 2922,
|
||||
'view_count': int,
|
||||
'average_rating': int,
|
||||
'episode_number': int,
|
||||
'season_number': int,
|
||||
'age_limit': 12,
|
||||
},
|
||||
'skip': 'Requires account credentials',
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/#/video/show/2181385/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'hrti:2181385',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('short_id') or mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
video = self._download_json(
|
||||
'%s/video_id/%s/format/json' % (self._search_url, video_id),
|
||||
display_id, 'Downloading video metadata JSON')['video'][0]
|
||||
|
||||
title_info = video['title']
|
||||
title = title_info['title_long']
|
||||
|
||||
movie = video['video_assets']['movie'][0]
|
||||
m3u8_url = movie['url'].format(TOKEN=self._token)
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = clean_html(title_info.get('summary_long'))
|
||||
age_limit = parse_age_limit(video.get('parental_control', {}).get('rating'))
|
||||
view_count = int_or_none(video.get('views'))
|
||||
average_rating = int_or_none(video.get('user_rating'))
|
||||
duration = int_or_none(movie.get('duration'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'average_rating': average_rating,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class HRTiPlaylistIE(HRTiBaseIE):
|
||||
_VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
|
||||
'info_dict': {
|
||||
'id': '212',
|
||||
'title': 'ekumena',
|
||||
},
|
||||
'playlist_mincount': 8,
|
||||
'skip': 'Requires account credentials',
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
category_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or category_id
|
||||
|
||||
response = self._download_json(
|
||||
'%s/category_id/%s/format/json' % (self._search_url, category_id),
|
||||
display_id, 'Downloading video metadata JSON')
|
||||
|
||||
video_ids = try_get(
|
||||
response, lambda x: x['video_listings'][0]['alternatives'][0]['list'],
|
||||
list) or [video['id'] for video in response.get('videos', []) if video.get('id')]
|
||||
|
||||
entries = [self.url_result('hrti:%s' % video_id) for video_id in video_ids]
|
||||
|
||||
return self.playlist_result(entries, category_id, display_id)
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
class ImdbIE(InfoExtractor):
|
||||
IE_NAME = 'imdb'
|
||||
IE_DESC = 'Internet Movie Database trailers'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||
@@ -25,6 +25,12 @@ class ImdbIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user