Compare commits
834 Commits
2015.06.15
...
2015.08.23
Author | SHA1 | Date | |
---|---|---|---|
|
11addc50ff | ||
|
e4df2f98cc | ||
|
e7c14660d3 | ||
|
90076b6172 | ||
|
28b83495d8 | ||
|
551c7837ac | ||
|
59e6acc757 | ||
|
9990c960f2 | ||
|
2006a06eff | ||
|
2b6bda1ed8 | ||
|
468083d2f5 | ||
|
483fc223bb | ||
|
66ce97024d | ||
|
8c97f81943 | ||
|
d7c1630570 | ||
|
5e1a5ac8de | ||
|
9eb4ab6ad9 | ||
|
4932a817a0 | ||
|
5d003e29b1 | ||
|
dc95bd503e | ||
|
f738dd7b7c | ||
|
f908b74fa3 | ||
|
c687ac745b | ||
|
912e0b7e46 | ||
|
03bc7237ad | ||
|
dd565ac1ad | ||
|
5cdefc4625 | ||
|
ce00af8767 | ||
|
51047444aa | ||
|
aa6cd05ed8 | ||
|
dac14bf311 | ||
|
05fe2594e4 | ||
|
26e1c3514f | ||
|
22c83245c5 | ||
|
7900aede14 | ||
|
f877c6ae5a | ||
|
ca681f7041 | ||
|
a01da8bbf8 | ||
|
f3a65d9636 | ||
|
559f4c550f | ||
|
03c635a4b5 | ||
|
34a4cd0a34 | ||
|
3b9b32f404 | ||
|
9c724a9802 | ||
|
7a6e8a1b17 | ||
|
369c12e038 | ||
|
0fa5795b85 | ||
|
c00c7c0af0 | ||
|
cbaed4bb5e | ||
|
f74a7348f6 | ||
|
8626b23e4e | ||
|
0086874277 | ||
|
7fc18d9309 | ||
|
974f1a385a | ||
|
6900b4f6f5 | ||
|
d90e4bdb74 | ||
|
276c989772 | ||
|
ea99110d24 | ||
|
221a59fe6f | ||
|
eaa5646483 | ||
|
041bc3adc5 | ||
|
e64b756943 | ||
|
201ea3ee8e | ||
|
9303ce3e69 | ||
|
06c085ab6e | ||
|
c576ef1e7c | ||
|
11bed5827d | ||
|
fab83e2456 | ||
|
1d25e9d173 | ||
|
9c21f22923 | ||
|
3aa697f993 | ||
|
8b9848ac56 | ||
|
8b8c1093b6 | ||
|
d0d6c097fc | ||
|
6be5e46994 | ||
|
45694b504a | ||
|
41dbc50f9c | ||
|
4d2ad866f3 | ||
|
3cafca04aa | ||
|
594f51b859 | ||
|
fb56131dd9 | ||
|
a34e19629c | ||
|
3c12a027d4 | ||
|
cb28e03386 | ||
|
7393746da2 | ||
|
6828c809e4 | ||
|
28479149cc | ||
|
237c03c8ea | ||
|
e73c85cb23 | ||
|
b6b2711298 | ||
|
3b7130439a | ||
|
2c919adb74 | ||
|
60231c65b9 | ||
|
f196047832 | ||
|
240ca32e57 | ||
|
fa37c26c4d | ||
|
d7dbfc7cc1 | ||
|
d9ab5262b1 | ||
|
fb124e3741 | ||
|
479bf783d2 | ||
|
f0f3a6c99d | ||
|
f57b7835e2 | ||
|
1df3186e0e | ||
|
0b7c27828d | ||
|
0a19d4ccd6 | ||
|
9f3da13860 | ||
|
bf812ef714 | ||
|
b1ac38fadc | ||
|
fb0d12c6cb | ||
|
34952f09e1 | ||
|
34a7de2970 | ||
|
0ff827419e | ||
|
b29440aee6 | ||
|
11b5605815 | ||
|
844587669e | ||
|
f6c3664d71 | ||
|
c5864a8ce6 | ||
|
27c7114af6 | ||
|
0791ac1b44 | ||
|
1de5cd3ba5 | ||
|
729accb482 | ||
|
942acef594 | ||
|
fb2f339fec | ||
|
98044462b1 | ||
|
0dcb318f62 | ||
|
f32143469f | ||
|
3a30508b94 | ||
|
e0b9d78fab | ||
|
8d6765cf48 | ||
|
12bb392a0f | ||
|
08df685fe7 | ||
|
c8d1be772d | ||
|
887e9bc7b5 | ||
|
9f2e7c2f34 | ||
|
d7bb8884af | ||
|
464e792496 | ||
|
18c3281f9e | ||
|
8e2b1be127 | ||
|
b61b7787cb | ||
|
b465083f45 | ||
|
154655a85a | ||
|
59e89e62d7 | ||
|
d5d7bdaeb5 | ||
|
b2f82948ee | ||
|
428e4e4a85 | ||
|
1e83741c9a | ||
|
621d6a9516 | ||
|
3550821fb4 | ||
|
5b0c40da24 | ||
|
e0ac521438 | ||
|
c29458f3ec | ||
|
bf94d763ba | ||
|
8a37aa1517 | ||
|
f3d24df6f2 | ||
|
fd5d8270dc | ||
|
be612d9e0c | ||
|
4a7434d0b0 | ||
|
ad2141be2d | ||
|
f94639fadf | ||
|
89faae660f | ||
|
0f422256d6 | ||
|
acc1adbe7a | ||
|
8002ac9e0a | ||
|
6d30cf04db | ||
|
430b092a5f | ||
|
3eb5fdb581 | ||
|
9663bd3abb | ||
|
5a4d9ddb21 | ||
|
3be3c622dc | ||
|
cd6b555e19 | ||
|
671302b5c0 | ||
|
4f34cdb0a8 | ||
|
bd690a9f93 | ||
|
51f267d9d4 | ||
|
47f53ad958 | ||
|
c73cdd800f | ||
|
f535ec8278 | ||
|
238755752f | ||
|
c71a3195af | ||
|
54a9328b20 | ||
|
3e48522477 | ||
|
251a44b776 | ||
|
be7a8379b4 | ||
|
defce60385 | ||
|
354b4b8604 | ||
|
5b7dab2dd6 | ||
|
8a5601e42f | ||
|
232541df44 | ||
|
a346b1ff57 | ||
|
d96d604e53 | ||
|
e704f87f86 | ||
|
8f5639afcb | ||
|
03950c90f7 | ||
|
47a8b7c14a | ||
|
2a04d2c799 | ||
|
8de922724b | ||
|
67b8a28a2f | ||
|
51a575159a | ||
|
524229a297 | ||
|
754e70cf3e | ||
|
84bc4dcb0f | ||
|
10eaa8ef1d | ||
|
c3124c3085 | ||
|
8d5b8b477e | ||
|
d7d2a9a3db | ||
|
25a4c5a9ed | ||
|
5c45bbe57b | ||
|
d41d04c0f5 | ||
|
e422d7f4f7 | ||
|
cdc682d5a4 | ||
|
9cc93c64aa | ||
|
fa7a1cc5ef | ||
|
17712eeb19 | ||
|
41c3a5a7be | ||
|
8765222d22 | ||
|
645f814544 | ||
|
308cfe0ab3 | ||
|
e5e8d20a3a | ||
|
a107193e4b | ||
|
55eae65b39 | ||
|
3f125c8c70 | ||
|
75e8b2ac87 | ||
|
ee114368ad | ||
|
525a87f58e | ||
|
44cae2fb2e | ||
|
30a453884e | ||
|
3b58d94f71 | ||
|
8abb86fec4 | ||
|
16a089780e | ||
|
09b6468d30 | ||
|
80fb6d4aa4 | ||
|
1f04873517 | ||
|
799207e838 | ||
|
34866b4836 | ||
|
be530dfea2 | ||
|
d12a1a47d5 | ||
|
8d42e3501e | ||
|
2711e41bcd | ||
|
5e1eddb939 | ||
|
23e7f53bd3 | ||
|
000b6b5ae5 | ||
|
864f24bd2c | ||
|
5d8df28d27 | ||
|
f9a5affad9 | ||
|
ab81ef8fa7 | ||
|
95d8f7ea12 | ||
|
5316bf7487 | ||
|
a6f774e901 | ||
|
f171bc8b59 | ||
|
289bbb350e | ||
|
d247a2c8bf | ||
|
88ed52aec9 | ||
|
cb23bcba29 | ||
|
2c7ed24796 | ||
|
4c6bd5b5b6 | ||
|
aeb7b41d44 | ||
|
5bdec59de1 | ||
|
dfaba1ab95 | ||
|
a62fd1af27 | ||
|
7a89681722 | ||
|
51da40e621 | ||
|
d8f0a9ecea | ||
|
cf7e015f25 | ||
|
1af330f29f | ||
|
9afa1770d1 | ||
|
3ebbcce1c7 | ||
|
2c7c721933 | ||
|
7523647391 | ||
|
9700cd9097 | ||
|
eab7faa0c1 | ||
|
a56c1e38c7 | ||
|
40a2d17052 | ||
|
b14fa8e687 | ||
|
678e436f2e | ||
|
ff81c4c99c | ||
|
420658e6cb | ||
|
593ddd851b | ||
|
1243402657 | ||
|
1a117a7728 | ||
|
2b2ee140c3 | ||
|
d97f5cd795 | ||
|
f3f0b8e403 | ||
|
660f9459da | ||
|
10952eb2cf | ||
|
cdad742700 | ||
|
a9e8f60ef6 | ||
|
a8b7b26068 | ||
|
ba911137fa | ||
|
d3f007af18 | ||
|
2929fa0e79 | ||
|
297a564bee | ||
|
53b8247cb5 | ||
|
59db9f8018 | ||
|
b73b14f72c | ||
|
41597d9bed | ||
|
b37317d8b0 | ||
|
87dc451108 | ||
|
ca4456eda8 | ||
|
993df6bc22 | ||
|
61be92e26a | ||
|
c59b61c0da | ||
|
3e214851a4 | ||
|
a47b602b08 | ||
|
a083b859e4 | ||
|
948199deac | ||
|
c356620ec1 | ||
|
f79ebf09a2 | ||
|
c7620992d2 | ||
|
ce1bafdce9 | ||
|
9872e588c8 | ||
|
d609edf4f1 | ||
|
3a99d321a8 | ||
|
4bb3d999ac | ||
|
40101dc311 | ||
|
e9c6deffee | ||
|
9c29bc69f7 | ||
|
1e12429564 | ||
|
795704f0f1 | ||
|
981b9cdc8c | ||
|
3f724339db | ||
|
70c857b728 | ||
|
c84683c88b | ||
|
b68a2613f8 | ||
|
28afa6e77a | ||
|
496ce6b349 | ||
|
ce9512b78b | ||
|
4eb59a6b1c | ||
|
80b1ee0a4c | ||
|
f993afb26d | ||
|
7c80519cbf | ||
|
8250c32f49 | ||
|
2fe1ff8582 | ||
|
17ee98e1a5 | ||
|
2ee8f5d80f | ||
|
3f302bca8c | ||
|
c909e5820e | ||
|
a1b85269a4 | ||
|
faa1f83ab4 | ||
|
308c505c3d | ||
|
0eacd2aaae | ||
|
18ae46ad4b | ||
|
65c2b21df1 | ||
|
772acaf31f | ||
|
f8d0745e27 | ||
|
d719c6a5ab | ||
|
769efa16af | ||
|
86b4e98ac6 | ||
|
3bf8c316a6 | ||
|
e37c92ec6d | ||
|
a5dd9a0c5d | ||
|
7a4a945f13 | ||
|
1d18e26eca | ||
|
ac4b8df5e4 | ||
|
3bc9fb5889 | ||
|
632cbb8efa | ||
|
789a12aaaf | ||
|
ecdbe09e10 | ||
|
1dc31c2786 | ||
|
32470bf619 | ||
|
8b61bfd638 | ||
|
8a7a208905 | ||
|
0215103e92 | ||
|
c2d1be8981 | ||
|
4951c9f821 | ||
|
726adc43ec | ||
|
3c6ae8b59e | ||
|
605be3f7f8 | ||
|
c51bc70e0f | ||
|
e89d7e3029 | ||
|
3c07a729a6 | ||
|
84c0ed50a5 | ||
|
02c126a7c2 | ||
|
114ed20e64 | ||
|
4b0f45f667 | ||
|
36068ae019 | ||
|
9d681c2bb3 | ||
|
3af1fac7b0 | ||
|
761ee0d827 | ||
|
fb8bc3f818 | ||
|
826a7da808 | ||
|
cbd55ade68 | ||
|
5705ee6ef8 | ||
|
3f5c6d0c1b | ||
|
e58066e244 | ||
|
ee48b6a88f | ||
|
9ac09ed4de | ||
|
22603348aa | ||
|
fec73daaa3 | ||
|
c6b68648f4 | ||
|
1ecb5d1d83 | ||
|
dc786d3db5 | ||
|
74fe23ec35 | ||
|
b0bff54b08 | ||
|
1b541d8d6e | ||
|
f29ac588dd | ||
|
0696667734 | ||
|
1793d71db6 | ||
|
4211e1941b | ||
|
4bdfef5a18 | ||
|
8a37f53685 | ||
|
4e1ad6e9a8 | ||
|
fb10e1aa57 | ||
|
3c283a381e | ||
|
dac4d5be12 | ||
|
530857182d | ||
|
9441f77faa | ||
|
3cc8b4c327 | ||
|
6b19647d57 | ||
|
7bd42d0d96 | ||
|
c60e8cfaf7 | ||
|
7fd002c006 | ||
|
db6c50f109 | ||
|
aa4789d632 | ||
|
ee8de13e14 | ||
|
7dde5f6a8d | ||
|
736f003f2e | ||
|
47af21e8f1 | ||
|
605cbef653 | ||
|
388ad0c05c | ||
|
2ebbb6f1f7 | ||
|
d54f1c7477 | ||
|
b78f5ec4c3 | ||
|
9fd3bf04b7 | ||
|
e97bb3de83 | ||
|
c2daf8dfa4 | ||
|
09b718c439 | ||
|
c177bb3a50 | ||
|
977a247a06 | ||
|
899a3e2f13 | ||
|
8ee4ecb48d | ||
|
f7e6f7fa23 | ||
|
1f80e360fc | ||
|
d7011316d0 | ||
|
d3671b344f | ||
|
a60cccbf9f | ||
|
3e72f5f10e | ||
|
b94b78971c | ||
|
4d08161ac2 | ||
|
8954e48140 | ||
|
aa99aa4e85 | ||
|
d79febcd06 | ||
|
13fc7f3a05 | ||
|
14309e1ddc | ||
|
5513967926 | ||
|
eacd875f3b | ||
|
c4fe07c7af | ||
|
1186e3f91a | ||
|
f354385bf5 | ||
|
cabe001590 | ||
|
89f691e141 | ||
|
4a63291144 | ||
|
593b77064c | ||
|
9fefc88656 | ||
|
a3bfddfa5e | ||
|
36da48798a | ||
|
a0f28f90fa | ||
|
851229a01f | ||
|
c9c854cea7 | ||
|
a38436e889 | ||
|
23fc384f2c | ||
|
1540119723 | ||
|
574f42d79a | ||
|
536b0700b0 | ||
|
5ba761eb85 | ||
|
611ac379bb | ||
|
03f32a7ead | ||
|
50ea2bb20d | ||
|
525daedd5a | ||
|
e118031ef8 | ||
|
45eedbe58c | ||
|
e37c932fca | ||
|
5eb778bf4d | ||
|
ab9b890b52 | ||
|
31c746e5dc | ||
|
f01f731107 | ||
|
70f0f5a8ca | ||
|
cc357c4db8 | ||
|
97f4aecfc1 | ||
|
2af0f87c8b | ||
|
b062d94eef | ||
|
6c1b0c0ed2 | ||
|
ddcdc684e2 | ||
|
eae89f92e6 | ||
|
01d115b06b | ||
|
79057965a8 | ||
|
dcd4d95c8e | ||
|
cf61d96df0 | ||
|
f8da79f828 | ||
|
9750e7d70e | ||
|
50aa2bb6b9 | ||
|
1d1dd597ed | ||
|
cfe5537ee5 | ||
|
7869eb3fc4 | ||
|
6dfa0602f0 | ||
|
75a40b2251 | ||
|
28fb109ed0 | ||
|
48607afac5 | ||
|
b6ea9ef21a | ||
|
b8dd44baa9 | ||
|
c4f1fde75b | ||
|
667170e2c7 | ||
|
53429e6551 | ||
|
ac8f97f2b3 | ||
|
41c0d2f8cb | ||
|
1f3a43dbe6 | ||
|
369e195a44 | ||
|
15006fedb9 | ||
|
e35b23f54d | ||
|
f72b0a6032 | ||
|
ac9ed061ec | ||
|
d919fa3344 | ||
|
79913fde35 | ||
|
da634d0a8b | ||
|
fac54cb426 | ||
|
3f19b9b7c1 | ||
|
dc48695ab9 | ||
|
0a31a35098 | ||
|
86f2541695 | ||
|
181c4ccaaa | ||
|
ed848087d5 | ||
|
edd66be5be | ||
|
246995dbc8 | ||
|
b931fbe5ab | ||
|
e014ff015d | ||
|
4fa5f40232 | ||
|
9b15be97aa | ||
|
a7ada46bd9 | ||
|
9d16788ad9 | ||
|
6ce89aecc3 | ||
|
963d0ce7e3 | ||
|
0f08d7f851 | ||
|
44c514eb9c | ||
|
513cbdda93 | ||
|
e1ba152352 | ||
|
446e764500 | ||
|
901d00caa6 | ||
|
094790d2c9 | ||
|
1c0163a5cc | ||
|
8fa7e5817a | ||
|
01b89d5682 | ||
|
9f01c1a803 | ||
|
46f0f50016 | ||
|
b8070dbbd7 | ||
|
3b16d803c9 | ||
|
de195c23a6 | ||
|
d3b8908886 | ||
|
2688176c77 | ||
|
a5839317aa | ||
|
a0aab26a41 | ||
|
27713812a0 | ||
|
cf2c5fda4f | ||
|
a9684c0dbf | ||
|
c0bf5e1c4d | ||
|
a31e3e7dcb | ||
|
17b41a3337 | ||
|
89a683ae74 | ||
|
008661069b | ||
|
9296e92e1c | ||
|
a34af8d066 | ||
|
8726e04629 | ||
|
2a01c940ec | ||
|
4eab60cbd2 | ||
|
a0e060ac1e | ||
|
397a8ea96e | ||
|
15830339ef | ||
|
b29280285e | ||
|
1633491bff | ||
|
2b0fa1f7dd | ||
|
02b386f80a | ||
|
bf20b9c540 | ||
|
06a12933f3 | ||
|
6dd94d3a79 | ||
|
f2f89c762a | ||
|
e6c2d9ad29 | ||
|
83423254cc | ||
|
1c20ddc966 | ||
|
675e9f22ea | ||
|
77c6fb5b24 | ||
|
082a0140ef | ||
|
9e535ce055 | ||
|
d76dea001b | ||
|
af0f9b0e95 | ||
|
e2082ea942 | ||
|
68923e52a3 | ||
|
9281f6d253 | ||
|
4647845679 | ||
|
cf9cf7dd04 | ||
|
1316b54956 | ||
|
cbc1fadd6f | ||
|
4dd09c9add | ||
|
267dc07e6b | ||
|
d7b4d5dd50 | ||
|
7f220b2fac | ||
|
275c0423aa | ||
|
d3ee4bbc5a | ||
|
85a064861f | ||
|
d0b436bff2 | ||
|
92b2f18072 | ||
|
dfc4eca21f | ||
|
fc7ae675e2 | ||
|
804ad79985 | ||
|
da839880e9 | ||
|
e9d33454b5 | ||
|
d80891efc4 | ||
|
37c1e4025c | ||
|
59a83d3e5b | ||
|
13af92fdc4 | ||
|
0c20ee7d4b | ||
|
89d42c2c75 | ||
|
04611765a4 | ||
|
9dfc4fa1a1 | ||
|
43232d5c14 | ||
|
f7c272d4fa | ||
|
ede21449c8 | ||
|
bb8e553662 | ||
|
f5f4a27a96 | ||
|
d7c9a3e976 | ||
|
35eb649e9d | ||
|
e56a4c9e9b | ||
|
95506e37af | ||
|
e41840c522 | ||
|
2a46a27e6c | ||
|
0bcdc27653 | ||
|
ddf0f74de7 | ||
|
91b21b2334 | ||
|
66e568de3b | ||
|
f5ca97e393 | ||
|
8d06a62485 | ||
|
93f9420993 | ||
|
5b61070c70 | ||
|
dbe1a93526 | ||
|
aa5d9a79d6 | ||
|
86511ea417 | ||
|
1866432db7 | ||
|
cf2ac6df68 | ||
|
33f1f81b8b | ||
|
9d0b581fea | ||
|
c05724cb18 | ||
|
f0714c9f86 | ||
|
cf386750c9 | ||
|
54f428f645 | ||
|
dc2bd20e55 | ||
|
c608ee491f | ||
|
0130afb76e | ||
|
738b926322 | ||
|
bea41c7f3f | ||
|
1bbe660dfa | ||
|
c4bd188da4 | ||
|
5acfa126c8 | ||
|
67134eaba1 | ||
|
5414623791 | ||
|
c93d53f5e3 | ||
|
507683780e | ||
|
e8b9ee5e08 | ||
|
d16154d163 | ||
|
c342041fba | ||
|
bf42a9906d | ||
|
9603e8a7d9 | ||
|
c7c040b825 | ||
|
ac0474f89d | ||
|
bb512e57dc | ||
|
db652ea186 | ||
|
5a9cc19972 | ||
|
1a5fd4eebc | ||
|
8a1b49ff19 | ||
|
b971abe897 | ||
|
43b925ce74 | ||
|
62b742ece3 | ||
|
d16ef949ca | ||
|
23e7cba87f | ||
|
a8e6f30d8e | ||
|
9c49410898 | ||
|
802d74aa6b | ||
|
71f9e49e67 | ||
|
82ea1051b5 | ||
|
6c4d20cd6f | ||
|
04c27802c0 | ||
|
c3b7202f4f | ||
|
81103ef35d | ||
|
0eb5c1c62a | ||
|
a9de951744 | ||
|
a42a1bb09d | ||
|
9fbfc9bd4d | ||
|
242a998bdc | ||
|
9d1bf70234 | ||
|
b8c1cc1a51 | ||
|
eedd20ef96 | ||
|
7c197ad96d | ||
|
654fd03c73 | ||
|
cee16e0fa3 | ||
|
73c471e9ef | ||
|
533b99fbf9 | ||
|
f39eb98bab | ||
|
da77d856a1 | ||
|
b2575b38e7 | ||
|
0a3cf9ad3d | ||
|
00334d0de0 | ||
|
226b886ca8 | ||
|
bc93bdb5bb | ||
|
af214c3a79 | ||
|
4eb10f6621 | ||
|
7d7d469025 | ||
|
fd40bdc0be | ||
|
7e0480ae0e | ||
|
d80265ccd6 | ||
|
1b5a1ae257 | ||
|
d8d24a922a | ||
|
03339b7b5b | ||
|
2028c6e03d | ||
|
2988835af5 | ||
|
62cca96b72 | ||
|
b4dea075a3 | ||
|
533f67d3fa | ||
|
906e2f0eac | ||
|
b8091db6b9 | ||
|
381c067755 | ||
|
2182ab5187 | ||
|
a1593a4a0e | ||
|
aa5740fb61 | ||
|
da92eeae42 | ||
|
12e9e8445d | ||
|
8084be78c5 | ||
|
1ac1c4c26e | ||
|
d4f58034f7 | ||
|
f843300fe5 | ||
|
03b9c94437 | ||
|
a219d175c6 | ||
|
3c3b4176bd | ||
|
022383139b | ||
|
1a1251e877 | ||
|
18b5e1e534 | ||
|
2fece970b8 | ||
|
e20d0c1e69 | ||
|
a9dcf4a860 | ||
|
255f5694aa | ||
|
25701d5a2c | ||
|
a5158f38a3 | ||
|
c76799c555 | ||
|
2bb5b6d0a1 | ||
|
0bbba43ed0 | ||
|
98ca102441 | ||
|
3f3308cd75 | ||
|
6f96e308d0 | ||
|
756f574e4e | ||
|
78294e6a9c | ||
|
4e33577173 | ||
|
607841af64 | ||
|
396726244a | ||
|
c5895d5dbd | ||
|
b407e173e4 | ||
|
6a745c2c0f | ||
|
2da0cad6ae | ||
|
af1fa6234e | ||
|
c9ac7fa909 | ||
|
964afd0689 | ||
|
2a282a3b5f | ||
|
7bb23aeca4 | ||
|
de939d89eb | ||
|
77c975f536 | ||
|
75ab0ebcf5 | ||
|
10273d6e08 | ||
|
16d6973f8a | ||
|
edcd2d665b | ||
|
385c3e5e91 | ||
|
c8e337450b | ||
|
10464af5d1 | ||
|
cbcd1a5474 | ||
|
c9bebed294 | ||
|
d5552a3477 | ||
|
a8b081a052 | ||
|
9e96dc8b35 | ||
|
360075e28a | ||
|
accf79b107 | ||
|
4d58b24c15 | ||
|
c33a8639a7 | ||
|
25fa8d66e6 | ||
|
974a6146fe | ||
|
0392ac98d2 | ||
|
5e3915cbe3 | ||
|
29b809de68 | ||
|
8f73e89ca0 | ||
|
0d0d5d3717 | ||
|
a69e8bfdd9 | ||
|
062a3fdf36 | ||
|
028a33d7f2 | ||
|
14835de9fb | ||
|
447053668f | ||
|
f3aecb27a4 | ||
|
7f0172b3e5 | ||
|
79fe954d79 | ||
|
0501bfa159 | ||
|
a155b7e76c | ||
|
5228b756af | ||
|
04e7596680 | ||
|
f1e66cb2eb | ||
|
4fd35ee072 | ||
|
ee69799262 | ||
|
636a9637f4 | ||
|
9383e66f94 | ||
|
99e6833c85 | ||
|
c203be3fb4 | ||
|
02175a7986 | ||
|
8117df4cd9 | ||
|
7c7dd9dc7f | ||
|
054932f403 | ||
|
aed473ccf9 | ||
|
8268e94cd4 | ||
|
4af98ecdfb | ||
|
2a0fcf6113 | ||
|
0072afca8e | ||
|
61aa5ba36e | ||
|
9f4323252a | ||
|
d22dec74ff | ||
|
8a1a26ce4c | ||
|
5bf3276e8d | ||
|
93dfcb9357 | ||
|
0c8662d2b6 | ||
|
d84f1d14b5 | ||
|
4da31bd566 | ||
|
423d2be5f8 | ||
|
453a1617aa | ||
|
b9258c6178 | ||
|
6800d3372f | ||
|
08f7db20c1 | ||
|
1498940b10 | ||
|
ca45246627 | ||
|
f9355dc989 | ||
|
680f9744c4 | ||
|
2c935c0c72 | ||
|
7198063d96 | ||
|
a650110ba7 | ||
|
54b31d149e | ||
|
a745475808 | ||
|
f11554092b |
@@ -5,9 +5,7 @@ python:
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -yqq rtmpdump
|
||||
sudo: false
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
|
13
AUTHORS
13
AUTHORS
@@ -127,3 +127,16 @@ Julian Richen
|
||||
Ping O.
|
||||
Mister Hat
|
||||
Peter Ding
|
||||
jackyzy823
|
||||
George Brighton
|
||||
Remita Amine
|
||||
Aurélio A. Heckert
|
||||
Bernhard Minks
|
||||
sceext
|
||||
Zach Bruggeman
|
||||
Tjark Saul
|
||||
slangangular
|
||||
Behrouz Abbasi
|
||||
ngld
|
||||
nyuszika7h
|
||||
Shaun Walbridge
|
||||
|
@@ -125,7 +125,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
|
56
README.md
56
README.md
@@ -54,6 +54,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--dump-user-agent Display the current browser identification
|
||||
--list-extractors List all supported extractors
|
||||
--extractor-descriptions Output descriptions of all supported extractors
|
||||
--force-generic-extractor Force extraction to use the generic extractor
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
|
||||
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
|
||||
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
|
||||
@@ -74,7 +75,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER Playlist video to end at (default is last)
|
||||
--playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
|
||||
--playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
|
||||
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
|
||||
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||
--match-title REGEX Download only matching titles (regex or caseless sub-string)
|
||||
@@ -107,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,axel,curl,httpie,wget
|
||||
--external-downloader-args ARGS Give these arguments to the external downloader
|
||||
|
||||
## Filesystem Options:
|
||||
@@ -189,8 +190,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--all-formats Download all available video formats
|
||||
--prefer-free-formats Prefer free video formats unless a specific one is requested
|
||||
-F, --list-formats List all available formats
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifests and related data on YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
|
||||
merge is required
|
||||
|
||||
## Subtitle Options:
|
||||
@@ -213,7 +214,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
|
||||
5)
|
||||
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
|
||||
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)
|
||||
--postprocessor-args ARGS Give these arguments to the postprocessor
|
||||
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
|
||||
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
|
||||
--embed-subs Embed subtitles in the video (only for mkv and mp4 videos)
|
||||
@@ -234,7 +236,34 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
|
||||
You can configure youtube-dl by placing any supported command line option in a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime and use a proxy:
|
||||
```
|
||||
--extract-audio
|
||||
--no-mtime
|
||||
--proxy 127.0.0.1:3128
|
||||
```
|
||||
|
||||
You can use `--ignore-config` if you want to disable configuration file for a particular youtube-dl run.
|
||||
|
||||
### Authentication with `.netrc` file ###
|
||||
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
```
|
||||
touch $HOME/.netrc
|
||||
chmod a-rwx,u+rw $HOME/.netrc
|
||||
```
|
||||
After that you can add credentials for extractor in the following format, where *extractor* is the name of extractor in lowercase:
|
||||
```
|
||||
machine <extractor> login <login> password <password>
|
||||
```
|
||||
For example:
|
||||
```
|
||||
machine youtube login myaccount@gmail.com password my_youtube_password
|
||||
machine twitch login my_twitch_account_name password my_twitch_password
|
||||
```
|
||||
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
|
||||
|
||||
On Windows you may also need to set up the `%HOME%` environment variable manually.
|
||||
|
||||
# OUTPUT TEMPLATE
|
||||
|
||||
@@ -250,6 +279,7 @@ The `-o` option allows users to indicate a template for the output file names. T
|
||||
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
||||
- `playlist`: The name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The index of the video in the playlist, a five-digit number.
|
||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
@@ -267,7 +297,7 @@ youtube-dl_test_video_.mp4 # A simple file name
|
||||
By default youtube-dl tries to download the best quality, but sometimes you may want to download a different format.
|
||||
The simplest case is requesting a specific format, for example `-f 22`. You can get the list of available formats using `--list-formats`, you can also use a file extension (currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the special names `best`, `bestvideo`, `bestaudio` and `worst`.
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`.
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
|
||||
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
|
||||
@@ -379,7 +409,7 @@ In February 2015, the new YouTube player contained a character sequence in a str
|
||||
|
||||
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
||||
|
||||
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--network-address` options](#network-options) to select another IP address.
|
||||
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
||||
|
||||
### SyntaxError: Non-ASCII character ###
|
||||
|
||||
@@ -417,6 +447,12 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
|
||||
youtube-dl -- -wNyEUrxzFU
|
||||
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
|
||||
|
||||
### How do I pass cookies to youtube-dl?
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have the correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly.
|
||||
|
||||
### Can you add support for this anime video site, or site which shows current movies for free?
|
||||
|
||||
As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
|
||||
@@ -516,7 +552,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
@@ -544,7 +580,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
```
|
||||
|
||||
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L117-L265). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
|
||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||
|
||||
|
@@ -17,6 +17,7 @@
|
||||
- **AcademicEarth:Course**
|
||||
- **AddAnime**
|
||||
- **AdobeTV**
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **Aftenposten**
|
||||
- **Aftonbladet**
|
||||
@@ -27,7 +28,8 @@
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
- **Aparat**
|
||||
- **AppleDaily**
|
||||
- **AppleConnect**
|
||||
- **AppleDaily**: 臺灣蘋果日報
|
||||
- **AppleTrailers**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
@@ -44,11 +46,12 @@
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **Azubu**
|
||||
- **BaiduVideo**
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **BeatportPro**
|
||||
- **Beeg**
|
||||
@@ -83,7 +86,7 @@
|
||||
- **chirbit:profile**
|
||||
- **Cinchcast**
|
||||
- **Cinemassacre**
|
||||
- **clipfish**
|
||||
- **Clipfish**
|
||||
- **cliphunter**
|
||||
- **Clipsyndicate**
|
||||
- **Cloudy**
|
||||
@@ -105,13 +108,15 @@
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**
|
||||
- **CtsNews**: 華視新聞
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **dailymotion**
|
||||
- **dailymotion:playlist**
|
||||
- **dailymotion:user**
|
||||
- **DailymotionCloud**
|
||||
- **daum.net**
|
||||
- **DBTV**
|
||||
- **DCN**
|
||||
- **DctpTv**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
@@ -119,7 +124,7 @@
|
||||
- **Discovery**
|
||||
- **divxstage**: DivxStage
|
||||
- **Dotsub**
|
||||
- **DouyuTV**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **dramafever**
|
||||
- **dramafever:series**
|
||||
- **DRBonanza**
|
||||
@@ -145,6 +150,7 @@
|
||||
- **EroProfile**
|
||||
- **Escapist**
|
||||
- **ESPN** (Currently broken)
|
||||
- **EsriVideo**
|
||||
- **EveryonesMixtape**
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
@@ -215,12 +221,15 @@
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
- **Ina**
|
||||
- **Indavideo**
|
||||
- **IndavideoEmbed**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima**
|
||||
- **iqiyi**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **Izlesene**
|
||||
@@ -241,9 +250,16 @@
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
- **Ku6**
|
||||
- **kuwo:album**: 酷我音乐 - 专辑
|
||||
- **kuwo:category**: 酷我音乐 - 分类
|
||||
- **kuwo:chart**: 酷我音乐 - 排行榜
|
||||
- **kuwo:mv**: 酷我音乐 - MV
|
||||
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.tv**
|
||||
- **Laola1Tv**
|
||||
- **Letv**
|
||||
- **Lecture2Go**
|
||||
- **Letv**: 乐视网
|
||||
- **LetvPlaylist**
|
||||
- **LetvTv**
|
||||
- **Libsyn**
|
||||
@@ -281,9 +297,11 @@
|
||||
- **Motherless**
|
||||
- **Motorsport**: motorsport.com
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **movshare**: MovShare
|
||||
- **MPORA**
|
||||
- **MSNBC**
|
||||
- **MTV**
|
||||
- **mtviggy.com**
|
||||
- **mtvservices:embedded**
|
||||
@@ -291,9 +309,11 @@
|
||||
- **MusicPlayOn**
|
||||
- **MusicVault**
|
||||
- **muzu.tv**
|
||||
- **Mwave**
|
||||
- **MySpace**
|
||||
- **MySpace:album**
|
||||
- **MySpass**
|
||||
- **Myvi**
|
||||
- **myvideo**
|
||||
- **MyVidster**
|
||||
- **N-JOY**
|
||||
@@ -309,11 +329,18 @@
|
||||
- **NDTV**
|
||||
- **NerdCubedFeed**
|
||||
- **Nerdist**
|
||||
- **netease:album**: 网易云音乐 - 专辑
|
||||
- **netease:djradio**: 网易云音乐 - 电台
|
||||
- **netease:mv**: 网易云音乐 - MV
|
||||
- **netease:playlist**: 网易云音乐 - 歌单
|
||||
- **netease:program**: 网易云音乐 - 电台节目
|
||||
- **netease:singer**: 网易云音乐 - 歌手
|
||||
- **netease:song**: 网易云音乐
|
||||
- **Netzkino**
|
||||
- **Newgrounds**
|
||||
- **Newstube**
|
||||
- **NextMedia**
|
||||
- **NextMediaActionNews**
|
||||
- **NextMedia**: 蘋果日報
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **nhl.com**
|
||||
@@ -329,13 +356,13 @@
|
||||
- **Nowness**
|
||||
- **NowTV**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo.nl**
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo.nl:live**
|
||||
- **npo.nl:radio**
|
||||
- **npo.nl:radio:fragment**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKTV**
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
@@ -344,6 +371,7 @@
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
- **OpenFilm**
|
||||
@@ -354,15 +382,20 @@
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
- **PBS**
|
||||
- **Periscope**: Periscope
|
||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||
- **Phoenix**
|
||||
- **Photobucket**
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
- **Playwire**
|
||||
- **pluralsight**
|
||||
- **pluralsight:course**
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
@@ -377,10 +410,12 @@
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **qqmusic**
|
||||
- **qqmusic:album**
|
||||
- **qqmusic:singer**
|
||||
- **qqmusic:toplist**
|
||||
- **qqmusic**: QQ音乐
|
||||
- **qqmusic:album**: QQ音乐 - 专辑
|
||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **Quickscope**: Quick Scope
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
@@ -389,6 +424,7 @@
|
||||
- **RadioJavan**
|
||||
- **Rai**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedTube**
|
||||
- **Restudy**
|
||||
- **ReverbNation**
|
||||
@@ -404,6 +440,7 @@
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **RTVNH**
|
||||
- **RUHD**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channels
|
||||
@@ -427,6 +464,7 @@
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
||||
- **Shahid**
|
||||
- **Shared**
|
||||
- **ShareSix**
|
||||
- **Sina**
|
||||
@@ -436,6 +474,8 @@
|
||||
- **smotri:broadcast**: Smotri.com broadcasts
|
||||
- **smotri:community**: Smotri.com community videos
|
||||
- **smotri:user**: Smotri.com user videos
|
||||
- **SnagFilms**
|
||||
- **SnagFilmsEmbed**
|
||||
- **Snotr**
|
||||
- **Sohu**
|
||||
- **soompi**
|
||||
@@ -462,6 +502,7 @@
|
||||
- **SportBox**
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **Sportschau**
|
||||
- **Srf**
|
||||
- **SRMediathek**: Saarländischer Rundfunk
|
||||
- **SSA**
|
||||
@@ -487,9 +528,9 @@
|
||||
- **TechTalks**
|
||||
- **techtv.mit.edu**
|
||||
- **ted**
|
||||
- **tegenlicht.vpro.nl**
|
||||
- **TeleBruxelles**
|
||||
- **telecinco.es**
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
- **TenPlay**
|
||||
@@ -497,7 +538,9 @@
|
||||
- **TF1**
|
||||
- **TheOnion**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheSixtyOne**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **THVideo**
|
||||
- **THVideoPlaylist**
|
||||
@@ -538,10 +581,11 @@
|
||||
- **twitch:stream**
|
||||
- **twitch:video**
|
||||
- **twitch:vod**
|
||||
- **TwitterCard**
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Ultimedia**
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
@@ -560,7 +604,6 @@
|
||||
- **Viddler**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.mit.edu**
|
||||
- **VideoBam**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **videolectures.net**
|
||||
@@ -586,11 +629,12 @@
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **vk.com**
|
||||
- **vk.com:user-videos**: vk.com:All of a user's videos
|
||||
- **vk**: VK
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl and ntr.nl
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
@@ -603,32 +647,36 @@
|
||||
- **wdr:mobile**
|
||||
- **WDRMaus**: Sendung mit der Maus
|
||||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **Weibo**
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **WNL**
|
||||
- **WorldStarHipHop**
|
||||
- **wrzuta.pl**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XMinus**
|
||||
- **XNXX**
|
||||
- **Xstream**
|
||||
- **XTube**
|
||||
- **XTubeUser**: XTube user profile
|
||||
- **Xuite**
|
||||
- **Xuite**: 隨意窩Xuite影音
|
||||
- **XVideos**
|
||||
- **XXXYMovies**
|
||||
- **Yahoo**: Yahoo screen and movies
|
||||
- **Yam**
|
||||
- **Yam**: 蕃薯藤yam天空部落
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YesJapan**
|
||||
- **yinyuetai:video**: 音悦Tai
|
||||
- **Ynet**
|
||||
- **YouJizz**
|
||||
- **Youku**
|
||||
- **youku**: 优酷
|
||||
- **YouPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
|
@@ -133,8 +133,8 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(
|
||||
isinstance(got, list),
|
||||
'Expected field %s to be a list, but it is of type %s' % (
|
||||
isinstance(got, (list, dict)),
|
||||
'Expected field %s to be a list or a dict, but it is of type %s' % (
|
||||
info_field, type(got).__name__))
|
||||
expected_num = int(expected.partition(':')[2])
|
||||
assertGreaterEqual(
|
||||
@@ -160,7 +160,7 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
# Are checkable fields missing from the test case definition?
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
for key, value in got_dict.items()
|
||||
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit'))
|
||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||
if missing_keys:
|
||||
def _repr(v):
|
||||
|
@@ -15,7 +15,7 @@ from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_str
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.postprocessor.common import PostProcessor
|
||||
from youtube_dl.utils import match_filter_func
|
||||
from youtube_dl.utils import ExtractorError, match_filter_func
|
||||
|
||||
TEST_URL = 'http://localhost/sample.mp4'
|
||||
|
||||
@@ -105,6 +105,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
def test_format_selection(self):
|
||||
formats = [
|
||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
||||
{'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
|
||||
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
|
||||
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
|
||||
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
|
||||
@@ -136,6 +137,11 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '35')
|
||||
|
||||
ydl = YDL({'format': 'example-with-dashes'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'example-with-dashes')
|
||||
|
||||
def test_format_selection_audio(self):
|
||||
formats = [
|
||||
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||
@@ -229,21 +235,70 @@ class TestFormatSelection(unittest.TestCase):
|
||||
'141', '172', '140', '171', '139',
|
||||
]
|
||||
|
||||
for f1id, f2id in zip(order, order[1:]):
|
||||
f1 = YoutubeIE._formats[f1id].copy()
|
||||
f1['format_id'] = f1id
|
||||
f1['url'] = 'url:' + f1id
|
||||
f2 = YoutubeIE._formats[f2id].copy()
|
||||
f2['format_id'] = f2id
|
||||
f2['url'] = 'url:' + f2id
|
||||
def format_info(f_id):
|
||||
info = YoutubeIE._formats[f_id].copy()
|
||||
info['format_id'] = f_id
|
||||
info['url'] = 'url:' + f_id
|
||||
return info
|
||||
formats_order = [format_info(f_id) for f_id in order]
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': 'bestvideo+bestaudio'})
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '137+141')
|
||||
self.assertEqual(downloaded['ext'], 'mp4')
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], '38')
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': 'bestvideo/best,bestaudio'})
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
||||
self.assertEqual(downloaded_ids, ['137', '141'])
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
||||
self.assertEqual(downloaded_ids, ['137+141', '248+141'])
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
||||
self.assertEqual(downloaded_ids, ['136+141', '247+141'])
|
||||
|
||||
info_dict = _make_result(list(formats_order), extractor='youtube')
|
||||
ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
||||
self.assertEqual(downloaded_ids, ['248+141'])
|
||||
|
||||
for f1, f2 in zip(formats_order, formats_order[1:]):
|
||||
info_dict = _make_result([f1, f2], extractor='youtube')
|
||||
ydl = YDL({'format': 'best/bestvideo'})
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1id)
|
||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||
|
||||
info_dict = _make_result([f2, f1], extractor='youtube')
|
||||
ydl = YDL({'format': 'best/bestvideo'})
|
||||
@@ -251,7 +306,18 @@ class TestFormatSelection(unittest.TestCase):
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1id)
|
||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||
|
||||
def test_invalid_format_specs(self):
|
||||
def assert_syntax_error(format_spec):
|
||||
ydl = YDL({'format': format_spec})
|
||||
info_dict = _make_result([{'format_id': 'foo', 'url': TEST_URL}])
|
||||
self.assertRaises(SyntaxError, ydl.process_ie_result, info_dict)
|
||||
|
||||
assert_syntax_error('bestvideo,,best')
|
||||
assert_syntax_error('+bestaudio')
|
||||
assert_syntax_error('bestvideo+')
|
||||
assert_syntax_error('/')
|
||||
|
||||
def test_format_filtering(self):
|
||||
formats = [
|
||||
@@ -308,6 +374,18 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'G')
|
||||
|
||||
ydl = YDL({'format': 'all[width>=400][width<=600]'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
|
||||
self.assertEqual(downloaded_ids, ['B', 'C', 'D'])
|
||||
|
||||
ydl = YDL({'format': 'best[height<40]'})
|
||||
try:
|
||||
ydl.process_ie_result(info_dict)
|
||||
except ExtractorError:
|
||||
pass
|
||||
self.assertEqual(ydl.downloaded_info_dicts, [])
|
||||
|
||||
|
||||
class TestYoutubeDL(unittest.TestCase):
|
||||
def test_subtitles(self):
|
||||
|
@@ -14,6 +14,8 @@ from youtube_dl.utils import get_filesystem_encoding
|
||||
from youtube_dl.compat import (
|
||||
compat_getenv,
|
||||
compat_expanduser,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,5 +44,28 @@ class TestCompat(unittest.TestCase):
|
||||
dir(youtube_dl.compat))) - set(['unicode_literals'])
|
||||
self.assertEqual(all_names, sorted(present_names))
|
||||
|
||||
def test_compat_urllib_parse_unquote(self):
|
||||
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
|
||||
self.assertEqual(compat_urllib_parse_unquote(''), '')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%'), '%')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
|
||||
self.assertEqual(
|
||||
compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
|
||||
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
|
||||
'''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
|
||||
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
|
||||
self.assertEqual(
|
||||
compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
|
||||
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
|
||||
|
||||
def test_compat_urllib_parse_unquote_plus(self):
|
||||
self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
|
||||
self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -136,7 +136,9 @@ def generator(test_case):
|
||||
# We're not using .download here since that is just a shim
|
||||
# for outside error handling, and returns the exit code
|
||||
# instead of the result dict.
|
||||
res_dict = ydl.extract_info(test_case['url'])
|
||||
res_dict = ydl.extract_info(
|
||||
test_case['url'],
|
||||
force_generic_extractor=params.get('force_generic_extractor', False))
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||
|
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
|
||||
RaiIE,
|
||||
VikiIE,
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
RTVEALaCartaIE,
|
||||
FunnyOrDieIE,
|
||||
)
|
||||
@@ -307,6 +308,18 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||
|
||||
|
||||
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
|
||||
IE = ThePlatformFeedIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
|
||||
|
||||
|
||||
class TestRtveSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
|
||||
IE = RTVEALaCartaIE
|
||||
|
@@ -235,12 +235,21 @@ class TestUtil(unittest.TestCase):
|
||||
<node x="a"/>
|
||||
<node x="a" y="c" />
|
||||
<node x="b" y="d" />
|
||||
<node x="" />
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
|
||||
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
|
||||
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None)
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None)
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1])
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3])
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2])
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3])
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4])
|
||||
|
||||
def test_xpath_with_ns(self):
|
||||
testxml = '''<root xmlns:media="http://example.com/">
|
||||
@@ -324,6 +333,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
|
||||
self.assertEqual(parse_duration('87 Min.'), 5220)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
|
@@ -21,24 +21,24 @@ import subprocess
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import tokenize
|
||||
import traceback
|
||||
|
||||
if os.name == 'nt':
|
||||
import ctypes
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
compat_cookiejar,
|
||||
compat_expanduser,
|
||||
compat_get_terminal_size,
|
||||
compat_http_client,
|
||||
compat_kwargs,
|
||||
compat_str,
|
||||
compat_tokenize_tokenize,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from .utils import (
|
||||
escape_url,
|
||||
ContentTooShortError,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
@@ -49,7 +49,6 @@ from .utils import (
|
||||
ExtractorError,
|
||||
format_bytes,
|
||||
formatSeconds,
|
||||
HEADRequest,
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
@@ -139,6 +138,7 @@ class YoutubeDL(object):
|
||||
outtmpl: Template for output names.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
ignoreerrors: Do not stop on download errors.
|
||||
force_generic_extractor: Force downloader to use the generic extractor
|
||||
nooverwrites: Prevent overwriting files.
|
||||
playliststart: Playlist item to start at.
|
||||
playlistend: Playlist item to end at.
|
||||
@@ -261,6 +261,8 @@ class YoutubeDL(object):
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
otherwise prefer avconv.
|
||||
postprocessor_args: A list of additional command-line arguments for the
|
||||
postprocessor.
|
||||
"""
|
||||
|
||||
params = None
|
||||
@@ -626,13 +628,16 @@ class YoutubeDL(object):
|
||||
info_dict.setdefault(key, value)
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||
process=True):
|
||||
process=True, force_generic_extractor=False):
|
||||
'''
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
extra_info is a dict containing the extra values to add to each result
|
||||
'''
|
||||
|
||||
if not ie_key and force_generic_extractor:
|
||||
ie_key = 'Generic'
|
||||
|
||||
if ie_key:
|
||||
ies = [self.get_info_extractor(ie_key)]
|
||||
else:
|
||||
@@ -847,8 +852,8 @@ class YoutubeDL(object):
|
||||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
def _apply_format_filter(self, format_spec, available_formats):
|
||||
" Returns a tuple of the remaining format_spec and filtered formats "
|
||||
def _build_format_filter(self, filter_spec):
|
||||
" Returns a function to filter the formats according to the filter_spec "
|
||||
|
||||
OPERATORS = {
|
||||
'<': operator.lt,
|
||||
@@ -858,13 +863,13 @@ class YoutubeDL(object):
|
||||
'=': operator.eq,
|
||||
'!=': operator.ne,
|
||||
}
|
||||
operator_rex = re.compile(r'''(?x)\s*\[
|
||||
operator_rex = re.compile(r'''(?x)\s*
|
||||
(?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
||||
\]$
|
||||
$
|
||||
''' % '|'.join(map(re.escape, OPERATORS.keys())))
|
||||
m = operator_rex.search(format_spec)
|
||||
m = operator_rex.search(filter_spec)
|
||||
if m:
|
||||
try:
|
||||
comparison_value = int(m.group('value'))
|
||||
@@ -875,7 +880,7 @@ class YoutubeDL(object):
|
||||
if comparison_value is None:
|
||||
raise ValueError(
|
||||
'Invalid value %r in format specification %r' % (
|
||||
m.group('value'), format_spec))
|
||||
m.group('value'), filter_spec))
|
||||
op = OPERATORS[m.group('op')]
|
||||
|
||||
if not m:
|
||||
@@ -883,85 +888,283 @@ class YoutubeDL(object):
|
||||
'=': operator.eq,
|
||||
'!=': operator.ne,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)\s*\[
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9_-]+)
|
||||
\s*\]$
|
||||
\s*$
|
||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||
m = str_operator_rex.search(format_spec)
|
||||
m = str_operator_rex.search(filter_spec)
|
||||
if m:
|
||||
comparison_value = m.group('value')
|
||||
op = STR_OPERATORS[m.group('op')]
|
||||
|
||||
if not m:
|
||||
raise ValueError('Invalid format specification %r' % format_spec)
|
||||
raise ValueError('Invalid filter specification %r' % filter_spec)
|
||||
|
||||
def _filter(f):
|
||||
actual_value = f.get(m.group('key'))
|
||||
if actual_value is None:
|
||||
return m.group('none_inclusive')
|
||||
return op(actual_value, comparison_value)
|
||||
new_formats = [f for f in available_formats if _filter(f)]
|
||||
return _filter
|
||||
|
||||
new_format_spec = format_spec[:-len(m.group(0))]
|
||||
if not new_format_spec:
|
||||
new_format_spec = 'best'
|
||||
def build_format_selector(self, format_spec):
|
||||
def syntax_error(note, start):
|
||||
message = (
|
||||
'Invalid format specification: '
|
||||
'{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
|
||||
return SyntaxError(message)
|
||||
|
||||
return (new_format_spec, new_formats)
|
||||
PICKFIRST = 'PICKFIRST'
|
||||
MERGE = 'MERGE'
|
||||
SINGLE = 'SINGLE'
|
||||
GROUP = 'GROUP'
|
||||
FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
|
||||
|
||||
def select_format(self, format_spec, available_formats):
|
||||
while format_spec.endswith(']'):
|
||||
format_spec, available_formats = self._apply_format_filter(
|
||||
format_spec, available_formats)
|
||||
if not available_formats:
|
||||
return None
|
||||
def _parse_filter(tokens):
    """Consume tokens up to the closing ']' and return the filter text.

    The strings of all tokens seen before the ']' operator are
    concatenated without separators; the ']' itself is consumed but not
    included. If the stream ends before a ']' is found the function falls
    off the end and returns None.
    """
    pieces = []
    for tok_type, tok_string, _start, _end, _line in tokens:
        if tok_type == tokenize.OP and tok_string == ']':
            return ''.join(pieces)
        pieces.append(tok_string)
|
||||
|
||||
if format_spec in ['best', 'worst', None]:
|
||||
format_idx = 0 if format_spec == 'worst' else -1
|
||||
audiovideo_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||
if audiovideo_formats:
|
||||
return audiovideo_formats[format_idx]
|
||||
# for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
|
||||
elif (all(f.get('acodec') != 'none' for f in available_formats) or
|
||||
all(f.get('vcodec') != 'none' for f in available_formats)):
|
||||
return available_formats[format_idx]
|
||||
elif format_spec == 'bestaudio':
|
||||
audio_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('vcodec') == 'none']
|
||||
if audio_formats:
|
||||
return audio_formats[-1]
|
||||
elif format_spec == 'worstaudio':
|
||||
audio_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('vcodec') == 'none']
|
||||
if audio_formats:
|
||||
return audio_formats[0]
|
||||
elif format_spec == 'bestvideo':
|
||||
video_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('acodec') == 'none']
|
||||
if video_formats:
|
||||
return video_formats[-1]
|
||||
elif format_spec == 'worstvideo':
|
||||
video_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('acodec') == 'none']
|
||||
if video_formats:
|
||||
return video_formats[0]
|
||||
else:
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
|
||||
if format_spec in extensions:
|
||||
filter_f = lambda f: f['ext'] == format_spec
|
||||
else:
|
||||
filter_f = lambda f: f['format_id'] == format_spec
|
||||
matches = list(filter(filter_f, available_formats))
|
||||
if matches:
|
||||
return matches[-1]
|
||||
return None
|
||||
def _remove_unused_ops(tokens):
    """Yield a simplified token stream for the format-selection parser.

    ``tokens`` is an iterable of 5-tuples (type, string, start, end, line)
    as produced by the tokenize module. Operators that have no meaning in
    a format specification are glued to the neighbouring names/numbers and
    re-emitted as a single NAME token; the operators in ALLOWED_OPS and
    everything between '[' and ']' are passed through untouched. Tokens of
    any other type (e.g. NEWLINE, ENDMARKER) are dropped.
    """
    # Remove operators that we don't use and join them with the surrounding strings
    # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
    ALLOWED_OPS = ('/', '+', ',', '(', ')')
    # The bracket branch resumes consuming the *same* stream in a nested
    # loop, which is only correct on a real iterator (a list would restart).
    tokens = iter(tokens)
    last_string, last_start, last_end, last_line = None, None, None, None
    for tok_type, string, start, end, line in tokens:
        if tok_type == tokenize.OP and string == '[':
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                last_string = None
            yield tok_type, string, start, end, line
            # everything inside brackets will be handled by _parse_filter
            for tok_type, string, start, end, line in tokens:
                yield tok_type, string, start, end, line
                if tok_type == tokenize.OP and string == ']':
                    break
        elif tok_type == tokenize.OP and string in ALLOWED_OPS:
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                last_string = None
            yield tok_type, string, start, end, line
        elif tok_type in (tokenize.NAME, tokenize.NUMBER, tokenize.OP):
            if not last_string:
                # First piece of a joined name: remember where it begins
                last_string = string
                last_start = start
                last_line = line
            else:
                last_string += string
            # The joined token ends where its most recent piece ends
            last_end = end
    if last_string:
        yield tokenize.NAME, last_string, last_start, last_end, last_line
|
||||
|
||||
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
    """Parse a token stream into a list of FormatSelector tuples.

    ``tokens`` must support ``restore_last_token()`` so that a delimiter
    belonging to an enclosing construct can be pushed back before
    returning. The ``inside_*`` flags tell the parser which construct it
    was called from ('+' merge, '/' choice, parenthesised group) and so
    which delimiters end the current sub-expression.

    Raises the SyntaxError built by syntax_error() on malformed input.
    """
    parsed = []
    pending = None
    # ENCODING is only defined in python 3.x
    encoding_type = getattr(tokenize, 'ENCODING', None)
    for tok_type, tok, start, _, _ in tokens:
        if tok_type == encoding_type:
            continue
        if tok_type in (tokenize.NAME, tokenize.NUMBER):
            pending = FormatSelector(SINGLE, tok, [])
            continue
        if tok_type == tokenize.ENDMARKER:
            break
        if tok_type != tokenize.OP:
            continue

        if tok == ')':
            if not inside_group:
                # ')' will be handled by the parentheses group
                tokens.restore_last_token()
            break
        if (inside_merge and tok in ('/', ',')) or (inside_choice and tok == ','):
            # The delimiter belongs to the caller: hand it back
            tokens.restore_last_token()
            break

        if tok == ',':
            if not pending:
                raise syntax_error('"," must follow a format selector', start)
            parsed.append(pending)
            pending = None
        elif tok == '/':
            if not pending:
                raise syntax_error('"/" must follow a format selector', start)
            alternatives = _parse_format_selection(tokens, inside_choice=True)
            pending = FormatSelector(PICKFIRST, (pending, alternatives), [])
        elif tok == '[':
            if not pending:
                # A bare filter applies to the default 'best' selector
                pending = FormatSelector(SINGLE, 'best', [])
            pending.filters.append(_parse_filter(tokens))
        elif tok == '(':
            if pending:
                raise syntax_error('Unexpected "("', start)
            grouped = _parse_format_selection(tokens, inside_group=True)
            pending = FormatSelector(GROUP, grouped, [])
        elif tok == '+':
            video_selector = pending
            audio_selector = _parse_format_selection(tokens, inside_merge=True)
            if not video_selector or not audio_selector:
                raise syntax_error('"+" must be between two format selectors', start)
            pending = FormatSelector(MERGE, (video_selector, audio_selector), [])
        else:
            raise syntax_error('Operator not recognized: "{0}"'.format(tok), start)
    if pending:
        parsed.append(pending)
    return parsed
|
||||
|
||||
def _build_selector_function(selector):
    """Compile a parsed selector into a callable that picks formats.

    ``selector`` is either a list of FormatSelector tuples or a single
    FormatSelector. The returned callable takes an iterable of format
    dicts and returns an iterable (generator or list) of the selected
    format dicts. For a single FormatSelector its ``filters`` are applied
    to the formats before selection.
    """
    if isinstance(selector, list):
        fs = [_build_selector_function(s) for s in selector]

        def selector_function(formats):
            for f in fs:
                for chosen in f(formats):
                    yield chosen
        # A plain list has no ``filters`` attribute, so return directly
        return selector_function
    elif selector.type == GROUP:
        selector_function = _build_selector_function(selector.selector)
    elif selector.type == PICKFIRST:
        fs = [_build_selector_function(s) for s in selector.selector]

        def selector_function(formats):
            # The first alternative that selects anything wins
            for f in fs:
                picked_formats = list(f(formats))
                if picked_formats:
                    return picked_formats
            return []
    elif selector.type == SINGLE:
        format_spec = selector.selector
        # spec -> (codec key that must be 'none', index into the candidates)
        single_stream_specs = {
            'bestaudio': ('vcodec', -1),
            'worstaudio': ('vcodec', 0),
            'bestvideo': ('acodec', -1),
            'worstvideo': ('acodec', 0),
        }

        def selector_function(formats):
            formats = list(formats)
            if not formats:
                return
            if format_spec == 'all':
                for f in formats:
                    yield f
            elif format_spec in ['best', 'worst', None]:
                format_idx = 0 if format_spec == 'worst' else -1
                audiovideo_formats = [
                    f for f in formats
                    if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                if audiovideo_formats:
                    yield audiovideo_formats[format_idx]
                # for audio only (soundcloud) or video only (imgur) urls,
                # fall back to the best/worst of whatever is available
                elif (all(f.get('acodec') != 'none' for f in formats) or
                      all(f.get('vcodec') != 'none' for f in formats)):
                    yield formats[format_idx]
            elif format_spec in single_stream_specs:
                absent_codec, format_idx = single_stream_specs[format_spec]
                candidates = [
                    f for f in formats
                    if f.get(absent_codec) == 'none']
                if candidates:
                    yield candidates[format_idx]
            else:
                extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                # A known extension matches on 'ext', anything else on 'format_id'
                match_key = 'ext' if format_spec in extensions else 'format_id'
                matches = [f for f in formats if f[match_key] == format_spec]
                if matches:
                    yield matches[-1]
    elif selector.type == MERGE:
        def _merge(formats_info):
            """Return the merged format dict for a (video, audio) pair.

            Reports an error and returns None when the first format has
            no video.
            """
            format_1, format_2 = [f['format_id'] for f in formats_info]
            # The first format must contain the video and the
            # second the audio
            if formats_info[0].get('vcodec') == 'none':
                self.report_error('The first format must '
                                  'contain the video, try using '
                                  '"-f %s+%s"' % (format_2, format_1))
                return
            output_ext = (
                formats_info[0]['ext']
                if self.params.get('merge_output_format') is None
                else self.params['merge_output_format'])
            return {
                'requested_formats': formats_info,
                'format': '%s+%s' % (formats_info[0].get('format'),
                                     formats_info[1].get('format')),
                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                        formats_info[1].get('format_id')),
                'width': formats_info[0].get('width'),
                'height': formats_info[0].get('height'),
                'resolution': formats_info[0].get('resolution'),
                'fps': formats_info[0].get('fps'),
                'vcodec': formats_info[0].get('vcodec'),
                'vbr': formats_info[0].get('vbr'),
                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                'acodec': formats_info[1].get('acodec'),
                'abr': formats_info[1].get('abr'),
                'ext': output_ext,
            }
        video_selector, audio_selector = map(_build_selector_function, selector.selector)

        def selector_function(formats):
            formats = list(formats)
            for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                merged = _merge(pair)
                # _merge() returns None after reporting an invalid pair;
                # never hand that on as if it were a format dict
                if merged is not None:
                    yield merged

    filters = [self._build_format_filter(f) for f in selector.filters]

    def final_selector(formats):
        for _filter in filters:
            formats = list(filter(_filter, formats))
        return selector_function(formats)
    return final_selector
|
||||
|
||||
stream = io.BytesIO(format_spec.encode('utf-8'))
|
||||
try:
|
||||
tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
|
||||
except tokenize.TokenError:
|
||||
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
|
||||
|
||||
class TokenIterator(object):
    """Iterator over a token sequence that can step back by one token.

    ``tokens`` must support ``len()`` and integer indexing. The parser
    uses restore_last_token() to push back a delimiter that belongs to an
    enclosing construct.
    """

    def __init__(self, tokens):
        self.tokens = tokens
        # Index of the next token to hand out
        self.counter = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.counter >= len(self.tokens):
            raise StopIteration()
        value = self.tokens[self.counter]
        self.counter += 1
        return value

    # Python 2 iterator protocol
    next = __next__

    def restore_last_token(self):
        """Make the most recently returned token the next one again."""
        # Never go below zero: a negative counter would make __next__
        # index from the end of the sequence and return the wrong token.
        if self.counter > 0:
            self.counter -= 1
|
||||
|
||||
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
|
||||
return _build_selector_function(parsed_selector)
|
||||
|
||||
def _calc_headers(self, info_dict):
|
||||
res = std_headers.copy()
|
||||
@@ -1004,7 +1207,7 @@ class YoutubeDL(object):
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
t.get('id'), t.get('url')))
|
||||
for i, t in enumerate(thumbnails):
|
||||
if 'width' in t and 'height' in t:
|
||||
if t.get('width') and t.get('height'):
|
||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||
if t.get('id') is None:
|
||||
t['id'] = '%d' % i
|
||||
@@ -1033,12 +1236,6 @@ class YoutubeDL(object):
|
||||
info_dict['id'], info_dict.get('subtitles'),
|
||||
info_dict.get('automatic_captions'))
|
||||
|
||||
# This extractors handle format selection themselves
|
||||
if info_dict['extractor'] in ['Youku']:
|
||||
if download:
|
||||
self.process_info(info_dict)
|
||||
return info_dict
|
||||
|
||||
# We now pick which formats have to be downloaded
|
||||
if info_dict.get('formats') is None:
|
||||
# There's only one format available
|
||||
@@ -1104,62 +1301,15 @@ class YoutubeDL(object):
|
||||
if req_format is None:
|
||||
req_format_list = []
|
||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||
info_dict['extractor'] in ['youtube', 'ted']):
|
||||
info_dict['extractor'] in ['youtube', 'ted'] and
|
||||
not info_dict.get('is_live')):
|
||||
merger = FFmpegMergerPP(self)
|
||||
if merger.available and merger.can_merge():
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
req_format_list.append('best')
|
||||
req_format = '/'.join(req_format_list)
|
||||
formats_to_download = []
|
||||
if req_format == 'all':
|
||||
formats_to_download = formats
|
||||
else:
|
||||
for rfstr in req_format.split(','):
|
||||
# We can accept formats requested in the format: 34/5/best, we pick
|
||||
# the first that is available, starting from left
|
||||
req_formats = rfstr.split('/')
|
||||
for rf in req_formats:
|
||||
if re.match(r'.+?\+.+?', rf) is not None:
|
||||
# Two formats have been requested like '137+139'
|
||||
format_1, format_2 = rf.split('+')
|
||||
formats_info = (self.select_format(format_1, formats),
|
||||
self.select_format(format_2, formats))
|
||||
if all(formats_info):
|
||||
# The first format must contain the video and the
|
||||
# second the audio
|
||||
if formats_info[0].get('vcodec') == 'none':
|
||||
self.report_error('The first format must '
|
||||
'contain the video, try using '
|
||||
'"-f %s+%s"' % (format_2, format_1))
|
||||
return
|
||||
output_ext = (
|
||||
formats_info[0]['ext']
|
||||
if self.params.get('merge_output_format') is None
|
||||
else self.params['merge_output_format'])
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': '%s+%s' % (formats_info[0].get('format'),
|
||||
formats_info[1].get('format')),
|
||||
'format_id': '%s+%s' % (formats_info[0].get('format_id'),
|
||||
formats_info[1].get('format_id')),
|
||||
'width': formats_info[0].get('width'),
|
||||
'height': formats_info[0].get('height'),
|
||||
'resolution': formats_info[0].get('resolution'),
|
||||
'fps': formats_info[0].get('fps'),
|
||||
'vcodec': formats_info[0].get('vcodec'),
|
||||
'vbr': formats_info[0].get('vbr'),
|
||||
'stretched_ratio': formats_info[0].get('stretched_ratio'),
|
||||
'acodec': formats_info[1].get('acodec'),
|
||||
'abr': formats_info[1].get('abr'),
|
||||
'ext': output_ext,
|
||||
}
|
||||
else:
|
||||
selected_format = None
|
||||
else:
|
||||
selected_format = self.select_format(rf, formats)
|
||||
if selected_format is not None:
|
||||
formats_to_download.append(selected_format)
|
||||
break
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
formats_to_download = list(format_selector(formats))
|
||||
if not formats_to_download:
|
||||
raise ExtractorError('requested format not available',
|
||||
expected=True)
|
||||
@@ -1499,7 +1649,8 @@ class YoutubeDL(object):
|
||||
for url in url_list:
|
||||
try:
|
||||
# It also downloads the videos
|
||||
res = self.extract_info(url)
|
||||
res = self.extract_info(
|
||||
url, force_generic_extractor=self.params.get('force_generic_extractor', False))
|
||||
except UnavailableVideoError:
|
||||
self.report_error('unable to download video')
|
||||
except MaxDownloadsReached:
|
||||
@@ -1706,27 +1857,6 @@ class YoutubeDL(object):
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
|
||||
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
|
||||
# always respected by websites, some tend to give out URLs with non percent-encoded
|
||||
# non-ASCII characters (see telemb.py, ard.py [#3412])
|
||||
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||
# To work around aforementioned issue we will replace request's original URL with
|
||||
# percent-encoded one
|
||||
req_is_string = isinstance(req, compat_basestring)
|
||||
url = req if req_is_string else req.get_full_url()
|
||||
url_escaped = escape_url(url)
|
||||
|
||||
# Substitute URL if any change after escaping
|
||||
if url != url_escaped:
|
||||
if req_is_string:
|
||||
req = url_escaped
|
||||
else:
|
||||
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
|
||||
req = req_type(
|
||||
url_escaped, data=req.data, headers=req.headers,
|
||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||
|
||||
return self._opener.open(req, timeout=self._socket_timeout)
|
||||
|
||||
def print_debug_header(self):
|
||||
|
@@ -169,7 +169,7 @@ def _real_main(argv=None):
|
||||
if not opts.audioquality.isdigit():
|
||||
parser.error('invalid audio quality specified')
|
||||
if opts.recodevideo is not None:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
|
||||
parser.error('invalid video recode format specified')
|
||||
if opts.convertsubtitles is not None:
|
||||
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
|
||||
@@ -263,6 +263,9 @@ def _real_main(argv=None):
|
||||
external_downloader_args = None
|
||||
if opts.external_downloader_args:
|
||||
external_downloader_args = shlex.split(opts.external_downloader_args)
|
||||
postprocessor_args = None
|
||||
if opts.postprocessor_args:
|
||||
postprocessor_args = shlex.split(opts.postprocessor_args)
|
||||
match_filter = (
|
||||
None if opts.match_filter is None
|
||||
else match_filter_func(opts.match_filter))
|
||||
@@ -293,6 +296,7 @@ def _real_main(argv=None):
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
'ratelimit': opts.ratelimit,
|
||||
'nooverwrites': opts.nooverwrites,
|
||||
'retries': opts_retries,
|
||||
@@ -366,6 +370,7 @@ def _real_main(argv=None):
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
'external_downloader_args': external_downloader_args,
|
||||
'postprocessor_args': postprocessor_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
}
|
||||
|
||||
|
@@ -9,6 +9,7 @@ import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import itertools
|
||||
|
||||
|
||||
try:
|
||||
@@ -41,6 +42,11 @@ try:
|
||||
except ImportError: # Python 2
|
||||
import cookielib as compat_cookiejar
|
||||
|
||||
try:
|
||||
import http.cookies as compat_cookies
|
||||
except ImportError: # Python 2
|
||||
import Cookie as compat_cookies
|
||||
|
||||
try:
|
||||
import html.entities as compat_html_entities
|
||||
except ImportError: # Python 2
|
||||
@@ -74,42 +80,74 @@ except ImportError:
|
||||
import BaseHTTPServer as compat_http_server
|
||||
|
||||
try:
|
||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||
except ImportError:
|
||||
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
|
||||
if string == '':
|
||||
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
|
||||
except ImportError: # Python 2
|
||||
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
||||
else re.compile('([\x00-\x7f]+)'))
|
||||
|
||||
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
|
||||
# implementations from cpython 3.4.3's stdlib. Python 2's version
|
||||
# is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
|
||||
|
||||
def compat_urllib_parse_unquote_to_bytes(string):
|
||||
"""unquote_to_bytes('abc%20def') -> b'abc def'."""
|
||||
# Note: strings are encoded as UTF-8. This is only an issue if it contains
|
||||
# unescaped non-ASCII characters, which URIs should not.
|
||||
if not string:
|
||||
# Is it a string-like object?
|
||||
string.split
|
||||
return b''
|
||||
if isinstance(string, unicode):
|
||||
string = string.encode('utf-8')
|
||||
bits = string.split(b'%')
|
||||
if len(bits) == 1:
|
||||
return string
|
||||
res = string.split('%')
|
||||
if len(res) == 1:
|
||||
res = [bits[0]]
|
||||
append = res.append
|
||||
for item in bits[1:]:
|
||||
try:
|
||||
append(compat_urllib_parse._hextochr[item[:2]])
|
||||
append(item[2:])
|
||||
except KeyError:
|
||||
append(b'%')
|
||||
append(item)
|
||||
return b''.join(res)
|
||||
|
||||
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
|
||||
"""Replace %xx escapes by their single-character equivalent. The optional
|
||||
encoding and errors parameters specify how to decode percent-encoded
|
||||
sequences into Unicode characters, as accepted by the bytes.decode()
|
||||
method.
|
||||
By default, percent-encoded sequences are decoded with UTF-8, and invalid
|
||||
sequences are replaced by a placeholder character.
|
||||
|
||||
unquote('abc%20def') -> 'abc def'.
|
||||
"""
|
||||
if '%' not in string:
|
||||
string.split
|
||||
return string
|
||||
if encoding is None:
|
||||
encoding = 'utf-8'
|
||||
if errors is None:
|
||||
errors = 'replace'
|
||||
# pct_sequence: contiguous sequence of percent-encoded bytes, decoded
|
||||
pct_sequence = b''
|
||||
string = res[0]
|
||||
for item in res[1:]:
|
||||
try:
|
||||
if not item:
|
||||
raise ValueError
|
||||
pct_sequence += item[:2].decode('hex')
|
||||
rest = item[2:]
|
||||
if not rest:
|
||||
# This segment was just a single percent-encoded character.
|
||||
# May be part of a sequence of code units, so delay decoding.
|
||||
# (Stored in pct_sequence).
|
||||
continue
|
||||
except ValueError:
|
||||
rest = '%' + item
|
||||
# Encountered non-percent-encoded characters. Flush the current
|
||||
# pct_sequence.
|
||||
string += pct_sequence.decode(encoding, errors) + rest
|
||||
pct_sequence = b''
|
||||
if pct_sequence:
|
||||
# Flush the final pct_sequence
|
||||
string += pct_sequence.decode(encoding, errors)
|
||||
return string
|
||||
bits = _asciire.split(string)
|
||||
res = [bits[0]]
|
||||
append = res.append
|
||||
for i in range(1, len(bits), 2):
|
||||
append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
|
||||
append(bits[i + 1])
|
||||
return ''.join(res)
|
||||
|
||||
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
    """Unquote like compat_urllib_parse_unquote(), treating '+' as a space.

    Plus signs are turned into spaces first (as needed for HTML form
    values), then the result is passed on together with ``encoding`` and
    ``errors``.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    return compat_urllib_parse_unquote(string.replace('+', ' '), encoding, errors)
|
||||
|
||||
try:
|
||||
compat_str = unicode # Python 2
|
||||
@@ -388,12 +426,27 @@ else:
|
||||
pass
|
||||
return _terminal_size(columns, lines)
|
||||
|
||||
try:
    # Probe whether itertools.count() accepts the start/step keywords
    itertools.count(start=0, step=1)
except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        """Yield start, start + step, start + 2 * step, ... without end."""
        value = start
        while True:
            yield value
            value += step
else:
    compat_itertools_count = itertools.count
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
from tokenize import tokenize as compat_tokenize_tokenize
|
||||
else:
|
||||
from tokenize import generate_tokens as compat_tokenize_tokenize
|
||||
|
||||
__all__ = [
|
||||
'compat_HTTPError',
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
'compat_cookies',
|
||||
'compat_expanduser',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
@@ -401,6 +454,7 @@ __all__ = [
|
||||
'compat_html_entities',
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_itertools_count',
|
||||
'compat_kwargs',
|
||||
'compat_ord',
|
||||
'compat_parse_qs',
|
||||
@@ -408,9 +462,12 @@ __all__ = [
|
||||
'compat_socket_create_connection',
|
||||
'compat_str',
|
||||
'compat_subprocess_get_DEVNULL',
|
||||
'compat_tokenize_tokenize',
|
||||
'compat_urllib_error',
|
||||
'compat_urllib_parse',
|
||||
'compat_urllib_parse_unquote',
|
||||
'compat_urllib_parse_unquote_plus',
|
||||
'compat_urllib_parse_unquote_to_bytes',
|
||||
'compat_urllib_parse_urlparse',
|
||||
'compat_urllib_request',
|
||||
'compat_urlparse',
|
||||
|
@@ -8,6 +8,7 @@ from .hls import NativeHlsFD
|
||||
from .http import HttpFD
|
||||
from .rtsp import RtspFD
|
||||
from .rtmp import RtmpFD
|
||||
from .dash import DashSegmentsFD
|
||||
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
@@ -20,6 +21,7 @@ PROTOCOL_MAP = {
|
||||
'mms': RtspFD,
|
||||
'rtsp': RtspFD,
|
||||
'f4m': F4mFD,
|
||||
'http_dash_segments': DashSegmentsFD,
|
||||
}
|
||||
|
||||
|
||||
|
66
youtube_dl/downloader/dash.py
Normal file
66
youtube_dl/downloader/dash.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
|
||||
class DashSegmentsFD(FileDownloader):
    """
    Download segments in a DASH manifest
    """
    def real_download(self, filename, info_dict):
        """Fetch the initialization segment and all media segments into filename.

        Reads 'url', 'segment_urls', 'initialization_url' and 'id' from
        info_dict. Data is written to a temporary file which is renamed to
        ``filename`` at the end, after which a 'finished' progress hook is
        fired. When the 'test' param is set, media segments are limited to
        self._TEST_FILE_SIZE bytes in total. Returns True.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        base_url = info_dict['url']
        segment_urls = info_dict['segment_urls']

        is_test = self.params.get('test', False)
        remaining_bytes = self._TEST_FILE_SIZE if is_test else None
        byte_counter = 0

        def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
            """Download target_url, append it to outf, return the bytes written."""
            self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
            req = compat_urllib_request.Request(target_url)
            if remaining_bytes is not None:
                req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))

            response = self.ydl.urlopen(req)
            try:
                data = response.read()
            finally:
                # Release the connection even if reading fails
                response.close()

            if remaining_bytes is not None:
                # The Range header may have been ignored; truncate locally
                data = data[:remaining_bytes]

            outf.write(data)
            return len(data)

        def combine_url(base_url, target_url):
            """Return target_url if absolute (http/https), else join it onto base_url."""
            if re.match(r'^https?://', target_url):
                return target_url
            return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)

        with open(tmpfilename, 'wb') as outf:
            # The initialization segment is part of the output file, so its
            # size belongs in the reported byte count as well
            byte_counter += append_url_to_file(
                outf, combine_url(base_url, info_dict['initialization_url']),
                'initialization segment')
            for i, segment_url in enumerate(segment_urls):
                segment_len = append_url_to_file(
                    outf, combine_url(base_url, segment_url),
                    'segment %d / %d' % (i + 1, len(segment_urls)),
                    remaining_bytes)
                byte_counter += segment_len
                if remaining_bytes is not None:
                    remaining_bytes -= segment_len
                    if remaining_bytes <= 0:
                        break

        self.try_rename(tmpfilename, filename)

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True
|
@@ -45,11 +45,13 @@ class ExternalFD(FileDownloader):
|
||||
def supports(cls, info_dict):
|
||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||
|
||||
def _source_address(self, command_option):
|
||||
source_address = self.params.get('source_address')
|
||||
if source_address is None:
|
||||
def _option(self, command_option, param):
|
||||
param = self.params.get(param)
|
||||
if param is None:
|
||||
return []
|
||||
return [command_option, source_address]
|
||||
if isinstance(param, bool):
|
||||
return [command_option]
|
||||
return [command_option, param]
|
||||
|
||||
def _configuration_args(self, default=[]):
|
||||
ex_args = self.params.get('external_downloader_args')
|
||||
@@ -77,7 +79,17 @@ class CurlFD(ExternalFD):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
class AxelFD(ExternalFD):
    def _make_cmd(self, tmpfilename, info_dict):
        """Return the argument list: output file, one -H per header, extra args, URL."""
        cmd = [self.exe, '-o', tmpfilename]
        for header_name, header_value in info_dict['http_headers'].items():
            cmd.extend(('-H', '%s: %s' % (header_name, header_value)))
        cmd.extend(self._configuration_args())
        # '--' separates the options from the URL
        cmd.extend(('--', info_dict['url']))
        return cmd
|
||||
@@ -88,7 +100,9 @@ class WgetFD(ExternalFD):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--bind-address')
|
||||
cmd += self._option('--bind-address', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._option('--no-check-certificate', 'nocheckcertificate')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
@@ -105,10 +119,19 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--all-proxy', 'proxy')
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
class HttpieFD(ExternalFD):
    def _make_cmd(self, tmpfilename, info_dict):
        """Return the 'http --download' argument list, headers given as 'Name:value' items."""
        cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
        cmd.extend(
            '%s:%s' % (header_name, header_value)
            for header_name, header_value in info_dict['http_headers'].items())
        return cmd
|
||||
|
||||
_BY_NAME = dict(
|
||||
(klass.get_basename(), klass)
|
||||
for name, klass in globals().items()
|
||||
@@ -123,5 +146,6 @@ def list_external_downloaders():
|
||||
def get_external_downloader(external_downloader):
|
||||
""" Given the name of the executable, see whether we support the given
|
||||
downloader . """
|
||||
bn = os.path.basename(external_downloader)
|
||||
# Drop .exe extension on Windows
|
||||
bn = os.path.splitext(os.path.basename(external_downloader))[0]
|
||||
return _BY_NAME[bn]
|
||||
|
@@ -7,8 +7,7 @@ import os
|
||||
import time
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_error,
|
||||
@@ -16,8 +15,6 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
struct_pack,
|
||||
struct_unpack,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@@ -226,16 +223,13 @@ def _add_ns(prop):
|
||||
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
|
||||
|
||||
|
||||
class HttpQuietDownloader(HttpFD):
|
||||
def to_screen(self, *args, **kargs):
|
||||
pass
|
||||
|
||||
|
||||
class F4mFD(FileDownloader):
|
||||
class F4mFD(FragmentFD):
|
||||
"""
|
||||
A downloader for f4m manifests or AdobeHDS.
|
||||
"""
|
||||
|
||||
FD_NAME = 'f4m'
|
||||
|
||||
def _get_unencrypted_media(self, doc):
|
||||
media = doc.findall(_add_ns('media'))
|
||||
if not media:
|
||||
@@ -288,7 +282,7 @@ class F4mFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[download] Downloading f4m manifest')
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
@@ -320,67 +314,20 @@ class F4mFD(FileDownloader):
|
||||
# For some akamai manifests we'll need to add a query to the fragment url
|
||||
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||
|
||||
self.report_destination(filename)
|
||||
http_dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
{
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit', None),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': total_frags,
|
||||
}
|
||||
|
||||
self._prepare_frag_download(ctx)
|
||||
|
||||
dest_stream = ctx['dest_stream']
|
||||
|
||||
write_flv_header(dest_stream)
|
||||
if not live:
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': 0,
|
||||
'frag_index': 0,
|
||||
'frag_count': total_frags,
|
||||
'filename': filename,
|
||||
'tmpfilename': tmpfilename,
|
||||
}
|
||||
start = time.time()
|
||||
|
||||
def frag_progress_hook(s):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
return
|
||||
|
||||
frag_total_bytes = s.get('total_bytes', 0)
|
||||
if s['status'] == 'finished':
|
||||
state['downloaded_bytes'] += frag_total_bytes
|
||||
state['frag_index'] += 1
|
||||
|
||||
estimated_size = (
|
||||
(state['downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
time_now = time.time()
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
state['elapsed'] = time_now - start
|
||||
|
||||
if s['status'] == 'finished':
|
||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||
else:
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
||||
frag_total_bytes)
|
||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||
progress += frag_progress / float(total_frags)
|
||||
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
|
||||
state['speed'] = s.get('speed')
|
||||
self._hook_progress(state)
|
||||
|
||||
http_dl.add_progress_hook(frag_progress_hook)
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
frags_filenames = []
|
||||
while fragments_list:
|
||||
@@ -391,9 +338,9 @@ class F4mFD(FileDownloader):
|
||||
url += '?' + akamai_pv.strip(';')
|
||||
if info_dict.get('extra_param_to_segment_url'):
|
||||
url += info_dict.get('extra_param_to_segment_url')
|
||||
frag_filename = '%s-%s' % (tmpfilename, name)
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
|
||||
try:
|
||||
success = http_dl.download(frag_filename, {'url': url})
|
||||
success = ctx['dl'].download(frag_filename, {'url': url})
|
||||
if not success:
|
||||
return False
|
||||
with open(frag_filename, 'rb') as down:
|
||||
@@ -425,20 +372,9 @@ class F4mFD(FileDownloader):
|
||||
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
|
||||
self.report_warning(msg)
|
||||
|
||||
dest_stream.close()
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
elapsed = time.time() - start
|
||||
self.try_rename(tmpfilename, filename)
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(frag_file)
|
||||
|
||||
fsize = os.path.getsize(encodeFilename(filename))
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': elapsed,
|
||||
})
|
||||
|
||||
return True
|
||||
|
111
youtube_dl/downloader/fragment.py
Normal file
111
youtube_dl/downloader/fragment.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from __future__ import division, unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
)
|
||||
|
||||
|
||||
class HttpQuietDownloader(HttpFD):
|
||||
def to_screen(self, *args, **kargs):
|
||||
pass
|
||||
|
||||
|
||||
class FragmentFD(FileDownloader):
|
||||
"""
|
||||
A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
|
||||
"""
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
self._prepare_frag_download(ctx)
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
|
||||
self.report_destination(ctx['filename'])
|
||||
dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
{
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit', None),
|
||||
'retries': self.params.get('retries', 0),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
tmpfilename = self.temp_name(ctx['filename'])
|
||||
dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
|
||||
ctx.update({
|
||||
'dl': dl,
|
||||
'dest_stream': dest_stream,
|
||||
'tmpfilename': tmpfilename,
|
||||
})
|
||||
|
||||
def _start_frag_download(self, ctx):
|
||||
total_frags = ctx['total_frags']
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': 0,
|
||||
'frag_index': 0,
|
||||
'frag_count': total_frags,
|
||||
'filename': ctx['filename'],
|
||||
'tmpfilename': ctx['tmpfilename'],
|
||||
}
|
||||
start = time.time()
|
||||
ctx['started'] = start
|
||||
|
||||
def frag_progress_hook(s):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
return
|
||||
|
||||
frag_total_bytes = s.get('total_bytes', 0)
|
||||
if s['status'] == 'finished':
|
||||
state['downloaded_bytes'] += frag_total_bytes
|
||||
state['frag_index'] += 1
|
||||
|
||||
estimated_size = (
|
||||
(state['downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
time_now = time.time()
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
state['elapsed'] = time_now - start
|
||||
|
||||
if s['status'] == 'finished':
|
||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||
else:
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
||||
frag_total_bytes)
|
||||
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||
progress += frag_progress / float(total_frags)
|
||||
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
|
||||
state['speed'] = s.get('speed')
|
||||
self._hook_progress(state)
|
||||
|
||||
ctx['dl'].add_progress_hook(frag_progress_hook)
|
||||
|
||||
return start
|
||||
|
||||
def _finish_frag_download(self, ctx):
|
||||
ctx['dest_stream'].close()
|
||||
elapsed = time.time() - ctx['started']
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
fsize = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': ctx['filename'],
|
||||
'status': 'finished',
|
||||
'elapsed': elapsed,
|
||||
})
|
@@ -4,12 +4,11 @@ import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from .fragment import FragmentFD
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||
from ..utils import (
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
@@ -33,6 +32,8 @@ class HlsFD(FileDownloader):
|
||||
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||
args.append(encodeFilename(tmpfilename, True))
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
@@ -51,54 +52,50 @@ class HlsFD(FileDownloader):
|
||||
return False
|
||||
|
||||
|
||||
class NativeHlsFD(FileDownloader):
|
||||
class NativeHlsFD(FragmentFD):
|
||||
""" A more limited implementation that does not require ffmpeg """
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
FD_NAME = 'hlsnative'
|
||||
|
||||
self.to_screen(
|
||||
'[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
|
||||
data = self.ydl.urlopen(url).read()
|
||||
s = data.decode('utf-8', 'ignore')
|
||||
segment_urls = []
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
|
||||
s = manifest.decode('utf-8', 'ignore')
|
||||
fragment_urls = []
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#'):
|
||||
segment_url = (
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(url, line))
|
||||
segment_urls.append(segment_url)
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
||||
byte_counter = 0
|
||||
with open(tmpfilename, 'wb') as outf:
|
||||
for i, segurl in enumerate(segment_urls):
|
||||
self.to_screen(
|
||||
'[hlsnative] %s: Downloading segment %d / %d' %
|
||||
(info_dict['id'], i + 1, len(segment_urls)))
|
||||
seg_req = compat_urllib_request.Request(segurl)
|
||||
if remaining_bytes is not None:
|
||||
seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
|
||||
|
||||
segment = self.ydl.urlopen(seg_req).read()
|
||||
if remaining_bytes is not None:
|
||||
segment = segment[:remaining_bytes]
|
||||
remaining_bytes -= len(segment)
|
||||
outf.write(segment)
|
||||
byte_counter += len(segment)
|
||||
if remaining_bytes is not None and remaining_bytes <= 0:
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
fragment_urls.append(segment_url)
|
||||
# We only download the first fragment during the test
|
||||
if self.params.get('test', False):
|
||||
break
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
self.try_rename(tmpfilename, filename)
|
||||
'total_frags': len(fragment_urls),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
frags_filenames = []
|
||||
for i, frag_url in enumerate(fragment_urls):
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
with open(frag_filename, 'rb') as down:
|
||||
ctx['dest_stream'].write(down.read())
|
||||
frags_filenames.append(frag_filename)
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(frag_file)
|
||||
|
||||
return True
|
||||
|
@@ -4,6 +4,7 @@ import errno
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
@@ -57,6 +58,24 @@ class HttpFD(FileDownloader):
|
||||
# Establish connection
|
||||
try:
|
||||
data = self.ydl.urlopen(request)
|
||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||
# that don't support resuming and serve a whole file with no Content-Range
|
||||
# set in response despite of requested Range (see
|
||||
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
|
||||
if resume_len > 0:
|
||||
content_range = data.headers.get('Content-Range')
|
||||
if content_range:
|
||||
content_range_m = re.search(r'bytes (\d+)-', content_range)
|
||||
# Content-Range is present and matches requested Range, resume is possible
|
||||
if content_range_m and resume_len == int(content_range_m.group(1)):
|
||||
break
|
||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||
# and performing entire redownload
|
||||
self.report_unable_to_resume()
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
|
@@ -4,7 +4,10 @@ from .abc import ABCIE
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .adobetv import AdobeTVIE
|
||||
from .adobetv import (
|
||||
AdobeTVIE,
|
||||
AdobeTVVideoIE,
|
||||
)
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aftenposten import AftenpostenIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
@@ -16,9 +19,14 @@ from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
from .aparat import AparatIE
|
||||
from .appleconnect import AppleConnectIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .ard import ARDIE, ARDMediathekIE
|
||||
from .ard import (
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
SportschauIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTvIE,
|
||||
ArteTVPlus7IE,
|
||||
@@ -35,7 +43,10 @@ from .azubu import AzubuIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .beatportpro import BeatportProIE
|
||||
@@ -103,9 +114,11 @@ from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
DailymotionUserIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dcn import DCNIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .dfb import DFBIE
|
||||
@@ -140,12 +153,12 @@ from .ellentv import (
|
||||
)
|
||||
from .elpais import ElPaisIE
|
||||
from .embedly import EmbedlyIE
|
||||
from .empflix import EMPFlixIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
from .escapist import EscapistIE
|
||||
from .espn import ESPNIE
|
||||
from .esri import EsriVideoIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
@@ -230,11 +243,16 @@ from .imdb import (
|
||||
)
|
||||
from .imgur import ImgurIE
|
||||
from .ina import InaIE
|
||||
from .indavideo import (
|
||||
IndavideoIE,
|
||||
IndavideoEmbedIE,
|
||||
)
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
from .ir90tv import Ir90TvIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
@@ -257,8 +275,17 @@ from .keek import KeekIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kuwo import (
|
||||
KuwoIE,
|
||||
KuwoAlbumIE,
|
||||
KuwoChartIE,
|
||||
KuwoSingerIE,
|
||||
KuwoCategoryIE,
|
||||
KuwoMvIE,
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .letv import (
|
||||
LetvIE,
|
||||
LetvTvIE,
|
||||
@@ -318,8 +345,10 @@ from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .musicvault import MusicVaultIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .mwave import MwaveIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvi import MyviIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import NationalGeographicIE
|
||||
@@ -330,6 +359,7 @@ from .nbc import (
|
||||
NBCNewsIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
MSNBCIE,
|
||||
)
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
@@ -339,6 +369,15 @@ from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .nerdist import NerdistIE
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicIE,
|
||||
NetEaseMusicAlbumIE,
|
||||
NetEaseMusicSingerIE,
|
||||
NetEaseMusicListIE,
|
||||
NetEaseMusicMvIE,
|
||||
NetEaseMusicProgramIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
)
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nextmedia import (
|
||||
@@ -368,7 +407,8 @@ from .npo import (
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
TegenlichtVproIE,
|
||||
VPROIE,
|
||||
WNLIE
|
||||
)
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
@@ -384,6 +424,7 @@ from .nytimes import (
|
||||
from .nuvid import NuvidIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .ooyala import (
|
||||
OoyalaIE,
|
||||
OoyalaExternalIE,
|
||||
@@ -398,15 +439,25 @@ from .orf import (
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .periscope import (
|
||||
PeriscopeIE,
|
||||
QuickscopeIE,
|
||||
)
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
from .pluralsight import (
|
||||
PluralsightIE,
|
||||
PluralsightCourseIE,
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornhd import PornHdIE
|
||||
@@ -427,6 +478,7 @@ from .qqmusic import (
|
||||
QQMusicSingerIE,
|
||||
QQMusicAlbumIE,
|
||||
QQMusicToplistIE,
|
||||
QQMusicPlaylistIE,
|
||||
)
|
||||
from .quickvid import QuickVidIE
|
||||
from .r7 import R7IE
|
||||
@@ -436,6 +488,7 @@ from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import RaiIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .restudy import RestudyIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
@@ -450,6 +503,7 @@ from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
@@ -476,6 +530,7 @@ from .senateisvp import SenateISVPIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .sexykarma import SexyKarmaIE
|
||||
from .shahid import ShahidIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
@@ -487,6 +542,10 @@ from .smotri import (
|
||||
SmotriUserIE,
|
||||
SmotriBroadcastIE,
|
||||
)
|
||||
from .snagfilms import (
|
||||
SnagFilmsIE,
|
||||
SnagFilmsEmbedIE,
|
||||
)
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .soompi import (
|
||||
@@ -551,6 +610,7 @@ from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .tenplay import TenPlayIE
|
||||
@@ -558,8 +618,12 @@ from .testurl import TestURLIE
|
||||
from .testtube import TestTubeIE
|
||||
from .tf1 import TF1IE
|
||||
from .theonion import TheOnionIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .theplatform import (
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
)
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcIE, TlcDeIE
|
||||
@@ -567,7 +631,11 @@ from .tmz import (
|
||||
TMZIE,
|
||||
TMZArticleIE,
|
||||
)
|
||||
from .tnaflix import TNAFlixIE
|
||||
from .tnaflix import (
|
||||
TNAFlixIE,
|
||||
EMPFlixIE,
|
||||
MovieFapIE,
|
||||
)
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
THVideoPlaylistIE
|
||||
@@ -611,6 +679,7 @@ from .twitch import (
|
||||
TwitchBookmarksIE,
|
||||
TwitchStreamIE,
|
||||
)
|
||||
from .twitter import TwitterCardIE
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
@@ -636,7 +705,6 @@ from .vgtv import (
|
||||
from .vh1 import VH1IE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videolecturesnet import VideoLecturesNetIE
|
||||
from .videofyme import VideofyMeIE
|
||||
@@ -687,7 +755,10 @@ from .wdr import (
|
||||
WDRMobileIE,
|
||||
WDRMausIE,
|
||||
)
|
||||
from .webofstories import WebOfStoriesIE
|
||||
from .webofstories import (
|
||||
WebOfStoriesIE,
|
||||
WebOfStoriesPlaylistIE,
|
||||
)
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
@@ -696,7 +767,10 @@ from .wrzuta import WrzutaIE
|
||||
from .wsj import WSJIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
)
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xstream import XstreamIE
|
||||
@@ -715,6 +789,7 @@ from .yandexmusic import (
|
||||
YandexMusicPlaylistIE,
|
||||
)
|
||||
from .yesjapan import YesJapanIE
|
||||
from .yinyuetai import YinYueTaiIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
|
@@ -5,6 +5,8 @@ from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
float_or_none,
|
||||
ISO639Utils,
|
||||
)
|
||||
|
||||
|
||||
@@ -69,3 +71,61 @@ class AdobeTVIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class AdobeTVVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
# From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
|
||||
'url': 'https://video.tv.adobe.com/v/2456/',
|
||||
'md5': '43662b577c018ad707a63766462b1e87',
|
||||
'info_dict': {
|
||||
'id': '2456',
|
||||
'ext': 'mp4',
|
||||
'title': 'New experience with Acrobat DC',
|
||||
'description': 'New experience with Acrobat DC',
|
||||
'duration': 248.667,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_params = self._parse_json(self._search_regex(
|
||||
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
|
||||
video_id)
|
||||
|
||||
formats = [{
|
||||
'url': source['src'],
|
||||
'width': source.get('width'),
|
||||
'height': source.get('height'),
|
||||
'tbr': source.get('bitrate'),
|
||||
} for source in player_params['sources']]
|
||||
|
||||
# For both metadata and downloaded files the duration varies among
|
||||
# formats. I just pick the max one
|
||||
duration = max(filter(None, [
|
||||
float_or_none(source.get('duration'), scale=1000)
|
||||
for source in player_params['sources']]))
|
||||
|
||||
subtitles = {}
|
||||
for translation in player_params.get('translations', []):
|
||||
lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
|
||||
if lang_id not in subtitles:
|
||||
subtitles[lang_id] = []
|
||||
subtitles[lang_id].append({
|
||||
'url': translation['vttPath'],
|
||||
'ext': 'vtt',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': player_params['title'],
|
||||
'description': self._og_search_description(webpage),
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
50
youtube_dl/extractor/appleconnect.py
Normal file
50
youtube_dl/extractor/appleconnect.py
Normal file
@@ -0,0 +1,50 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'md5': '10d0f2799111df4cb1c924520ca78f98',
|
||||
'info_dict': {
|
||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'ext': 'm4v',
|
||||
'title': 'Energy',
|
||||
'uploader': 'Drake',
|
||||
'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
|
||||
'upload_date': '20150710',
|
||||
'timestamp': 1436545535,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
try:
|
||||
video_json = self._html_search_regex(
|
||||
r'class="auc-video-data">(\{.*?\})', webpage, 'json')
|
||||
except ExtractorError:
|
||||
raise ExtractorError('This post doesn\'t contain a video', expected=True)
|
||||
|
||||
video_data = self._parse_json(video_json, video_id)
|
||||
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_data['sslSrc'],
|
||||
'title': video_data['title'],
|
||||
'description': video_data['description'],
|
||||
'uploader': video_data['artistName'],
|
||||
'thumbnail': video_data['artworkUrl'],
|
||||
'timestamp': timestamp,
|
||||
'like_count': like_count,
|
||||
}
|
@@ -8,6 +8,7 @@ from .generic import GenericIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
qualities,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@@ -22,19 +23,125 @@ class ARDMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||
'info_dict': {
|
||||
'id': '29582122',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ich liebe das Leben trotzdem',
|
||||
'description': 'md5:45e4c225c72b27993314b31a84a5261c',
|
||||
'duration': 4557,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||
'info_dict': {
|
||||
'id': '29522730',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
|
||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||
'duration': 5252,
|
||||
},
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
|
||||
'info_dict': {
|
||||
'id': '28488308',
|
||||
'ext': 'mp3',
|
||||
'title': 'Tod eines Fußballers',
|
||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||
'duration': 3240,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
|
||||
'info_dict': {
|
||||
'id': '22490580',
|
||||
'ext': 'mp4',
|
||||
'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
|
||||
'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
|
||||
},
|
||||
'skip': 'Blocked outside of Germany',
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
media_info = self._download_json(
|
||||
media_info_url, video_id, 'Downloading media JSON')
|
||||
|
||||
formats = self._extract_formats(media_info, video_id)
|
||||
|
||||
if not formats:
|
||||
if '"fsk"' in webpage:
|
||||
raise ExtractorError(
|
||||
'This video is only available after 20:00', expected=True)
|
||||
elif media_info.get('_geoblocked'):
|
||||
raise ExtractorError('This video is not available due to geo restriction', expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(media_info.get('_duration'))
|
||||
thumbnail = media_info.get('_previewImage')
|
||||
|
||||
subtitles = {}
|
||||
subtitle_url = media_info.get('_subtitleUrl')
|
||||
if subtitle_url:
|
||||
subtitles['de'] = [{
|
||||
'ext': 'srt',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
    """Collect format dicts from a media-info JSON object.

    Walks '_mediaArray' -> '_mediaStreamArray' -> '_stream'. Stream URLs
    whose extension is 'f4m' or 'm3u8' are expanded through the
    corresponding manifest helpers; anything else becomes a single rtmp
    or http format, and URLs matching neither are skipped.
    """
    is_audio = media_info.get('_type') == 'audio'
    collected = []

    for num, media in enumerate(media_info.get('_mediaArray', [])):
        for stream in media.get('_mediaStreamArray', []):
            urls = stream.get('_stream')
            if not urls:
                continue
            # '_stream' may hold either one URL or a list of URLs
            if not isinstance(urls, list):
                urls = [urls]
            quality = stream.get('_quality')
            server = stream.get('_server')

            for stream_url in urls:
                ext = determine_ext(stream_url)

                if ext == 'f4m':
                    collected.extend(self._extract_f4m_formats(
                        stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
                        video_id, preference=-1, f4m_id='hds'))
                    continue

                if ext == 'm3u8':
                    collected.extend(self._extract_m3u8_formats(
                        stream_url, video_id, 'mp4', preference=1, m3u8_id='hls'))
                    continue

                if server and server.startswith('rtmp'):
                    fmt = {
                        'url': server,
                        'play_path': stream_url,
                        'format_id': 'a%s-rtmp-%s' % (num, quality),
                    }
                elif stream_url.startswith('http'):
                    fmt = {
                        'url': stream_url,
                        'format_id': 'a%s-%s-%s' % (num, ext, quality)
                    }
                else:
                    continue

                # Resolution is encoded in the file name, e.g. ..._960x544.mp4
                size = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
                if size:
                    fmt['width'] = int(size.group('width'))
                    fmt['height'] = int(size.group('height'))
                if is_audio:
                    fmt['vcodec'] = 'none'
                collected.append(fmt)

    return collected
|
||||
|
||||
def _real_extract(self, url):
|
||||
# determine video id from url
|
||||
m = re.match(self._VALID_URL, url)
|
||||
@@ -92,46 +199,22 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'format_id': fid,
|
||||
'url': furl,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info = {
|
||||
'formats': formats,
|
||||
}
|
||||
else: # request JSON file
|
||||
media_info = self._download_json(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
|
||||
# The second element of the _mediaArray contains the standard http urls
|
||||
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
|
||||
if not streams:
|
||||
if '"fsk"' in webpage:
|
||||
raise ExtractorError('This video is only available after 20:00')
|
||||
info = self._extract_media_info(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
|
||||
|
||||
formats = []
|
||||
for s in streams:
|
||||
if type(s['_stream']) == list:
|
||||
for index, url in enumerate(s['_stream'][::-1]):
|
||||
quality = s['_quality'] + index
|
||||
formats.append({
|
||||
'quality': quality,
|
||||
'url': url,
|
||||
'format_id': '%s-%s' % (determine_ext(url), quality)
|
||||
})
|
||||
continue
|
||||
|
||||
format = {
|
||||
'quality': s['_quality'],
|
||||
'url': s['_stream'],
|
||||
}
|
||||
|
||||
format['format_id'] = '%s-%s' % (
|
||||
determine_ext(format['url']), format['quality'])
|
||||
|
||||
formats.append(format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
@@ -189,3 +272,41 @@ class ARDIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class SportschauIE(ARDMediathekIE):
    # Extractor for sportschau.de video pages. The media JSON lives next to
    # the page itself and is processed by the inherited _extract_media_info.
    IE_NAME = 'Sportschau'
    _VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html'
    _TESTS = [{
        'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html',
        'info_dict': {
            'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100',
            'ext': 'mp4',
            'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"',
            'thumbnail': 're:^https?://.*\.jpg$',
            'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a single sportschau.de video page."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        # Page URL without the trailing '.html'; the media JSON URL is derived from it
        base_url = match.group('baseurl')

        webpage = self._download_webpage(url, video_id)
        title = get_element_by_attribute('class', 'headline', webpage)
        description = self._html_search_meta('description', webpage, 'description')

        media_json_url = base_url + '-mc_defaultQuality-h.json'
        info = self._extract_media_info(media_json_url, webpage, video_id)
        info['title'] = title
        info['description'] = description
        return info
|
||||
|
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class BaiduVideoIE(InfoExtractor):
|
||||
IE_DESC = '百度视频'
|
||||
_VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
|
||||
|
780
youtube_dl/extractor/bbc.py
Normal file
780
youtube_dl/extractor/bbc.py
Normal file
@@ -0,0 +1,780 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||
]
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'flv',
|
||||
'title': 'Kaleidoscope, Leonard Cohen',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Man in Black: Series 3: The Printed Name',
|
||||
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
|
||||
'duration': 1800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Episode is no longer available on BBC iPlayer Radio',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
||||
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
||||
'duration': 5100,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||
'info_dict': {
|
||||
'id': 'b03k3pb7',
|
||||
'ext': 'flv',
|
||||
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
|
||||
'description': '2. Invasion',
|
||||
'duration': 3600,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
||||
'info_dict': {
|
||||
'id': 'b04v209v',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, The Essential New Tune Special',
|
||||
'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
|
||||
'duration': 10800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'p02frcch',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
||||
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
||||
'duration': 3507,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||
'note': 'Video',
|
||||
'info_dict': {
|
||||
'id': 'p025c103',
|
||||
'ext': 'flv',
|
||||
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||
'duration': 226,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
|
||||
'info_dict': {
|
||||
'id': 'p02n76xf',
|
||||
'ext': 'flv',
|
||||
'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
|
||||
'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
|
||||
'duration': 3540,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
|
||||
'info_dict': {
|
||||
'id': 'b05zmgw1',
|
||||
'ext': 'flv',
|
||||
'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
|
||||
'title': 'Royal Academy Summer Exhibition',
|
||||
'duration': 3540,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
class MediaSelectionError(Exception):
    """Raised when a media selection document contains an <error> element.

    The error's 'id' attribute value is kept on the instance as `id`.
    """

    def __init__(self, id):
        # `id` is the public parameter name; callers read it back via `e.id`
        self.id = id
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
    """Download the ASX playlist a connection points to and list its refs.

    Returns the 'href' attribute of every './Entry/ref' element.
    """
    playlist_url = connection.get('href')
    asx = self._download_xml(playlist_url, programme_id, 'Downloading ASX playlist')
    hrefs = []
    for ref in asx.findall('./Entry/ref'):
        hrefs.append(ref.get('href'))
    return hrefs
|
||||
|
||||
def _extract_connection(self, connection, programme_id):
|
||||
formats = []
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
if protocol == 'http':
|
||||
href = connection.get('href')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, supplier),
|
||||
})
|
||||
# Skip DASH until supported
|
||||
elif transfer_format == 'dash':
|
||||
pass
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
'format_id': supplier,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
auth_string = connection.get('authString')
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
formats.append({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
'format_id': supplier,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
||||
if error is not None:
|
||||
raise BBCCoUkIE.MediaSelectionError(error.get('id'))
|
||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
||||
|
||||
def _extract_video(self, media, programme_id):
    """Build the formats for a <media> element of kind 'video'.

    Every format produced by the element's connections gets its
    format_id prefixed with the media's 'service' attribute and is
    annotated with width, height, vbr, vcodec and filesize taken from
    the <media> attributes.
    """
    service = media.get('service')
    # Attributes shared by every connection of this media element
    video_fields = {
        'width': int_or_none(media.get('width')),
        'height': int_or_none(media.get('height')),
        'vbr': int_or_none(media.get('bitrate')),
        'vcodec': media.get('encoding'),
        'filesize': int_or_none(media.get('media_file_size')),
    }

    formats = []
    for connection in self._extract_connections(media):
        for fmt in self._extract_connection(connection, programme_id):
            fmt['format_id'] = '%s_%s' % (service, fmt['format_id'])
            fmt.update(video_fields)
            formats.append(fmt)
    return formats
|
||||
|
||||
def _extract_audio(self, media, programme_id):
    """Build the formats for a <media> element of kind 'audio'.

    Every format produced by the element's connections gets its
    format_id prefixed with the media's 'service' attribute and is
    annotated with abr and acodec taken from the <media> attributes.
    """
    service = media.get('service')
    # Attributes shared by every connection of this media element
    audio_fields = {
        'abr': int_or_none(media.get('bitrate')),
        'acodec': media.get('encoding'),
    }

    formats = []
    for connection in self._extract_connections(media):
        for fmt in self._extract_connection(connection, programme_id):
            fmt['format_id'] = '%s_%s' % (service, fmt['format_id'])
            fmt.update(audio_fields)
            formats.append(fmt)
    return formats
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
    """Map subtitle languages to TTML subtitle entries for a captions media.

    Each connection's captions document is downloaded only to read its
    xml:lang attribute (defaulting to 'en'); the entry itself just
    points at the connection's href.
    """
    subtitles = {}
    for connection in self._extract_connections(media):
        captions_url = connection.get('href')
        captions = self._download_xml(captions_url, programme_id, 'Downloading captions')
        lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
        # A later connection with the same language replaces the earlier one
        subtitles[lang] = [{
            'url': captions_url,
            'ext': 'ttml',
        }]
    return subtitles
|
||||
|
||||
def _raise_extractor_error(self, media_selection_error):
    """Re-raise a media selection error as an expected ExtractorError."""
    message = '%s returned error: %s' % (self.IE_NAME, media_selection_error.id)
    raise ExtractorError(message, expected=True)
|
||||
|
||||
def _download_media_selector(self, programme_id):
    """Try each URL template in _MEDIASELECTOR_URLS until one succeeds.

    Returns whatever _download_media_selector_url returns for the first
    working selector. A 'notukerror' media selection error moves on to
    the next selector; any other media selection error is reported
    immediately. If every selector failed with 'notukerror', the last
    such error is reported.
    """
    last_exception = None
    for url_template in self._MEDIASELECTOR_URLS:
        try:
            return self._download_media_selector_url(
                url_template % programme_id, programme_id)
        except BBCCoUkIE.MediaSelectionError as e:
            if e.id != 'notukerror':
                self._raise_extractor_error(e)
            else:
                # Remember it and give the next selector a chance
                last_exception = e
    self._raise_extractor_error(last_exception)
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
    """Download one media selection XML and process it.

    When the download fails with an HTTP 403, the response body is
    parsed as the media selection document instead; any other
    ExtractorError is re-raised unchanged.
    """
    try:
        media_selection = self._download_xml(
            url, programme_id, 'Downloading media selection XML')
    except ExtractorError as ee:
        cause = ee.cause
        if not (isinstance(cause, compat_HTTPError) and cause.code == 403):
            raise
        # The 403 body is itself XML, so parse it like a normal response
        body = cause.read().decode('utf-8')
        media_selection = xml.etree.ElementTree.fromstring(body)
    return self._process_media_selector(media_selection, programme_id)
|
||||
|
||||
def _process_media_selector(self, media_selection, programme_id):
    """Split a media selection document into (formats, subtitles).

    'audio' and 'video' medias contribute formats, a 'captions' media
    supplies the subtitles; other kinds are ignored. subtitles stays
    None when there is no captions media.
    """
    formats = []
    subtitles = None

    for media in self._extract_medias(media_selection):
        kind = media.get('kind')
        if kind == 'captions':
            subtitles = self.extract_subtitles(media, programme_id)
        elif kind == 'video':
            formats += self._extract_video(media, programme_id)
        elif kind == 'audio':
            formats += self._extract_audio(media, programme_id)

    return formats, subtitles
|
||||
|
||||
def _download_playlist(self, playlist_id):
    """Resolve a programme via its playlist.json, falling back to the legacy playlist.

    Returns a (programme_id, title, description, duration, formats,
    subtitles) tuple. An ExtractorError caused by an HTTP 404 inside
    the JSON path triggers the legacy playlist fallback; any other
    ExtractorError propagates.
    """
    # NOTE(review): nesting is reconstructed from a diff view that lost its
    # indentation; the return is assumed to follow the item loop - confirm.
    try:
        playlist = self._download_json(
            'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
            playlist_id, 'Downloading playlist JSON')

        default_version = playlist.get('defaultAvailableVersion')
        if default_version:
            config = default_version['smpConfig']
            title = config['title']
            description = config['summary']
            for entry in config['items']:
                # Only programme entries carry the media we are after
                if entry['kind'] not in ('programme', 'radioProgramme'):
                    continue
                programme_id = entry.get('vpid')
                duration = int_or_none(entry.get('duration'))
                formats, subtitles = self._download_media_selector(programme_id)
            return programme_id, title, description, duration, formats, subtitles
    except ExtractorError as ee:
        cause = ee.cause
        if isinstance(cause, compat_HTTPError) and cause.code == 404:
            pass
        else:
            raise

    # fallback to legacy playlist
    return self._process_legacy_playlist(playlist_id)
|
||||
|
||||
def _process_legacy_playlist_url(self, url, display_id):
    """Download the legacy playlist XML at url and extract the programme from it."""
    return self._extract_from_legacy_playlist(
        self._download_legacy_playlist_url(url, display_id), display_id)
|
||||
|
||||
def _process_legacy_playlist(self, playlist_id):
    """Process the legacy iPlayer playlist that belongs to playlist_id."""
    legacy_url = 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id
    return self._process_legacy_playlist_url(legacy_url, playlist_id)
|
||||
|
||||
def _download_legacy_playlist_url(self, url, playlist_id=None):
    """Fetch a legacy playlist document and return the parsed XML."""
    note = 'Downloading legacy playlist XML'
    return self._download_xml(url, playlist_id, note)
|
||||
|
||||
def _extract_from_legacy_playlist(self, playlist, playlist_id):
    """Extract programme data from a legacy (emp) playlist XML document.

    Returns a (programme_id, title, description, duration, formats,
    subtitles) tuple built from the last item of kind 'programme' or
    'radioProgramme'.

    Raises an expected ExtractorError when the playlist has a <noItems>
    element, and an ExtractorError when it contains no programme item
    at all.
    """
    # NOTE(review): nesting is reconstructed from a diff view that lost its
    # indentation; the result is assumed to be returned after the loop - confirm.
    no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % playlist_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % playlist_id
        elif reason == 'noMedia':
            msg = 'Episode %s is not currently available' % playlist_id
        else:
            msg = 'Episode %s is not available: %s' % (playlist_id, reason)
        raise ExtractorError(msg, expected=True)

    def get_from_attributes(element):
        # First attribute that looks like a programme id (p/b + 7 chars) wins
        for p in ('identifier', 'group'):
            value = element.get(p)
            if value and re.match(r'^[pb][\da-z]{7}$', value):
                return value
        return None

    def get_programme_id(item):
        # Previously the item-level lookup result was computed and thrown
        # away, so only the <mediator> child was ever consulted.
        programme_id = get_from_attributes(item)
        if programme_id:
            return programme_id
        mediator = item.find('./{http://bbc.co.uk/2008/emp/playlist}mediator')
        if mediator is not None:
            return get_from_attributes(mediator)
        return None

    result = None
    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind != 'programme' and kind != 'radioProgramme':
            continue
        title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
        description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text

        programme_id = get_programme_id(item)
        duration = int_or_none(item.get('duration'))
        # TODO: programme_id can be None and media items can be incorporated right inside
        # playlist's item (e.g. http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
        # as f4m and m3u8
        formats, subtitles = self._download_media_selector(programme_id)
        result = (programme_id, title, description, duration, formats, subtitles)

    if result is None:
        # Without this guard the return below would fail on unbound locals
        raise ExtractorError(
            'No programme items found in playlist %s' % playlist_id)

    return result
|
||||
|
||||
def _real_extract(self, url):
    """Extract a single bbc.co.uk programme/clip.

    The programme id (vpid) is looked up first in the page's
    `mediator.bind(...)` player JSON, then with a plain `"vpid"` regex;
    if neither yields one, everything is resolved through the playlist
    for the id taken from the URL.
    """
    group_id = self._match_id(url)

    webpage = self._download_webpage(url, group_id, 'Downloading video page')

    programme_id = None
    # Must be pre-set: when the vpid only comes from the fallback regex below,
    # nothing else assigns duration before it is used in the result.
    duration = None

    tviplayer = self._search_regex(
        r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
        webpage, 'player', default=None)

    if tviplayer:
        player = self._parse_json(tviplayer, group_id).get('player', {})
        duration = int_or_none(player.get('duration'))
        programme_id = player.get('vpid')

    if not programme_id:
        programme_id = self._search_regex(
            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)

    if programme_id:
        formats, subtitles = self._download_media_selector(programme_id)
        title = self._og_search_title(webpage)
        description = self._search_regex(
            r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
            webpage, 'description', fatal=False)
    else:
        # No vpid on the page: the playlist supplies id, metadata and formats
        programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)

    self._sort_formats(formats)

    return {
        'id': programme_id,
        'title': title,
        'description': description,
        'thumbnail': self._og_search_thumbnail(webpage, default=None),
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
    }
|
||||
|
||||
|
||||
class BBCIE(BBCCoUkIE):
|
||||
IE_NAME = 'bbc'
|
||||
IE_DESC = 'BBC'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides more formats, namely direct mp4 links, but fails on some videos with
|
||||
# notukerror for non UK (?) users (e.g.
|
||||
# http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
|
||||
# Provides fewer formats, but works everywhere for everybody (hopefully)
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
# article with multiple videos embedded with data-media-meta containing
|
||||
# playlist.sxml, externalId and no direct video links
|
||||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# article with multiple videos embedded with data-media-meta (more videos)
|
||||
'url': 'http://www.bbc.com/news/business-28299555',
|
||||
'info_dict': {
|
||||
'id': 'business-28299555',
|
||||
'title': 'Farnborough Airshow: Video highlights',
|
||||
'description': 'BBC reports and video highlights at the Farnborough Airshow.',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'skip': 'Save time',
|
||||
}, {
|
||||
# article with multiple videos embedded with `new SMP()`
|
||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
|
||||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
'title': 'BBC Blogs - Adam Curtis - BUGGER',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
# single video embedded with mediaAssetPage.init()
|
||||
'url': 'http://www.bbc.com/news/world-europe-32041533',
|
||||
'info_dict': {
|
||||
'id': 'p02mprgb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||
'duration': 47,
|
||||
'timestamp': 1427219242,
|
||||
'upload_date': '20150324',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with single video embedded with data-media-meta containing
|
||||
# direct video links (for now these are extracted) and playlist.xml (with
|
||||
# media items as f4m and m3u8 - currently unsupported)
|
||||
'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
|
||||
'info_dict': {
|
||||
'id': '150615_telabyad_kentin_cogu',
|
||||
'ext': 'mp4',
|
||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||
'duration': 47,
|
||||
'timestamp': 1434397334,
|
||||
'upload_date': '20150615',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video embedded with mediaAssetPage.init() (regional section)
|
||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'info_dict': {
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'duration': 87,
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video from video playlist embedded with vxp-playlist-data JSON
|
||||
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
||||
'info_dict': {
|
||||
'id': 'p02w6qjc',
|
||||
'ext': 'mp4',
|
||||
'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
|
||||
'duration': 56,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story with digitalData
|
||||
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
||||
'info_dict': {
|
||||
'id': 'p02q6gc4',
|
||||
'ext': 'flv',
|
||||
'title': 'Sri Lanka’s spicy secret',
|
||||
'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
|
||||
'timestamp': 1437674293,
|
||||
'upload_date': '20150723',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story without digitalData
|
||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||
'info_dict': {
|
||||
'id': 'p018zqqg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hyundai Santa Fe Sport: Rock star',
|
||||
'description': 'md5:b042a26142c4154a6e472933cf20793d',
|
||||
'timestamp': 1368473503,
|
||||
'upload_date': '20130513',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video with playlist.sxml URL
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
'info_dict': {
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'md5:398fca0e2e701c609d726e034fa1fc89',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# custom redirection to www.bbc.com
|
||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Accept url only if BBCCoUkIE does not already handle it."""
    if BBCCoUkIE.suitable(url):
        return False
    return super(BBCIE, cls).suitable(url)
|
||||
|
||||
def _extract_from_media_meta(self, media_meta, video_id):
    """Return (formats, subtitles) for one data-media-meta style dict.

    Sources are tried in this order: direct 'sourceFiles' links, the
    media selector for 'externalId', then the legacy playlist at
    'href'. ([], []) is returned when none of them is present.
    """
    # Direct links to media in media metadata (e.g.
    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
    # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
    source_files = media_meta.get('sourceFiles')
    if source_files:
        direct_formats = []
        for format_id, f in source_files.items():
            if not f.get('url'):
                continue
            direct_formats.append({
                'url': f['url'],
                'format_id': format_id,
                'ext': f.get('encoding'),
                'tbr': float_or_none(f.get('bitrate'), 1000),
                'filesize': int_or_none(f.get('filesize')),
            })
        return direct_formats, []

    programme_id = media_meta.get('externalId')
    if programme_id:
        return self._download_media_selector(programme_id)

    # Process playlist.sxml as legacy playlist
    href = media_meta.get('href')
    if not href:
        return [], []

    playlist = self._download_legacy_playlist_url(href)
    extracted = self._extract_from_legacy_playlist(playlist, video_id)
    # Only the last two members (formats, subtitles) are of interest here
    _, _, _, _, formats, subtitles = extracted
    return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
[r'"datePublished":\s*"([^"]+)',
|
||||
r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
|
||||
r'itemprop="datePublished"[^>]+datetime="([^"]+)"'],
|
||||
webpage, 'date', default=None))
|
||||
|
||||
# single video with playlist.sxml URL (e.g. http://www.bbc.com/sport/0/football/3365340ng)
|
||||
playlist = self._search_regex(
|
||||
r'<param[^>]+name="playlist"[^>]+value="([^"]+)"',
|
||||
webpage, 'playlist', default=None)
|
||||
if playlist:
|
||||
programme_id, title, description, duration, formats, subtitles = \
|
||||
self._process_legacy_playlist_url(playlist, playlist_id)
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
[r'data-video-player-vpid="([\da-z]{8})"',
|
||||
r'<param[^>]+name="externalIdentifier"[^>]+value="([\da-z]{8})"'],
|
||||
webpage, 'vpid', default=None)
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
# digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
|
||||
digital_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
|
||||
programme_id, fatal=False)
|
||||
page_info = digital_data.get('page', {}).get('pageInfo', {})
|
||||
title = page_info.get('pageName') or self._og_search_title(webpage)
|
||||
description = page_info.get('description') or self._og_search_description(webpage)
|
||||
timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
playlist_title = self._html_search_regex(
|
||||
r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
re.findall(pattern, webpage))))
|
||||
|
||||
# Multiple video article (e.g.
|
||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+[\da-z]{8}(?:\b[^"]+)?'
|
||||
entries = []
|
||||
for match in extract_all(r'new\s+SMP\(({.+?})\)'):
|
||||
embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
|
||||
if embed_url and re.match(EMBED_URL, embed_url):
|
||||
entries.append(embed_url)
|
||||
entries.extend(re.findall(
|
||||
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
[self.url_result(entry, 'BBCCoUk') for entry in entries],
|
||||
playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
||||
medias = extract_all(r"data-media-meta='({[^']+})'")
|
||||
|
||||
if not medias:
|
||||
# Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
|
||||
media_asset = self._search_regex(
|
||||
r'mediaAssetPage\.init\(\s*({.+?}), "/',
|
||||
webpage, 'media asset', default=None)
|
||||
if media_asset:
|
||||
media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
|
||||
medias = []
|
||||
for video in media_asset_page.get('videos', {}).values():
|
||||
medias.extend(video.values())
|
||||
|
||||
if not medias:
|
||||
# Multiple video playlist with single `now playing` entry (e.g.
|
||||
# http://www.bbc.com/news/video_and_audio/must_see/33767813)
|
||||
vxp_playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
|
||||
webpage, 'playlist data'),
|
||||
playlist_id)
|
||||
playlist_medias = []
|
||||
for item in vxp_playlist:
|
||||
media = item.get('media')
|
||||
if not media:
|
||||
continue
|
||||
playlist_medias.append(media)
|
||||
# Download single video if found media with asset id matching the video id from URL
|
||||
if item.get('advert', {}).get('assetId') == playlist_id:
|
||||
medias = [media]
|
||||
break
|
||||
# Fallback to the whole playlist
|
||||
if not medias:
|
||||
medias = playlist_medias
|
||||
|
||||
entries = []
|
||||
for num, media_meta in enumerate(medias, start=1):
|
||||
formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_id = media_meta.get('externalId')
|
||||
if not video_id:
|
||||
video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
|
||||
|
||||
title = media_meta.get('caption')
|
||||
if not title:
|
||||
title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
|
||||
|
||||
duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
|
||||
|
||||
images = []
|
||||
for image in media_meta.get('images', {}).values():
|
||||
images.extend(image.values())
|
||||
if 'image' in media_meta:
|
||||
images.append(media_meta['image'])
|
||||
|
||||
thumbnails = [{
|
||||
'url': image.get('href'),
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in images]
|
||||
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
@@ -1,380 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'flv',
|
||||
'title': 'Kaleidoscope, Leonard Cohen',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Man in Black: Series 3: The Printed Name',
|
||||
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
|
||||
'duration': 1800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Episode is no longer available on BBC iPlayer Radio',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
|
||||
'info_dict': {
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
||||
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
||||
'duration': 5100,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||
'info_dict': {
|
||||
'id': 'b03k3pb7',
|
||||
'ext': 'flv',
|
||||
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
|
||||
'description': '2. Invasion',
|
||||
'duration': 3600,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
||||
'info_dict': {
|
||||
'id': 'b04v209v',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, The Essential New Tune Special',
|
||||
'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
|
||||
'duration': 10800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'p02frcch',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
||||
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
||||
'duration': 3507,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||
'note': 'Video',
|
||||
'info_dict': {
|
||||
'id': 'p025c103',
|
||||
'ext': 'flv',
|
||||
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||
'duration': 226,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
|
||||
'info_dict': {
|
||||
'id': 'p02n76xf',
|
||||
'ext': 'flv',
|
||||
'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
|
||||
'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
|
||||
'duration': 3540,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||
|
||||
def _extract_connection(self, connection, programme_id):
|
||||
formats = []
|
||||
protocol = connection.get('protocol')
|
||||
supplier = connection.get('supplier')
|
||||
if protocol == 'http':
|
||||
href = connection.get('href')
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, supplier),
|
||||
})
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
'format_id': supplier,
|
||||
})
|
||||
elif protocol == 'rtmp':
|
||||
application = connection.get('application', 'ondemand')
|
||||
auth_string = connection.get('authString')
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
formats.append({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
'ext': 'flv',
|
||||
'format_id': supplier,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
||||
if error is not None:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
|
||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
||||
|
||||
def _extract_video(self, media, programme_id):
|
||||
formats = []
|
||||
vbr = int(media.get('bitrate'))
|
||||
vcodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int(media.get('width'))
|
||||
height = int(media.get('height'))
|
||||
file_size = int(media.get('media_file_size'))
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
'vcodec': vcodec,
|
||||
'filesize': file_size,
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _extract_audio(self, media, programme_id):
|
||||
formats = []
|
||||
abr = int(media.get('bitrate'))
|
||||
acodec = media.get('encoding')
|
||||
service = media.get('service')
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
format.update({
|
||||
'format_id': '%s_%s' % (service, format['format_id']),
|
||||
'abr': abr,
|
||||
'acodec': acodec,
|
||||
})
|
||||
formats.extend(conn_formats)
|
||||
return formats
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
for connection in self._extract_connections(media):
|
||||
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
|
||||
srt = ''
|
||||
|
||||
def _extract_text(p):
|
||||
if p.text is not None:
|
||||
stripped_text = p.text.strip()
|
||||
if stripped_text:
|
||||
return stripped_text
|
||||
return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
|
||||
for pos, p in enumerate(ps):
|
||||
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
|
||||
subtitles[lang] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
{
|
||||
'data': srt,
|
||||
'ext': 'srt',
|
||||
},
|
||||
]
|
||||
return subtitles
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
try:
|
||||
media_selection = self._download_xml(
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
|
||||
formats = []
|
||||
subtitles = None
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self.extract_subtitles(media, programme_id)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _download_playlist(self, playlist_id):
|
||||
try:
|
||||
playlist = self._download_json(
|
||||
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
|
||||
playlist_id, 'Downloading playlist JSON')
|
||||
|
||||
version = playlist.get('defaultAvailableVersion')
|
||||
if version:
|
||||
smp_config = version['smpConfig']
|
||||
title = smp_config['title']
|
||||
description = smp_config['summary']
|
||||
for item in smp_config['items']:
|
||||
kind = item['kind']
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
programme_id = item.get('vpid')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
except ExtractorError as ee:
|
||||
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||
raise
|
||||
|
||||
# fallback to legacy playlist
|
||||
playlist = self._download_xml(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
|
||||
playlist_id, 'Downloading legacy playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % playlist_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % playlist_id
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % playlist_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
group_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
programme_id = None
|
||||
|
||||
tviplayer = self._search_regex(
|
||||
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
|
||||
webpage, 'player', default=None)
|
||||
|
||||
if tviplayer:
|
||||
player = self._parse_json(tviplayer, group_id).get('player', {})
|
||||
duration = int_or_none(player.get('duration'))
|
||||
programme_id = player.get('vpid')
|
||||
|
||||
if not programme_id:
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
||||
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._search_regex(
|
||||
r'<p class="medium-description">([^<]+)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
else:
|
||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@@ -1,7 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -57,7 +57,7 @@ class BetIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
media_url = compat_urllib_parse.unquote(self._search_regex(
|
||||
media_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
||||
webpage, 'media URL'))
|
||||
|
||||
|
@@ -41,8 +41,15 @@ class BiliBiliIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None):
|
||||
raise ExtractorError('The video does not exist or was deleted', expected=True)
|
||||
if '(此视频不存在或被删除)' in webpage:
|
||||
raise ExtractorError(
|
||||
'The video does not exist or was deleted', expected=True)
|
||||
|
||||
if '>你没有权限浏览! 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage:
|
||||
raise ExtractorError(
|
||||
'The video is not available in your region due to copyright reasons',
|
||||
expected=True)
|
||||
|
||||
video_code = self._search_regex(
|
||||
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
|
||||
|
||||
|
@@ -5,7 +5,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -14,6 +13,8 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,10 +24,10 @@ class BlipTVIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||
'md5': '80baf1ec5c3d2019037c1c707d676b9f',
|
||||
'info_dict': {
|
||||
'id': '5779306',
|
||||
'ext': 'mov',
|
||||
'ext': 'm4v',
|
||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||
'timestamp': 1323138843,
|
||||
@@ -100,6 +101,20 @@ class BlipTVIE(InfoExtractor):
|
||||
'vcodec': 'none',
|
||||
}
|
||||
},
|
||||
{
|
||||
# missing duration
|
||||
'url': 'http://blip.tv/rss/flash/6700880',
|
||||
'info_dict': {
|
||||
'id': '6684191',
|
||||
'ext': 'm4v',
|
||||
'title': 'Cowboy Bebop: Gateway Shuffle Review',
|
||||
'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
|
||||
'timestamp': 1386639757,
|
||||
'upload_date': '20131210',
|
||||
'uploader': 'sfdebris',
|
||||
'uploader_id': '706520',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
@@ -128,35 +143,34 @@ class BlipTVIE(InfoExtractor):
|
||||
|
||||
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
|
||||
|
||||
def blip(s):
|
||||
return '{http://blip.tv/dtd/blip/1.0}%s' % s
|
||||
|
||||
def media(s):
|
||||
return '{http://search.yahoo.com/mrss/}%s' % s
|
||||
|
||||
def itunes(s):
|
||||
return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s
|
||||
def _x(p):
|
||||
return xpath_with_ns(p, {
|
||||
'blip': 'http://blip.tv/dtd/blip/1.0',
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
|
||||
})
|
||||
|
||||
item = rss.find('channel/item')
|
||||
|
||||
video_id = item.find(blip('item_id')).text
|
||||
title = item.find('./title').text
|
||||
description = clean_html(compat_str(item.find(blip('puredescription')).text))
|
||||
timestamp = parse_iso8601(item.find(blip('datestamp')).text)
|
||||
uploader = item.find(blip('user')).text
|
||||
uploader_id = item.find(blip('userid')).text
|
||||
duration = int(item.find(blip('runtime')).text)
|
||||
media_thumbnail = item.find(media('thumbnail'))
|
||||
thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text
|
||||
categories = [category.text for category in item.findall('category')]
|
||||
video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
|
||||
title = xpath_text(item, 'title', 'title', fatal=True)
|
||||
description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
|
||||
timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
|
||||
uploader = xpath_text(item, _x('blip:user'), 'uploader')
|
||||
uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
|
||||
duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
|
||||
media_thumbnail = item.find(_x('media:thumbnail'))
|
||||
thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
|
||||
else xpath_text(item, 'image', 'thumbnail'))
|
||||
categories = [category.text for category in item.findall('category') if category is not None]
|
||||
|
||||
formats = []
|
||||
subtitles_urls = {}
|
||||
|
||||
media_group = item.find(media('group'))
|
||||
for media_content in media_group.findall(media('content')):
|
||||
media_group = item.find(_x('media:group'))
|
||||
for media_content in media_group.findall(_x('media:content')):
|
||||
url = media_content.get('url')
|
||||
role = media_content.get(blip('role'))
|
||||
role = media_content.get(_x('blip:role'))
|
||||
msg = self._download_webpage(
|
||||
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
|
||||
video_id, 'Resolving URL for %s' % role)
|
||||
@@ -175,8 +189,8 @@ class BlipTVIE(InfoExtractor):
|
||||
'url': real_url,
|
||||
'format_id': role,
|
||||
'format_note': media_type,
|
||||
'vcodec': media_content.get(blip('vcodec')) or 'none',
|
||||
'acodec': media_content.get(blip('acodec')),
|
||||
'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
|
||||
'acodec': media_content.get(_x('blip:acodec')),
|
||||
'filesize': media_content.get('filesize'),
|
||||
'width': int_or_none(media_content.get('width')),
|
||||
'height': int_or_none(media_content.get('height')),
|
||||
|
@@ -18,6 +18,7 @@ class BreakIE(InfoExtractor):
|
||||
'id': '2468056',
|
||||
'ext': 'mp4',
|
||||
'title': 'When Girls Act Like D-Bags',
|
||||
'age_limit': 13,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
|
||||
|
@@ -13,6 +13,7 @@ from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -119,7 +120,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
|
||||
try:
|
||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||
except xml.etree.ElementTree.ParseError:
|
||||
except compat_xml_parse_error:
|
||||
return
|
||||
|
||||
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
||||
|
@@ -106,15 +106,11 @@ class CanalplusIE(InfoExtractor):
|
||||
continue
|
||||
format_id = fmt.tag
|
||||
if format_id == 'HLS':
|
||||
hls_formats = self._extract_m3u8_formats(format_url, video_id, 'flv')
|
||||
for fmt in hls_formats:
|
||||
fmt['preference'] = preference(format_id)
|
||||
formats.extend(hls_formats)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', preference=preference(format_id)))
|
||||
elif format_id == 'HDS':
|
||||
hds_formats = self._extract_f4m_formats(format_url + '?hdcore=2.11.3', video_id)
|
||||
for fmt in hds_formats:
|
||||
fmt['preference'] = preference(format_id)
|
||||
formats.extend(hds_formats)
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=2.11.3', video_id, preference=preference(format_id)))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -88,7 +89,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
if playlist_url == 'error_region':
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
|
||||
req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlist = self._download_json(req, video_id)
|
||||
|
@@ -1,53 +1,68 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class ClipfishIE(InfoExtractor):
|
||||
IE_NAME = 'clipfish'
|
||||
|
||||
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||
'md5': '2521cd644e862936cf2e698206e47385',
|
||||
'md5': '79bc922f3e8a9097b3d68a93780fd475',
|
||||
'info_dict': {
|
||||
'id': '3966754',
|
||||
'ext': 'mp4',
|
||||
'title': 'FIFA 14 - E3 2013 Trailer',
|
||||
'timestamp': 1370938118,
|
||||
'upload_date': '20130611',
|
||||
'duration': 82,
|
||||
},
|
||||
'skip': 'Blocked in the US'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
||||
(video_id, int(time.time())))
|
||||
doc = self._download_xml(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
title = doc.find('title').text
|
||||
video_url = doc.find('filename').text
|
||||
if video_url is None:
|
||||
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
||||
raise ExtractorError('Cannot find video URL in document %r' %
|
||||
xml_bytes)
|
||||
thumbnail = doc.find('imageurl').text
|
||||
duration = parse_duration(doc.find('duration').text)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_info = self._parse_json(
|
||||
js_to_json(self._html_search_regex(
|
||||
'(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.append({
|
||||
'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
|
||||
'ext': 'mp4',
|
||||
'format_id': 'hls',
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - Video')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(video_info.get('length'))
|
||||
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
@@ -10,9 +8,9 @@ from ..utils import (
|
||||
|
||||
|
||||
class ClipsyndicateIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||
_VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||
'md5': '4d7d549451bad625e0ff3d7bd56d776c',
|
||||
'info_dict': {
|
||||
@@ -22,11 +20,13 @@ class ClipsyndicateIE(InfoExtractor):
|
||||
'duration': 612,
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
js_player = self._download_webpage(
|
||||
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||
video_id, 'Downlaoding player')
|
||||
|
@@ -36,7 +36,7 @@ class ComCarCoffIE(InfoExtractor):
|
||||
webpage, 'full data json'))
|
||||
|
||||
video_id = full_data['activeVideo']['video']
|
||||
video_data = full_data['videos'][video_id]
|
||||
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
|
||||
thumbnails = [{
|
||||
'url': video_data['images']['thumb'],
|
||||
}, {
|
||||
|
@@ -14,26 +14,35 @@ import xml.etree.ElementTree
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_cookies,
|
||||
compat_getpass,
|
||||
compat_HTTPError,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
age_restricted,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
_NO_DEFAULT = object()
|
||||
|
||||
|
||||
class InfoExtractor(object):
|
||||
@@ -63,7 +72,7 @@ class InfoExtractor(object):
|
||||
|
||||
Potential fields:
|
||||
* url Mandatory. The URL of the video file
|
||||
* ext Will be calculated from url if missing
|
||||
* ext Will be calculated from URL if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
Calculated from the format_id, width, height.
|
||||
@@ -153,7 +162,7 @@ class InfoExtractor(object):
|
||||
lower to higher preference, each element is a dictionary
|
||||
with the "ext" entry and one of:
|
||||
* "data": The subtitles file contents
|
||||
* "url": A url pointing to the subtitles file
|
||||
* "url": A URL pointing to the subtitles file
|
||||
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
||||
automatically generated captions
|
||||
duration: Length of the video in seconds, as an integer.
|
||||
@@ -174,13 +183,18 @@ class InfoExtractor(object):
|
||||
Set to "root" to indicate that this is a
|
||||
comment to the original video.
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||
webpage_url: The URL to the video webpage, if given to youtube-dl it
|
||||
should allow to get the same result again. (It will be set
|
||||
by YoutubeDL if it's missing)
|
||||
categories: A list of categories that the video falls in, for example
|
||||
["Sports", "Berlin"]
|
||||
tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
|
||||
is_live: True, False, or None (=unknown). Whether this video is a
|
||||
live stream that goes on instead of a fixed-length video.
|
||||
start_time: Time in seconds where the reproduction should start, as
|
||||
specified in the URL.
|
||||
end_time: Time in seconds where the reproduction should end, as
|
||||
specified in the URL.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
@@ -191,8 +205,8 @@ class InfoExtractor(object):
|
||||
There must be a key "entries", which is a list, an iterable, or a PagedList
|
||||
object, each element of which is a valid dictionary by this specification.
|
||||
|
||||
Additionally, playlists can have "title" and "id" attributes with the same
|
||||
semantics as videos (see above).
|
||||
Additionally, playlists can have "title", "description" and "id" attributes
|
||||
with the same semantics as videos (see above).
|
||||
|
||||
|
||||
_type "multi_video" indicates that there are multiple videos that
|
||||
@@ -499,7 +513,7 @@ class InfoExtractor(object):
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||
"""Returns a url that points to a page that should be processed"""
|
||||
"""Returns a URL that points to a page that should be processed"""
|
||||
# TODO: ie should be the class used for getting the info
|
||||
video_info = {'_type': 'url',
|
||||
'url': url,
|
||||
@@ -523,7 +537,7 @@ class InfoExtractor(object):
|
||||
video_info['description'] = playlist_description
|
||||
return video_info
|
||||
|
||||
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
"""
|
||||
Perform a regex search on the given string, using a single or a list of
|
||||
patterns returning the first matching group.
|
||||
@@ -549,7 +563,7 @@ class InfoExtractor(object):
|
||||
return next(g for g in mobj.groups() if g is not None)
|
||||
else:
|
||||
return mobj.group(group)
|
||||
elif default is not _NO_DEFAULT:
|
||||
elif default is not NO_DEFAULT:
|
||||
return default
|
||||
elif fatal:
|
||||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||
@@ -557,7 +571,7 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||
return None
|
||||
|
||||
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
"""
|
||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||
"""
|
||||
@@ -597,7 +611,7 @@ class InfoExtractor(object):
|
||||
|
||||
return (username, password)
|
||||
|
||||
def _get_tfa_info(self):
|
||||
def _get_tfa_info(self, note='two-factor verification code'):
|
||||
"""
|
||||
Get the two-factor authentication info
|
||||
TODO - asking the user will be required for sms/phone verify
|
||||
@@ -611,7 +625,7 @@ class InfoExtractor(object):
|
||||
if downloader_params.get('twofactor', None) is not None:
|
||||
return downloader_params['twofactor']
|
||||
|
||||
return None
|
||||
return compat_getpass('Type %s and press [Return]: ' % note)
|
||||
|
||||
# Helper functions for extracting OpenGraph info
|
||||
@staticmethod
|
||||
@@ -624,6 +638,12 @@ class InfoExtractor(object):
|
||||
template % (content_re, property_re),
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _meta_regex(prop):
|
||||
return r'''(?isx)<meta
|
||||
(?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||
|
||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||
if name is None:
|
||||
name = 'OpenGraph %s' % prop
|
||||
@@ -633,7 +653,7 @@ class InfoExtractor(object):
|
||||
return unescapeHTML(escaped)
|
||||
|
||||
def _og_search_thumbnail(self, html, **kargs):
|
||||
return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs)
|
||||
return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
|
||||
|
||||
def _og_search_description(self, html, **kargs):
|
||||
return self._og_search_property('description', html, fatal=False, **kargs)
|
||||
@@ -654,9 +674,7 @@ class InfoExtractor(object):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_regex(
|
||||
r'''(?isx)<meta
|
||||
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
|
||||
self._meta_regex(name),
|
||||
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||
|
||||
def _dc_search_uploader(self, html):
|
||||
@@ -705,6 +723,27 @@ class InfoExtractor(object):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
|
||||
@staticmethod
|
||||
def _hidden_inputs(html):
|
||||
hidden_inputs = {}
|
||||
for input in re.findall(r'<input([^>]+)>', html):
|
||||
if not re.search(r'type=(["\'])hidden\1', input):
|
||||
continue
|
||||
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
|
||||
if not name:
|
||||
continue
|
||||
value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
|
||||
if not value:
|
||||
continue
|
||||
hidden_inputs[name.group('value')] = value.group('value')
|
||||
return hidden_inputs
|
||||
|
||||
def _form_hidden_inputs(self, form_id, html):
|
||||
form = self._search_regex(
|
||||
r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
|
||||
html, '%s form' % form_id, group='form')
|
||||
return self._hidden_inputs(form)
|
||||
|
||||
def _sort_formats(self, formats, field_preference=None):
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found')
|
||||
@@ -815,10 +854,14 @@ class InfoExtractor(object):
|
||||
self.to_screen(msg)
|
||||
time.sleep(timeout)
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
|
||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip()):
|
||||
manifest = self._download_xml(
|
||||
manifest_url, video_id, 'Downloading f4m manifest',
|
||||
'Unable to download f4m manifest')
|
||||
'Unable to download f4m manifest',
|
||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
|
||||
transform_source=transform_source)
|
||||
|
||||
formats = []
|
||||
manifest_version = '1.0'
|
||||
@@ -828,8 +871,19 @@ class InfoExtractor(object):
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
if manifest_version == '2.0':
|
||||
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
|
||||
(media_el.attrib.get('href') or media_el.attrib.get('url')))
|
||||
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
manifest_url = (
|
||||
media_url if media_url.startswith('http://') or media_url.startswith('https://')
|
||||
else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url))
|
||||
# If media_url is itself a f4m manifest do the recursive extraction
|
||||
# since bitrates in parent manifest (this one) and media_url manifest
|
||||
# may differ leading to inability to resolve the format by requested
|
||||
# bitrate in f4m downloader
|
||||
if determine_ext(manifest_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id))
|
||||
continue
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
||||
@@ -846,7 +900,8 @@ class InfoExtractor(object):
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, note=None, errnote=None):
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True):
|
||||
|
||||
formats = [{
|
||||
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||
@@ -866,7 +921,10 @@ class InfoExtractor(object):
|
||||
m3u8_doc = self._download_webpage(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
errnote=errnote or 'Failed to download m3u8 information')
|
||||
errnote=errnote or 'Failed to download m3u8 information',
|
||||
fatal=fatal)
|
||||
if m3u8_doc is False:
|
||||
return m3u8_doc
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
@@ -927,69 +985,221 @@ class InfoExtractor(object):
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
# TODO: improve extraction
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True):
|
||||
smil = self._download_xml(
|
||||
smil_url, video_id, 'Downloading SMIL file',
|
||||
'Unable to download SMIL file', fatal=fatal)
|
||||
@staticmethod
|
||||
def _xpath_ns(path, namespace=None):
|
||||
if not namespace:
|
||||
return path
|
||||
out = []
|
||||
for c in path.split('/'):
|
||||
if not c or c == '.':
|
||||
out.append(c)
|
||||
else:
|
||||
out.append('{%s}%s' % (namespace, c))
|
||||
return '/'.join(out)
|
||||
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal)
|
||||
|
||||
if smil is False:
|
||||
assert not fatal
|
||||
return []
|
||||
|
||||
base = smil.find('./head/meta').get('base')
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
return self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
|
||||
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal)
|
||||
if smil is False:
|
||||
return {}
|
||||
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
|
||||
|
||||
def _download_smil(self, smil_url, video_id, fatal=True):
|
||||
return self._download_xml(
|
||||
smil_url, video_id, 'Downloading SMIL file',
|
||||
'Unable to download SMIL file', fatal=fatal)
|
||||
|
||||
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
formats = self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
|
||||
|
||||
video_id = os.path.splitext(url_basename(smil_url))[0]
|
||||
title = None
|
||||
description = None
|
||||
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
||||
name = meta.attrib.get('name')
|
||||
content = meta.attrib.get('content')
|
||||
if not name or not content:
|
||||
continue
|
||||
if not title and name == 'title':
|
||||
title = content
|
||||
elif not description and name in ('description', 'abstract'):
|
||||
description = content
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title or video_id,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _parse_smil_namespace(self, smil):
|
||||
return self._search_regex(
|
||||
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
base = smil_url
|
||||
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
||||
b = meta.get('base') or meta.get('httpBase')
|
||||
if b:
|
||||
base = b
|
||||
break
|
||||
|
||||
formats = []
|
||||
rtmp_count = 0
|
||||
if smil.findall('./body/seq/video'):
|
||||
video = smil.findall('./body/seq/video')[0]
|
||||
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||
formats.extend(fmts)
|
||||
else:
|
||||
for video in smil.findall('./body/switch/video'):
|
||||
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||
formats.extend(fmts)
|
||||
http_count = 0
|
||||
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
|
||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
ext = video.get('ext')
|
||||
src_ext = determine_ext(src)
|
||||
streamer = video.get('streamer') or base
|
||||
|
||||
if proto == 'rtmp' or streamer.startswith('rtmp'):
|
||||
rtmp_count += 1
|
||||
formats.append({
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'filesize': filesize,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
if transform_rtmp_url:
|
||||
streamer, src = transform_rtmp_url(streamer, src)
|
||||
formats[-1].update({
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
})
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src_url, video_id, ext or 'mp4', m3u8_id='hls'))
|
||||
continue
|
||||
|
||||
if src_ext == 'f4m':
|
||||
f4m_url = src_url
|
||||
if not f4m_params:
|
||||
f4m_params = {
|
||||
'hdcore': '3.2.0',
|
||||
'plugin': 'flowplayer-3.2.0.1',
|
||||
}
|
||||
f4m_url += '&' if '?' in f4m_url else '?'
|
||||
f4m_url += compat_urllib_parse.urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
|
||||
continue
|
||||
|
||||
if src_url.startswith('http'):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
'url': src_url,
|
||||
'ext': ext or src_ext or 'flv',
|
||||
'format_id': 'http-%d' % (bitrate or http_count),
|
||||
'tbr': bitrate,
|
||||
'filesize': filesize,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
continue
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _parse_smil_video(self, video, video_id, base, rtmp_count):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
return ([], rtmp_count)
|
||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
if not proto:
|
||||
if base:
|
||||
if base.startswith('rtmp'):
|
||||
proto = 'rtmp'
|
||||
elif base.startswith('http'):
|
||||
proto = 'http'
|
||||
ext = video.get('ext')
|
||||
if proto == 'm3u8':
|
||||
return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
|
||||
elif proto == 'rtmp':
|
||||
rtmp_count += 1
|
||||
streamer = video.get('streamer') or base
|
||||
return ([{
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}], rtmp_count)
|
||||
elif proto.startswith('http'):
|
||||
return ([{
|
||||
'url': base + src,
|
||||
'ext': ext or 'flv',
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}], rtmp_count)
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
subtitles = {}
|
||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||
src = textstream.get('src')
|
||||
if not src:
|
||||
continue
|
||||
ext = textstream.get('ext') or determine_ext(src)
|
||||
if not ext:
|
||||
type_ = textstream.get('type')
|
||||
SUBTITLES_TYPES = {
|
||||
'text/vtt': 'vtt',
|
||||
'text/srt': 'srt',
|
||||
'application/smptett+xml': 'tt',
|
||||
}
|
||||
if type_ in SUBTITLES_TYPES:
|
||||
ext = SUBTITLES_TYPES[type_]
|
||||
lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': src,
|
||||
'ext': ext,
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
|
||||
xspf = self._download_xml(
|
||||
playlist_url, playlist_id, 'Downloading xpsf playlist',
|
||||
'Unable to download xspf manifest', fatal=fatal)
|
||||
if xspf is False:
|
||||
return []
|
||||
return self._parse_xspf(xspf, playlist_id)
|
||||
|
||||
def _parse_xspf(self, playlist, playlist_id):
|
||||
NS_MAP = {
|
||||
'xspf': 'http://xspf.org/ns/0/',
|
||||
's1': 'http://static.streamone.nl/player/ns/0',
|
||||
}
|
||||
|
||||
entries = []
|
||||
for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||
title = xpath_text(
|
||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
|
||||
description = xpath_text(
|
||||
track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
|
||||
thumbnail = xpath_text(
|
||||
track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
|
||||
duration = float_or_none(
|
||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
|
||||
|
||||
formats = [{
|
||||
'url': location.text,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': playlist_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
})
|
||||
return entries
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
@@ -1025,6 +1235,12 @@ class InfoExtractor(object):
|
||||
None, '/', True, False, expire_time, '', None, None, None)
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def _get_cookies(self, url):
|
||||
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
|
||||
req = compat_urllib_request.Request(url)
|
||||
self._downloader.cookiejar.add_cookie_header(req)
|
||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
def get_testcases(self, include_onlymatching=False):
|
||||
t = getattr(self, '_TEST', None)
|
||||
if t:
|
||||
@@ -1063,6 +1279,23 @@ class InfoExtractor(object):
|
||||
def _get_subtitles(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
@staticmethod
|
||||
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
|
||||
""" Merge subtitle items for one language. Items with duplicated URLs
|
||||
will be dropped. """
|
||||
list1_urls = set([item['url'] for item in subtitle_list1])
|
||||
ret = list(subtitle_list1)
|
||||
ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
|
||||
return ret
|
||||
|
||||
@classmethod
|
||||
def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
|
||||
""" Merge two subtitle dictionaries, language by language. """
|
||||
ret = dict(subtitle_dict1)
|
||||
for lang in subtitle_dict2:
|
||||
ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
|
||||
return ret
|
||||
|
||||
def extract_automatic_captions(self, *args, **kwargs):
|
||||
if (self._downloader.params.get('writeautomaticsub', False) or
|
||||
self._downloader.params.get('listsubtitles')):
|
||||
@@ -1076,7 +1309,7 @@ class InfoExtractor(object):
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
Base class for paged search queries extractors.
|
||||
They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query}
|
||||
They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}
|
||||
Instances should define _SEARCH_KEY and _MAX_RESULTS.
|
||||
"""
|
||||
|
||||
|
@@ -12,12 +12,15 @@ from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
intlist_to_bytes,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
@@ -27,7 +30,7 @@ from ..aes import (
|
||||
|
||||
|
||||
class CrunchyrollIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
@@ -45,6 +48,22 @@ class CrunchyrollIE(InfoExtractor):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
|
||||
'info_dict': {
|
||||
'id': '589804',
|
||||
'ext': 'flv',
|
||||
'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
|
||||
'description': 'md5:fe2743efedb49d279552926d0bd0cd9e',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Danny Choo Network',
|
||||
'upload_date': '20120213',
|
||||
},
|
||||
'params': {
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
@@ -238,7 +257,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
video_upload_date = unified_strdate(video_upload_date)
|
||||
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)
|
||||
|
||||
playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
||||
playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
|
||||
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
||||
playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
|
||||
playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
@@ -251,16 +270,31 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
|
||||
# urlencode doesn't work!
|
||||
streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format
|
||||
streamdata_req = compat_urllib_request.Request(
|
||||
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
|
||||
% (stream_id, stream_format, stream_quality),
|
||||
compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8'))
|
||||
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
|
||||
streamdata = self._download_xml(
|
||||
streamdata_req, video_id,
|
||||
note='Downloading media info for %s' % video_format)
|
||||
video_url = streamdata.find('./host').text
|
||||
video_play_path = streamdata.find('./file').text
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
video_url = stream_info.find('./host').text
|
||||
video_play_path = stream_info.find('./file').text
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
formats.append({
|
||||
'url': direct_video_url,
|
||||
'format_id': video_format,
|
||||
})
|
||||
continue
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
|
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601, ExtractorError
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
IE_DESC = '華視新聞'
|
||||
# https connection failed (Connection reset)
|
||||
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
|
@@ -13,8 +13,9 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -28,10 +29,16 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
request.add_header('Cookie', 'family_filter=off; ff=off')
|
||||
return request
|
||||
|
||||
def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
|
||||
request = self._build_request(url)
|
||||
return self._download_webpage_handle(request, *args, **kwargs)
|
||||
|
||||
def _download_webpage_no_ff(self, url, *args, **kwargs):
|
||||
request = self._build_request(url)
|
||||
return self._download_webpage(request, *args, **kwargs)
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
"""Information Extractor for Dailymotion"""
|
||||
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||
IE_NAME = 'dailymotion'
|
||||
|
||||
@@ -50,9 +57,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'x2iuewm',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'IGN',
|
||||
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
|
||||
'description': 'Several come bundled with the Steam Controller.',
|
||||
'thumbnail': 're:^https?:.*\.(?:jpg|png)$',
|
||||
'duration': 74,
|
||||
'timestamp': 1425657362,
|
||||
'upload_date': '20150306',
|
||||
'uploader': 'IGN',
|
||||
'uploader_id': 'xijv66',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
# Vevo video
|
||||
@@ -86,38 +101,106 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://www.dailymotion.com/video/%s' % video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = self._build_request(url)
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
|
||||
# Extract URL, uploader and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# It may just embed a vevo video:
|
||||
m_vevo = re.search(
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||
webpage)
|
||||
if m_vevo is not None:
|
||||
vevo_id = m_vevo.group('id')
|
||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
webpage = self._download_webpage_no_ff(
|
||||
'https://www.dailymotion.com/video/%s' % video_id, video_id)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<meta property="video:release_date" content="([0-9]{4})-([0-9]{2})-([0-9]{2}).+?"/>', webpage)
|
||||
if mobj is not None:
|
||||
video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:(\d+)"',
|
||||
r'video_views_count[^>]+>\s+([\d\.,]+)'],
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
player_v5 = self._search_regex(
|
||||
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
formats = []
|
||||
for quality, media_list in metadata['qualities'].items():
|
||||
for media in media_list:
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
type_ = media.get('type')
|
||||
if type_ == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
if type_ == 'application/x-mpegURL' or determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
'format_id': quality,
|
||||
}
|
||||
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
|
||||
if m:
|
||||
f.update({
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = metadata['title']
|
||||
duration = int_or_none(metadata.get('duration'))
|
||||
timestamp = int_or_none(metadata.get('created_time'))
|
||||
thumbnail = metadata.get('poster_url')
|
||||
uploader = metadata.get('owner', {}).get('screenname')
|
||||
uploader_id = metadata.get('owner', {}).get('id')
|
||||
|
||||
subtitles = {}
|
||||
for subtitle_lang, subtitle in metadata.get('subtitles', {}).get('data', {}).items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'ext': determine_ext(subtitle_url),
|
||||
'url': subtitle_url,
|
||||
} for subtitle_url in subtitle.get('urls', [])]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'age_limit': age_limit,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# vevo embed
|
||||
vevo_id = self._search_regex(
|
||||
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
|
||||
webpage, 'vevo embed', default=None)
|
||||
if vevo_id:
|
||||
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
|
||||
|
||||
# fallback old player
|
||||
embed_page = self._download_webpage_no_ff(
|
||||
'https://www.dailymotion.com/embed/video/%s' % video_id,
|
||||
video_id, 'Downloading embed page')
|
||||
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'video:release_date', webpage, 'upload date'))
|
||||
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var info = ({.*?}),$', embed_page,
|
||||
'video info', flags=re.MULTILINE),
|
||||
video_id)
|
||||
|
||||
embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id
|
||||
embed_request = self._build_request(embed_url)
|
||||
embed_page = self._download_webpage(
|
||||
embed_request, video_id, 'Downloading embed page')
|
||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
||||
'video info', flags=re.MULTILINE)
|
||||
info = json.loads(info)
|
||||
if info.get('error') is not None:
|
||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||
raise ExtractorError(msg, expected=True)
|
||||
@@ -138,16 +221,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
if not formats:
|
||||
raise ExtractorError('Unable to extract video URL')
|
||||
self._sort_formats(formats)
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
title = self._og_search_title(webpage, default=None)
|
||||
if title is None:
|
||||
title = self._html_search_regex(
|
||||
@@ -158,12 +236,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'uploader': info['owner.screenname'],
|
||||
'upload_date': video_upload_date,
|
||||
'timestamp': timestamp,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'subtitles': video_subtitles,
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'age_limit': age_limit,
|
||||
'view_count': view_count,
|
||||
'duration': info['duration']
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
@@ -197,18 +277,26 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
}]
|
||||
|
||||
def _extract_entries(self, id):
|
||||
video_ids = []
|
||||
video_ids = set()
|
||||
processed_urls = set()
|
||||
for pagenum in itertools.count(1):
|
||||
request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
|
||||
webpage = self._download_webpage(request,
|
||||
id, 'Downloading page %s' % pagenum)
|
||||
page_url = self._PAGE_TEMPLATE % (id, pagenum)
|
||||
webpage, urlh = self._download_webpage_handle_no_ff(
|
||||
page_url, id, 'Downloading page %s' % pagenum)
|
||||
if urlh.geturl() in processed_urls:
|
||||
self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
|
||||
page_url, urlh.geturl()), id)
|
||||
break
|
||||
|
||||
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
|
||||
processed_urls.add(urlh.geturl())
|
||||
|
||||
for video_id in re.findall(r'data-xid="(.+?)"', webpage):
|
||||
if video_id not in video_ids:
|
||||
yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||
video_ids.add(video_id)
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
break
|
||||
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||
for video_id in orderedSet(video_ids)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -225,7 +313,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P<user>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
@@ -234,6 +322,17 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -251,3 +350,52 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
'title': full_user,
|
||||
'entries': self._extract_entries(user),
|
||||
}
|
||||
|
||||
|
||||
class DailymotionCloudIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/'
|
||||
_VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
|
||||
_VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
|
||||
|
||||
_TESTS = [{
|
||||
# From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
|
||||
# Tested at FranceTvInfo_2
|
||||
'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
|
||||
'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_dmcloud_url(self, webpage):
|
||||
mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % self._VALID_EMBED_URL, webpage)
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
|
||||
mobj = re.search(
|
||||
r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % self._VALID_EMBED_URL,
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage_no_ff(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
|
||||
|
||||
video_info = self._parse_json(self._search_regex(
|
||||
r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)
|
||||
|
||||
# TODO: parse ios_url, which is in fact a manifest
|
||||
video_url = video_info['mp4_url']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': video_info.get('thumbnail_url'),
|
||||
}
|
||||
|
84
youtube_dl/extractor/dcn.py
Normal file
84
youtube_dl/extractor/dcn.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class DCNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '17375',
|
||||
'ext': 'mp4',
|
||||
'title': 'رحلة العمر : الحلقة 1',
|
||||
'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 2041,
|
||||
'timestamp': 1227504126,
|
||||
'upload_date': '20081124',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
|
||||
video = self._download_json(request, video_id)
|
||||
title = video.get('title_en') or video['title_ar']
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
|
||||
+ compat_urllib_parse.urlencode({
|
||||
'id': video['id'],
|
||||
'user_id': video['user_id'],
|
||||
'signature': video['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
}), video_id)
|
||||
|
||||
m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
|
||||
rtsp_url = self._search_regex(
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
|
||||
if rtsp_url:
|
||||
formats.append({
|
||||
'url': rtsp_url,
|
||||
'format_id': 'rtsp',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
img = video.get('img')
|
||||
thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
|
||||
duration = int_or_none(video.get('duration'))
|
||||
description = video.get('description_en') or video.get('description_ar')
|
||||
timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
@@ -3,42 +3,47 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class DFBIE(InfoExtractor):
|
||||
IE_NAME = 'tv.dfb.de'
|
||||
_VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
|
||||
'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/',
|
||||
# The md5 is different each time
|
||||
'info_dict': {
|
||||
'id': '9070',
|
||||
'id': '11633',
|
||||
'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland',
|
||||
'ext': 'flv',
|
||||
'title': 'Highlights des Empfangs in Berlin',
|
||||
'upload_date': '20140716',
|
||||
'title': 'U 19-EM: Stimmen zum Spiel gegen Russland',
|
||||
'upload_date': '20150714',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_info = self._download_xml(
|
||||
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
|
||||
video_id)
|
||||
display_id)
|
||||
video_info = player_info.find('video')
|
||||
|
||||
f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
|
||||
f4m_info = self._download_xml(
|
||||
self._proto_relative_url(video_info.find('url').text.strip()), display_id)
|
||||
token_el = f4m_info.find('token')
|
||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
||||
formats = self._extract_f4m_formats(manifest_url, display_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_info.find('title').text,
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]),
|
||||
'upload_date': unified_strdate(video_info.find('time_date').text),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,10 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
)
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class DHMIE(InfoExtractor):
|
||||
@@ -34,24 +31,14 @@ class DHMIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
playlist_url = self._search_regex(
|
||||
r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
|
||||
|
||||
playlist = self._download_xml(playlist_url, video_id)
|
||||
|
||||
track = playlist.find(
|
||||
'./{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
|
||||
|
||||
video_url = xpath_text(
|
||||
track, './{http://xspf.org/ns/0/}location',
|
||||
'video url', fatal=True)
|
||||
thumbnail = xpath_text(
|
||||
track, './{http://xspf.org/ns/0/}image',
|
||||
'thumbnail')
|
||||
entries = self._extract_xspf_playlist(playlist_url, playlist_id)
|
||||
|
||||
title = self._search_regex(
|
||||
[r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
|
||||
@@ -63,11 +50,10 @@ class DHMIE(InfoExtractor):
|
||||
r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
|
||||
webpage, 'duration', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
entries[0].update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@@ -9,6 +9,7 @@ from ..compat import (compat_str, compat_basestring)
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
IE_DESC = '斗鱼'
|
||||
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
|
@@ -6,6 +6,8 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,7 +19,54 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class DramaFeverIE(InfoExtractor):
|
||||
class DramaFeverBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
|
||||
_NETRC_MACHINE = 'dramafever'
|
||||
|
||||
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||
|
||||
_consumer_secret = None
|
||||
|
||||
def _get_consumer_secret(self):
|
||||
mainjs = self._download_webpage(
|
||||
'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
|
||||
None, 'Downloading main.js', fatal=False)
|
||||
if not mainjs:
|
||||
return self._CONSUMER_SECRET
|
||||
return self._search_regex(
|
||||
r"var\s+cs\s*=\s*'([^']+)'", mainjs,
|
||||
'consumer secret', default=self._CONSUMER_SECRET)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
self._consumer_secret = self._get_consumer_secret()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'username': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
if all(logout_pattern not in response
|
||||
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
|
||||
error = self._html_search_regex(
|
||||
r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class DramaFeverIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
|
||||
_TEST = {
|
||||
@@ -85,6 +134,23 @@ class DramaFeverIE(InfoExtractor):
|
||||
'url': href,
|
||||
}]
|
||||
|
||||
series_id, episode_number = video_id.split('.')
|
||||
episode_info = self._download_json(
|
||||
# We only need a single episode info, so restricting page size to one episode
|
||||
# and dealing with page number as with episode number
|
||||
r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1'
|
||||
% (self._consumer_secret, series_id, episode_number),
|
||||
video_id, 'Downloading episode info JSON', fatal=False)
|
||||
if episode_info:
|
||||
value = episode_info.get('value')
|
||||
if value:
|
||||
subfile = value[0].get('subfile') or value[0].get('new_subfile')
|
||||
if subfile and subfile != 'http://www.dramafever.com/st/':
|
||||
subtitles.setdefault('English', []).append({
|
||||
'ext': 'srt',
|
||||
'url': subfile,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -97,7 +163,7 @@ class DramaFeverIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class DramaFeverSeriesIE(InfoExtractor):
|
||||
class DramaFeverSeriesIE(DramaFeverBaseIE):
|
||||
IE_NAME = 'dramafever:series'
|
||||
_VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
|
||||
_TESTS = [{
|
||||
@@ -118,27 +184,14 @@ class DramaFeverSeriesIE(InfoExtractor):
|
||||
'playlist_count': 20,
|
||||
}]
|
||||
|
||||
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
|
||||
_PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
|
||||
|
||||
def _get_consumer_secret(self, video_id):
|
||||
mainjs = self._download_webpage(
|
||||
'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
|
||||
video_id, 'Downloading main.js', fatal=False)
|
||||
if not mainjs:
|
||||
return self._CONSUMER_SECRET
|
||||
return self._search_regex(
|
||||
r"var\s+cs\s*=\s*'([^']+)'", mainjs,
|
||||
'consumer secret', default=self._CONSUMER_SECRET)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
consumer_secret = self._get_consumer_secret(series_id)
|
||||
|
||||
series = self._download_json(
|
||||
'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
|
||||
% (consumer_secret, series_id),
|
||||
% (self._consumer_secret, series_id),
|
||||
series_id, 'Downloading series JSON')['series'][series_id]
|
||||
|
||||
title = clean_html(series['name'])
|
||||
@@ -148,11 +201,14 @@ class DramaFeverSeriesIE(InfoExtractor):
|
||||
for page_num in itertools.count(1):
|
||||
episodes = self._download_json(
|
||||
'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
|
||||
% (consumer_secret, series_id, self._PAGE_SIZE, page_num),
|
||||
% (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
|
||||
series_id, 'Downloading episodes JSON page #%d' % page_num)
|
||||
for episode in episodes.get('value', []):
|
||||
episode_url = episode.get('episode_url')
|
||||
if not episode_url:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, episode['episode_url']),
|
||||
compat_urlparse.urljoin(url, episode_url),
|
||||
'DramaFever', episode.get('guid')))
|
||||
if page_num == episodes['num_pages']:
|
||||
break
|
||||
|
@@ -15,7 +15,6 @@ class DRBonanzaIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
|
||||
'md5': 'fe330252ddea607635cf2eb2c99a0af3',
|
||||
'info_dict': {
|
||||
'id': '65517',
|
||||
'ext': 'mp4',
|
||||
@@ -26,6 +25,9 @@ class DRBonanzaIE(InfoExtractor):
|
||||
'upload_date': '20110120',
|
||||
'duration': 3664,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmp
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
|
||||
'md5': '6dfe039417e76795fb783c52da3de11d',
|
||||
@@ -93,6 +95,11 @@ class DRBonanzaIE(InfoExtractor):
|
||||
'format_id': file['Type'].replace('Video', ''),
|
||||
'preference': preferencemap.get(file['Type'], -10),
|
||||
})
|
||||
if format['url'].startswith('rtmp'):
|
||||
rtmp_url = format['url']
|
||||
format['rtmp_live'] = True # --resume does not work
|
||||
if '/bonanza/' in rtmp_url:
|
||||
format['play_path'] = rtmp_url.split('/bonanza/')[1]
|
||||
formats.append(format)
|
||||
elif file['Type'] == "Thumb":
|
||||
thumbnail = file['Location']
|
||||
@@ -111,9 +118,6 @@ class DRBonanzaIE(InfoExtractor):
|
||||
description = '%s\n%s\n%s\n' % (
|
||||
info['Description'], info['Actors'], info['Colophon'])
|
||||
|
||||
for f in formats:
|
||||
f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/')
|
||||
f['url'] = f['url'].replace('mp4:bonanza', 'bonanza')
|
||||
self._sort_formats(formats)
|
||||
|
||||
display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
|
||||
|
@@ -36,25 +36,24 @@ class DrTuberIE(InfoExtractor):
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
|
||||
[r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'],
|
||||
webpage, 'title')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'poster="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'like count', fatal=False))
|
||||
dislike_count = str_to_int(self._html_search_regex(
|
||||
r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'like count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'<span class="comments_count">([\d,\.]+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
def extract_count(id_, name):
|
||||
return str_to_int(self._html_search_regex(
|
||||
r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_,
|
||||
webpage, '%s count' % name, fatal=False))
|
||||
|
||||
like_count = extract_count('rate_likes', 'like')
|
||||
dislike_count = extract_count('rate_dislikes', 'dislike')
|
||||
comment_count = extract_count('comments_count', 'comment')
|
||||
|
||||
cats_str = self._search_regex(
|
||||
r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False)
|
||||
r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
||||
|
||||
return {
|
||||
|
@@ -1,9 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class EHowIE(InfoExtractor):
|
||||
@@ -26,7 +24,7 @@ class EHowIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
|
||||
final_url = compat_urllib_parse.unquote(video_url)
|
||||
final_url = compat_urllib_parse_unquote(video_url)
|
||||
uploader = self._html_search_meta('uploader', webpage)
|
||||
title = self._og_search_title(webpage).replace(' | eHow', '')
|
||||
|
||||
|
@@ -1,31 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .tnaflix import TNAFlixIE
|
||||
|
||||
|
||||
class EMPFlixIE(TNAFlixIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
|
||||
|
||||
_TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
|
||||
_DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
|
||||
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
|
||||
'md5': 'b1bc15b6412d33902d6e5952035fcabc',
|
||||
'info_dict': {
|
||||
'id': '33051',
|
||||
'display_id': 'Amateur-Finger-Fuck',
|
||||
'ext': 'mp4',
|
||||
'title': 'Amateur Finger Fuck',
|
||||
'description': 'Amateur solo finger fucking.',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
74
youtube_dl/extractor/esri.py
Normal file
74
youtube_dl/extractor/esri.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class EsriVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications',
|
||||
'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc',
|
||||
'info_dict': {
|
||||
'id': '1124',
|
||||
'ext': 'mp4',
|
||||
'title': 'ArcGIS Online - Developing Applications',
|
||||
'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 185,
|
||||
'upload_date': '20120419',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = []
|
||||
for width, height, content in re.findall(
|
||||
r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage):
|
||||
for video_url, ext, filesize in re.findall(
|
||||
r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content):
|
||||
formats.append({
|
||||
'url': compat_urlparse.urljoin(url, video_url),
|
||||
'ext': ext.lower(),
|
||||
'format_id': '%s-%s' % (ext.lower(), height),
|
||||
'width': int(width),
|
||||
'height': int(height),
|
||||
'filesize_approx': parse_filesize(filesize),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_meta('title', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description', fatal=False)
|
||||
|
||||
thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail)
|
||||
|
||||
duration = int_or_none(self._search_regex(
|
||||
[r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"],
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'last-modified', webpage, 'upload date', fatal=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats
|
||||
}
|
@@ -9,7 +9,7 @@ from ..compat import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,6 +17,8 @@ from ..utils import (
|
||||
int_or_none,
|
||||
limit_length,
|
||||
urlencode_postdata,
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,6 +44,7 @@ class FacebookIE(InfoExtractor):
|
||||
'id': '637842556329505',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
|
||||
'uploader': 'Tennis on Facebook',
|
||||
}
|
||||
}, {
|
||||
'note': 'Video without discernible title',
|
||||
@@ -50,6 +53,7 @@ class FacebookIE(InfoExtractor):
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
@@ -136,7 +140,7 @@ class FacebookIE(InfoExtractor):
|
||||
else:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
params = json.loads(params_raw)
|
||||
video_data = params['video_data'][0]
|
||||
|
||||
@@ -161,6 +165,7 @@ class FacebookIE(InfoExtractor):
|
||||
video_title = limit_length(video_title, 80)
|
||||
if not video_title:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -168,4 +173,5 @@ class FacebookIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'duration': int_or_none(video_data.get('video_duration')),
|
||||
'thumbnail': video_data.get('thumbnail_src'),
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
@@ -6,9 +6,9 @@ from .common import InfoExtractor
|
||||
|
||||
class FazIE(InfoExtractor):
|
||||
IE_NAME = 'faz.net'
|
||||
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
|
||||
'info_dict': {
|
||||
'id': '12610585',
|
||||
@@ -16,7 +16,22 @@ class FazIE(InfoExtractor):
|
||||
'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
|
||||
'description': 'md5:1453fbf9a0d041d985a47306192ea253',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.faz.net/foobarblafasel-13659345.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -86,7 +86,7 @@ class FC2IE(InfoExtractor):
|
||||
|
||||
info_url = (
|
||||
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))
|
||||
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
|
@@ -30,6 +30,10 @@ class FolketingetIE(InfoExtractor):
|
||||
'upload_date': '20141120',
|
||||
'duration': 3960,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -32,6 +32,7 @@ class FourTubeIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -6,18 +6,15 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
)
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
@@ -58,12 +55,12 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
# See https://github.com/rg3/youtube-dl/issues/3963
|
||||
# m3u8 urls work fine
|
||||
continue
|
||||
video_url_parsed = compat_urllib_parse_urlparse(video_url)
|
||||
f4m_url = self._download_webpage(
|
||||
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
|
||||
'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url,
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
|
||||
elif video_url.startswith('rtmp'):
|
||||
@@ -86,7 +83,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
'title': info['titre'],
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -131,12 +128,26 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
},
|
||||
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
|
||||
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
|
||||
'info_dict': {
|
||||
'id': '556e03339473995ee145930c',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les entreprises familiales : le secret de la réussite',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a page URL to its video.

    Downloads the page, hands off to the DailymotionCloud extractor when
    a Dailymotion Cloud URL is found in it, and otherwise reads the
    ``id-video=<id>@<catalogue>`` reference from the page and passes both
    parts to ``_extract_video``.
    """
    # The 'title' group of the URL pattern is used as the download label.
    page_title = re.match(self._VALID_URL, url).group('title')
    webpage = self._download_webpage(url, page_title)

    dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
    if dmcloud_url:
        return self.url_result(dmcloud_url, 'DailymotionCloud')

    # The reference has the form "<video id>@<catalogue>".
    video_ref = self._search_regex(
        r'id-video=([^@]+@[^"]+)', webpage, 'video id')
    video_id, catalogue = video_ref.split('@')
    return self._extract_video(video_id, catalogue)
|
||||
@@ -145,11 +156,21 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetv'
|
||||
IE_DESC = 'France 2, 3, 4, 5 and Ô'
|
||||
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
||||
(?:
|
||||
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
||||
| (emissions?|jt)/(?P<key>[^/?]+)
|
||||
)'''
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?france[2345o]\.fr/
|
||||
(?:
|
||||
emissions/[^/]+/(?:videos|diffusions)|
|
||||
emission/[^/]+|
|
||||
videos|
|
||||
jt
|
||||
)
|
||||
/|
|
||||
embed\.francetv\.fr/\?ue=
|
||||
)
|
||||
(?P<id>[^/?]+)
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
# france2
|
||||
@@ -206,24 +227,46 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
},
|
||||
# franceo
|
||||
{
|
||||
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||
'md5': '52f0bfe202848b15915a2f39aaa8981b',
|
||||
'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
|
||||
'md5': '47d5816d3b24351cdce512ad7ab31da8',
|
||||
'info_dict': {
|
||||
'id': '108634970',
|
||||
'id': '125377621',
|
||||
'ext': 'flv',
|
||||
'title': 'Infô Afrique',
|
||||
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
||||
'upload_date': '20140915',
|
||||
'timestamp': 1410822000,
|
||||
'title': 'Infô soir',
|
||||
'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
|
||||
'upload_date': '20150718',
|
||||
'timestamp': 1437241200,
|
||||
'duration': 414,
|
||||
},
|
||||
},
|
||||
{
|
||||
# francetv embed
|
||||
'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
|
||||
'info_dict': {
|
||||
'id': 'EV_30231',
|
||||
'ext': 'flv',
|
||||
'title': 'Alcaline, le concert avec Calogero',
|
||||
'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
|
||||
'upload_date': '20150226',
|
||||
'timestamp': 1424989860,
|
||||
'duration': 5400,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.franceo.fr/videos/125377617',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'href="http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
@@ -53,7 +53,7 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
for bitrate in bitrates:
|
||||
for link in links:
|
||||
formats.append({
|
||||
'url': '%s%d.%s' % (link[0], bitrate, link[1]),
|
||||
'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])),
|
||||
'format_id': '%s-%d' % (link[1], bitrate),
|
||||
'vbr': bitrate,
|
||||
})
|
||||
|
@@ -5,7 +5,7 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -75,7 +75,7 @@ class GameSpotIE(InfoExtractor):
|
||||
return {
|
||||
'id': data_video['guid'],
|
||||
'display_id': page_id,
|
||||
'title': compat_urllib_parse.unquote(data_video['title']),
|
||||
'title': compat_urllib_parse_unquote(data_video['title']),
|
||||
'formats': formats,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
|
@@ -7,7 +7,10 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import remove_end
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
HEADRequest,
|
||||
)
|
||||
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
@@ -73,10 +76,20 @@ class GDCVaultIE(InfoExtractor):
|
||||
return video_formats
|
||||
|
||||
def _parse_flv(self, xml_description):
|
||||
video_formats = []
|
||||
formats = []
|
||||
akamai_url = xml_description.find('./metadata/akamaiHost').text
|
||||
audios = xml_description.find('./metadata/audios')
|
||||
if audios is not None:
|
||||
for audio in audios:
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(audio.get('url'), '.flv'),
|
||||
'ext': 'flv',
|
||||
'vcodec': 'none',
|
||||
'format_id': audio.get('code'),
|
||||
})
|
||||
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
||||
video_formats.append({
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(slide_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
@@ -86,7 +99,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
'format_id': 'slides',
|
||||
})
|
||||
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
||||
video_formats.append({
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||
'ext': 'flv',
|
||||
@@ -95,7 +108,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
'preference': -1,
|
||||
'format_id': 'speaker',
|
||||
})
|
||||
return video_formats
|
||||
return formats
|
||||
|
||||
def _login(self, webpage_url, display_id):
|
||||
(username, password) = self._get_login_info()
|
||||
@@ -133,16 +146,18 @@ class GDCVaultIE(InfoExtractor):
|
||||
r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
|
||||
start_page, 'url', default=None)
|
||||
if direct_url:
|
||||
video_url = 'http://www.gdcvault.com/' + direct_url
|
||||
title = self._html_search_regex(
|
||||
r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
|
||||
start_page, 'title')
|
||||
video_url = 'http://www.gdcvault.com' + direct_url
|
||||
# resolve the url so that we can detect the correct extension
|
||||
head = self._request_webpage(HEADRequest(video_url), video_id)
|
||||
video_url = head.geturl()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
}
|
||||
|
||||
@@ -168,8 +183,8 @@ class GDCVaultIE(InfoExtractor):
|
||||
# Fallback to the older format
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
|
||||
|
||||
xml_decription_url = xml_root + 'xml/' + xml_name
|
||||
xml_description = self._download_xml(xml_decription_url, display_id)
|
||||
xml_description_url = xml_root + 'xml/' + xml_name
|
||||
xml_description = self._download_xml(xml_description_url, display_id)
|
||||
|
||||
video_title = xml_description.find('./metadata/title').text
|
||||
video_formats = self._parse_mp4(xml_description)
|
||||
|
@@ -8,7 +8,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -37,12 +36,18 @@ from .rutv import RUTVIE
|
||||
from .tvc import TVCIE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .smotri import SmotriIE
|
||||
from .myvi import MyviIE
|
||||
from .condenast import CondeNastIE
|
||||
from .udn import UDNEmbedIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .bliptv import BlipTVIE
|
||||
from .svt import SVTIE
|
||||
from .pornhub import PornHubIE
|
||||
from .xhamster import XHamsterEmbedIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .snagfilms import SnagFilmsEmbedIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -125,6 +130,89 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
}
|
||||
},
|
||||
# SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
|
||||
{
|
||||
'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
|
||||
'info_dict': {
|
||||
'id': 'smil',
|
||||
'ext': 'mp4',
|
||||
'title': 'Automatics, robotics and biocybernetics',
|
||||
'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
|
||||
'formats': 'mincount:16',
|
||||
'subtitles': 'mincount:1',
|
||||
},
|
||||
'params': {
|
||||
'force_generic_extractor': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
|
||||
{
|
||||
'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
|
||||
'info_dict': {
|
||||
'id': 'hds',
|
||||
'ext': 'flv',
|
||||
'title': 'hds',
|
||||
'formats': 'mincount:1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from https://www.restudy.dk/video/play/id/1637
|
||||
{
|
||||
'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
|
||||
'info_dict': {
|
||||
'id': 'video_1637',
|
||||
'ext': 'flv',
|
||||
'title': 'video_1637',
|
||||
'formats': 'mincount:3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
|
||||
{
|
||||
'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
|
||||
'info_dict': {
|
||||
'id': 'smil-service',
|
||||
'ext': 'flv',
|
||||
'title': 'smil-service',
|
||||
'formats': 'mincount:1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
|
||||
{
|
||||
'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
|
||||
'info_dict': {
|
||||
'id': '4719370',
|
||||
'ext': 'mp4',
|
||||
'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
|
||||
'formats': 'mincount:3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
|
||||
{
|
||||
'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
|
||||
'info_dict': {
|
||||
'id': 'mZlp2ctYIUEB',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tikibad ontruimd wegens brand',
|
||||
'description': 'md5:05ca046ff47b931f9b04855015e163a4',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 33,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
@@ -231,6 +319,19 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
{
|
||||
# ooyala video embedded with http://player.ooyala.com/iframe.js
|
||||
'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
|
||||
'info_dict': {
|
||||
'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
|
||||
'ext': 'mp4',
|
||||
'title': '"Steve Jobs: Man in the Machine" trailer',
|
||||
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# multiple ooyala embeds on SBN network websites
|
||||
{
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
@@ -271,14 +372,6 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
},
|
||||
},
|
||||
# BBC iPlayer embeds
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
|
||||
'info_dict': {
|
||||
'title': 'BBC - Blogs - Adam Curtis - BUGGER',
|
||||
},
|
||||
'playlist_mincount': 18,
|
||||
},
|
||||
# RUTV embed
|
||||
{
|
||||
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
||||
@@ -333,6 +426,26 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Myvi.ru embed
|
||||
{
|
||||
'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
|
||||
'info_dict': {
|
||||
'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ужастики, русский трейлер (2015)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 153,
|
||||
}
|
||||
},
|
||||
# XHamster embed
|
||||
{
|
||||
'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
|
||||
'info_dict': {
|
||||
'id': 'showthread',
|
||||
'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
# Embedded TED video
|
||||
{
|
||||
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
||||
@@ -382,6 +495,26 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': 'Requires rtmpdump'
|
||||
}
|
||||
},
|
||||
# francetv embed
|
||||
{
|
||||
'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
|
||||
'info_dict': {
|
||||
'id': 'EV_30231',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alcaline, le concert avec Calogero',
|
||||
'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
|
||||
'upload_date': '20150226',
|
||||
'timestamp': 1424989860,
|
||||
'duration': 5400,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Forbidden'
|
||||
]
|
||||
},
|
||||
# Condé Nast embed
|
||||
{
|
||||
'url': 'http://www.wired.com/2014/04/honda-asimo/',
|
||||
@@ -655,6 +788,18 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'John Carlson Postgame 2/25/15',
|
||||
},
|
||||
},
|
||||
# Kaltura embed (different embed code)
|
||||
{
|
||||
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
|
||||
'info_dict': {
|
||||
'id': '1_a52wc67y',
|
||||
'ext': 'flv',
|
||||
'upload_date': '20150127',
|
||||
'uploader_id': 'PremierMedia',
|
||||
'timestamp': int,
|
||||
'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
|
||||
},
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
@@ -812,6 +957,50 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
|
||||
'uploader': 'Rogers Sportsnet',
|
||||
},
|
||||
},
|
||||
# Dailymotion Cloud video
|
||||
{
|
||||
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
|
||||
'md5': '49444254273501a64675a7e68c502681',
|
||||
'info_dict': {
|
||||
'id': '5585de919473990de4bee11b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le débat',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
}
|
||||
},
|
||||
# OnionStudios embed
|
||||
{
|
||||
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
|
||||
'info_dict': {
|
||||
'id': '2855',
|
||||
'ext': 'mp4',
|
||||
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
'uploader': 'ClickHole',
|
||||
'uploader_id': 'clickhole',
|
||||
}
|
||||
},
|
||||
# SnagFilms embed
|
||||
{
|
||||
'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
|
||||
'info_dict': {
|
||||
'id': '74849a00-85a9-11e1-9660-123139220831',
|
||||
'ext': 'mp4',
|
||||
'title': '#whilewewatch',
|
||||
}
|
||||
},
|
||||
# AdobeTVVideo embed
|
||||
{
|
||||
'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
|
||||
'md5': '43662b577c018ad707a63766462b1e87',
|
||||
'info_dict': {
|
||||
'id': '2456',
|
||||
'ext': 'mp4',
|
||||
'title': 'New experience with Acrobat DC',
|
||||
'description': 'New experience with Acrobat DC',
|
||||
'duration': 248.667,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -979,7 +1168,9 @@ class GenericIE(InfoExtractor):
|
||||
}
|
||||
|
||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||
force = self._downloader.params.get('force_generic_extractor', False)
|
||||
self._downloader.report_warning(
|
||||
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
||||
|
||||
if not full_response:
|
||||
request = compat_urllib_request.Request(url)
|
||||
@@ -1015,11 +1206,15 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed?
|
||||
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
||||
try:
|
||||
doc = parse_xml(webpage)
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
|
||||
return self._parse_smil(doc, url, video_id)
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
@@ -1031,7 +1226,7 @@ class GenericIE(InfoExtractor):
|
||||
# Sometimes embedded video player is hidden behind percent encoding
|
||||
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
|
||||
# Unescaping the whole page allows to handle those cases in a generic way
|
||||
webpage = compat_urllib_parse.unquote(webpage)
|
||||
webpage = compat_urllib_parse_unquote(webpage)
|
||||
|
||||
# it's tempting to parse this further, but you would
|
||||
# have to take into account all the variations like
|
||||
@@ -1089,18 +1284,15 @@ class GenericIE(InfoExtractor):
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='RtlNl')
|
||||
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||
if mobj:
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl)
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1))
|
||||
vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
|
||||
if vimeo_url is not None:
|
||||
return self.url_result(vimeo_url)
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
webpage, 'vid.me embed', default=None)
|
||||
if vid_me_embed_url is not None:
|
||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
@@ -1228,7 +1420,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
|
||||
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
@@ -1294,7 +1486,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'))
|
||||
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
|
||||
return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
|
||||
|
||||
# Look for funnyordie embed
|
||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||
@@ -1327,6 +1519,11 @@ class GenericIE(InfoExtractor):
|
||||
if pornhub_url:
|
||||
return self.url_result(pornhub_url, 'PornHub')
|
||||
|
||||
# Look for embedded XHamster player
|
||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||
if xhamster_urls:
|
||||
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||
@@ -1352,11 +1549,23 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
||||
|
||||
# Look for embedded francetv player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded smotri.com player
|
||||
smotri_url = SmotriIE._extract_url(webpage)
|
||||
if smotri_url:
|
||||
return self.url_result(smotri_url, 'Smotri')
|
||||
|
||||
# Look for embedded Myvi.ru player
|
||||
myvi_url = MyviIE._extract_url(webpage)
|
||||
if myvi_url:
|
||||
return self.url_result(myvi_url)
|
||||
|
||||
# Look for embedded soundcloud player
|
||||
mobj = re.search(
|
||||
r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
|
||||
@@ -1436,8 +1645,8 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
mobj = re.search(
|
||||
r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
|
||||
re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
|
||||
if mobj is not None:
|
||||
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
|
||||
|
||||
@@ -1494,6 +1703,30 @@ class GenericIE(InfoExtractor):
|
||||
if senate_isvp_url:
|
||||
return self.url_result(senate_isvp_url, 'SenateISVP')
|
||||
|
||||
# Look for Dailymotion Cloud videos
|
||||
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
|
||||
if dmcloud_url:
|
||||
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||
|
||||
# Look for OnionStudios embeds
|
||||
onionstudios_url = OnionStudiosIE._extract_url(webpage)
|
||||
if onionstudios_url:
|
||||
return self.url_result(onionstudios_url)
|
||||
|
||||
# Look for SnagFilms embeds
|
||||
snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
|
||||
if snagfilms_url:
|
||||
return self.url_result(snagfilms_url)
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))),
|
||||
'AdobeTVVideo')
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
@@ -1522,7 +1755,7 @@ class GenericIE(InfoExtractor):
|
||||
if not found:
|
||||
# Broaden the findall a little bit: JWPlayer JS loader
|
||||
found = filter_video(re.findall(
|
||||
r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
|
||||
r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
|
||||
if not found:
|
||||
# Flow player
|
||||
found = filter_video(re.findall(r'''(?xs)
|
||||
@@ -1561,7 +1794,7 @@ class GenericIE(InfoExtractor):
|
||||
if refresh_header:
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = compat_urlparse.urljoin(url, found.group(1))
|
||||
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||
self.report_following_redirect(new_url)
|
||||
return {
|
||||
'_type': 'url',
|
||||
@@ -1573,7 +1806,7 @@ class GenericIE(InfoExtractor):
|
||||
entries = []
|
||||
for video_url in found:
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
||||
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
|
||||
|
||||
# Sometimes, jwplayer extraction will result in a YouTube URL
|
||||
if YoutubeIE.suitable(video_url):
|
||||
@@ -1583,7 +1816,8 @@ class GenericIE(InfoExtractor):
|
||||
# here's a fun little line of code for you:
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
if determine_ext(video_url) == 'smil':
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': self._extract_smil_formats(video_url, video_id),
|
||||
@@ -1591,6 +1825,8 @@ class GenericIE(InfoExtractor):
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
elif ext == 'xspf':
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
else:
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
|
@@ -6,12 +6,13 @@ from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
qualities,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class GfycatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||
'info_dict': {
|
||||
'id': 'DeadlyDecisiveGermanpinscher',
|
||||
@@ -27,14 +28,33 @@ class GfycatIE(InfoExtractor):
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
|
||||
'info_dict': {
|
||||
'id': 'JauntyTimelyAmazontreeboa',
|
||||
'ext': 'mp4',
|
||||
'title': 'JauntyTimelyAmazontreeboa',
|
||||
'timestamp': 1411720126,
|
||||
'upload_date': '20140926',
|
||||
'uploader': 'anonymous',
|
||||
'duration': 3.52,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
gfy = self._download_json(
|
||||
'http://gfycat.com/cajax/get/%s' % video_id,
|
||||
video_id, 'Downloading video info')['gfyItem']
|
||||
video_id, 'Downloading video info')
|
||||
if 'error' in gfy:
|
||||
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
|
||||
gfy = gfy['gfyItem']
|
||||
|
||||
title = gfy.get('title') or gfy['gfyName']
|
||||
description = gfy.get('description')
|
||||
|
@@ -78,12 +78,7 @@ class GorillaVidIE(InfoExtractor):
|
||||
if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
(?:id="[^"]+"\s+)?
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
if fields['op'] == 'download1':
|
||||
countdown = int_or_none(self._search_regex(
|
||||
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -19,20 +17,19 @@ class HentaiStigmaIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
|
||||
r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
|
||||
webpage, 'title')
|
||||
wrap_url = self._html_search_regex(
|
||||
r'<iframe src="([^"]+mp4)"', webpage, 'wrapper url')
|
||||
r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
|
||||
wrap_webpage = self._download_webpage(wrap_url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, 'video url')
|
||||
r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -58,11 +58,7 @@ class HostingBulkIE(InfoExtractor):
|
||||
r'<img src="([^"]+)".+?class="pic"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
request = compat_urllib_request.Request(url, urlencode_postdata(fields))
|
||||
request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
@@ -1,8 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class HowcastIE(InfoExtractor):
|
||||
@@ -13,29 +12,31 @@ class HowcastIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '390161',
|
||||
'ext': 'mp4',
|
||||
'description': 'The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here\'s the proper way to tie a square knot.',
|
||||
'title': 'How to Tie a Square Knot Properly',
|
||||
}
|
||||
'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
|
||||
'timestamp': 1276081287,
|
||||
'upload_date': '20100609',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
|
||||
webpage, 'video URL')
|
||||
|
||||
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
|
||||
webpage, 'description', fatal=False)
|
||||
embed_code = self._search_regex(
|
||||
r'<iframe[^>]+src="[^"]+\bembed_code=([^\b]+)\b',
|
||||
webpage, 'ooyala embed code')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % embed_code,
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': video_description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'timestamp': parse_iso8601(self._html_search_meta(
|
||||
'article:published_time', webpage, 'timestamp')),
|
||||
}
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class HowStuffWorksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
|
||||
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
|
||||
@@ -46,6 +46,10 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -46,7 +46,7 @@ class ImdbIE(InfoExtractor):
|
||||
format_info = info['videoPlayerObject']['video']
|
||||
formats.append({
|
||||
'format_id': f_id,
|
||||
'url': format_info['url'],
|
||||
'url': format_info['videoInfoList'][0]['videoUrl'],
|
||||
})
|
||||
|
||||
return {
|
||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class InaIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||
|
142
youtube_dl/extractor/indavideo.py
Normal file
142
youtube_dl/extractor/indavideo.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class IndavideoEmbedIE(InfoExtractor):
    """Extractor for indavideo.hu player URLs.

    Matches the HTML player URL (with or without the ``embed.`` host prefix)
    and the Flash player URL that carries the video hash in ``vID``/``vid``.
    """
    _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
        'md5': 'f79b009c66194acacd40712a6778acfa',
        'info_dict': {
            'id': '1837039',
            'ext': 'mp4',
            'title': 'Cicatánc',
            'description': '',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'cukiajanlo',
            'uploader_id': '83729',
            'timestamp': 1439193826,
            'upload_date': '20150810',
            'duration': 72,
            'age_limit': 0,
            'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
        },
    }, {
        'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
        'only_matching': True,
    }, {
        'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the player JSON for the hash in *url* and build the info dict.

        The ``data`` object of the JSON response supplies the title, media
        URLs and metadata. A missing ``title`` raises ``KeyError``; all other
        fields are optional.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
            video_id)['data']

        title = video['title']

        # Gather the candidate media URLs from both fields, skipping missing
        # values and duplicates.  Order is preserved so that the URL used to
        # derive the prefix below is deterministic.
        video_urls = []
        for candidate in (video.get('video_files') or []) + [video.get('video_file')]:
            if candidate and candidate not in video_urls:
                video_urls.append(candidate)

        # flv_files entries are joined onto the directory of the first known
        # URL; without any known URL there is nothing to join them onto.
        if video_urls:
            video_prefix = video_urls[0].rsplit('/', 1)[0]
            for flv_file in video.get('flv_files') or []:
                flv_url = '%s/%s' % (video_prefix, flv_file)
                if flv_url not in video_urls:
                    video_urls.append(flv_url)

        formats = [{
            'url': video_url,
            # The regex yields a string (or None); heights must be numeric
            # so that formats can be ordered by resolution.
            'height': int_or_none(self._search_regex(
                r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None)),
        } for video_url in video_urls]
        self._sort_formats(formats)

        timestamp = video.get('date')
        if timestamp:
            # upload date is in CEST
            timestamp = parse_iso8601(timestamp + ' +0200', ' ')

        thumbnails = [{
            'url': self._proto_relative_url(thumbnail)
        } for thumbnail in video.get('thumbnails', [])]

        tags = [tag['title'] for tag in video.get('tags', [])]

        return {
            'id': video.get('id') or video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnails': thumbnails,
            'uploader': video.get('user_name'),
            'uploader_id': video.get('user_id'),
            'timestamp': timestamp,
            'duration': int_or_none(video.get('length')),
            'age_limit': parse_age_limit(video.get('age_limit')),
            'tags': tags,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class IndavideoIE(InfoExtractor):
    """Extractor for indavideo.hu video pages (any subdomain).

    The page itself is only used to locate the player URL; the actual
    extraction is delegated to the ``IndavideoEmbed`` extractor.
    """
    _VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://indavideo.hu/video/Vicces_cica_1',
        'md5': '8c82244ba85d2a2310275b318eb51eac',
        'info_dict': {
            'id': '1335611',
            'display_id': 'Vicces_cica_1',
            'ext': 'mp4',
            'title': 'Vicces cica',
            'description': 'Játszik a tablettel. :D',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'Jet_Pack',
            'uploader_id': '491217',
            'timestamp': 1390821212,
            'upload_date': '20140127',
            'duration': 7,
            'age_limit': 0,
            'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'],
        },
    }, {
        'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz',
        'only_matching': True,
    }, {
        'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko',
        'only_matching': True,
    }, {
        'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci',
        'only_matching': True,
    }, {
        'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt',
        'only_matching': True,
    }, {
        'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a transparent redirect to the player URL found in the page."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The player URL is advertised through <link rel="video_src">.
        player_url = self._search_regex(
            r'<link[^>]+rel="video_src"[^>]+href="(.+?)"', page, 'embed url')

        result = {
            '_type': 'url_transparent',
            'ie_key': 'IndavideoEmbed',
        }
        result['url'] = player_url
        result['display_id'] = slug
        return result
|
@@ -4,14 +4,15 @@ import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class InfoQIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
|
||||
'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
|
||||
'info_dict': {
|
||||
@@ -20,7 +21,10 @@ class InfoQIE(InfoExtractor):
|
||||
'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
|
||||
'title': 'A Few of My Favorite [Python] Things',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -35,14 +39,14 @@ class InfoQIE(InfoExtractor):
|
||||
# Extract video URL
|
||||
encoded_id = self._search_regex(
|
||||
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
|
||||
real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
|
||||
playpath = 'mp4:' + real_id
|
||||
|
||||
video_filename = playpath.split('/')[-1]
|
||||
video_id, extension = video_filename.split('.')
|
||||
|
||||
http_base = self._search_regex(
|
||||
r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
|
||||
r'EXPRESSINSTALL_SWF\s*=\s*[^"]*"((?:https?:)?//[^/"]+/)', webpage,
|
||||
'HTTP base URL')
|
||||
|
||||
formats = [{
|
||||
@@ -52,7 +56,7 @@ class InfoQIE(InfoExtractor):
|
||||
'play_path': playpath,
|
||||
}, {
|
||||
'format_id': 'http',
|
||||
'url': http_base + real_id,
|
||||
'url': compat_urlparse.urljoin(url, http_base) + real_id,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -3,23 +3,18 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import os.path
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
import zlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class IqiyiIE(InfoExtractor):
|
||||
IE_NAME = 'iqiyi'
|
||||
IE_DESC = '爱奇艺'
|
||||
|
||||
_VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html'
|
||||
|
||||
@@ -38,62 +33,57 @@ class IqiyiIE(InfoExtractor):
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '7e49376fecaffa115d951634917fe105',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '41b75ba13bb7ac0e411131f92bc4f6ca',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '0cee1dd0a3d46a83e71e2badeae2aab0',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '4f8ad72373b0c491b582e7c196b0b1f9',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': 'd89ad028bcfad282918e8098e811711d',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '9cb1e5c95da25dff0660c32ae50903b7',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '155116e0ff1867bbc9b98df294faabc9',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'md5': '53f5db77622ae14fa493ed2a278a082b',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_FORMATS_MAP = [
|
||||
@@ -211,20 +201,7 @@ class IqiyiIE(InfoExtractor):
|
||||
return raw_data
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
|
||||
filename, _ = os.path.splitext(url_basename(swf_url))
|
||||
enc_key_json = self._downloader.cache.load('iqiyi-enc-key', filename)
|
||||
if enc_key_json is not None:
|
||||
return enc_key_json[0]
|
||||
|
||||
req = self._request_webpage(
|
||||
swf_url, video_id, note='download swf content')
|
||||
cn = req.read()
|
||||
cn = zlib.decompress(cn[8:])
|
||||
pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\$&vv')
|
||||
enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')
|
||||
|
||||
self._downloader.cache.store('iqiyi-enc-key', filename, [enc_key])
|
||||
|
||||
enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie
|
||||
return enc_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
42
youtube_dl/extractor/ir90tv.py
Normal file
42
youtube_dl/extractor/ir90tv.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_start
|
||||
|
||||
|
||||
class Ir90TvIE(InfoExtractor):
    """Extractor for video pages on 90tv.ir."""
    _VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*'
    _TESTS = [{
        'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
        'md5': '411dbd94891381960cb9e13daa47a869',
        'info_dict': {
            'id': '95719',
            'ext': 'mp4',
            'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'http://www.90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape title, media URL and poster image from the video page."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The <title> tag carries a site prefix that is not part of the title.
        raw_title = self._html_search_regex(
            r'<title>([^<]+)</title>', page, 'title')
        title = remove_start(raw_title, '90tv.ir :: ')

        media_url = self._search_regex(
            r'<source[^>]+src="([^"]+)"', page, 'video url')

        # The poster is optional: a missing one must not abort extraction.
        poster = self._search_regex(
            r'poster="([^"]+)"', page, 'thumbnail url', fatal=False)

        info = {
            'url': media_url,
            'id': video_id,
            'title': title,
        }
        info['video_url'] = media_url
        info['thumbnail'] = poster
        return info
|
@@ -8,9 +8,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
|
||||
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
|
||||
'info_dict': {
|
||||
@@ -19,7 +19,10 @@ class JeuxVideoIE(InfoExtractor):
|
||||
'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
|
||||
'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -2,7 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import compat_urllib_parse_unquote_plus
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
)
|
||||
@@ -24,7 +24,7 @@ class KaraoketvIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
page_video_url = self._og_search_video_url(webpage, video_id)
|
||||
config_json = compat_urllib_parse.unquote_plus(self._search_regex(
|
||||
config_json = compat_urllib_parse_unquote_plus(self._search_regex(
|
||||
r'config=(.*)', page_video_url, 'configuration'))
|
||||
|
||||
urls_info_json = self._download_json(
|
||||
|
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class KontrTubeIE(InfoExtractor):
|
||||
@@ -34,33 +37,28 @@ class KontrTubeIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
video_url = self._search_regex(
|
||||
r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(
|
||||
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||
thumbnail = self._search_regex(
|
||||
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'video title')
|
||||
r'(?s)<h2>(.+?)</h2>', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'video description')
|
||||
'description', webpage, 'description')
|
||||
|
||||
mobj = re.search(
|
||||
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
|
||||
webpage)
|
||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||
duration = self._search_regex(
|
||||
r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
|
||||
if duration:
|
||||
duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
|
||||
view_count = self._search_regex(
|
||||
r'Просмотров: <em>([^<]+)</em>',
|
||||
webpage, 'view count', fatal=False)
|
||||
if view_count:
|
||||
view_count = int_or_none(view_count.replace(' ', ''))
|
||||
|
||||
comment_count = None
|
||||
comment_str = self._html_search_regex(
|
||||
r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
|
||||
if comment_str.startswith('комментариев нет'):
|
||||
comment_count = 0
|
||||
else:
|
||||
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
|
||||
if mobj:
|
||||
comment_count = mobj.group('total')
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'Комментарии \((\d+)\)<', webpage, ' comment count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
314
youtube_dl/extractor/kuwo.py
Normal file
314
youtube_dl/extractor/kuwo.py
Normal file
@@ -0,0 +1,314 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class KuwoBaseIE(InfoExtractor):
    """Shared helper for Kuwo extractors: resolves download URLs per format."""

    # One entry per format probed on the server.  'br' is the bitrate token
    # sent in the query (omitted entries send an empty value).
    _FORMATS = [
        {'format': 'ape', 'ext': 'ape', 'preference': 100},
        {'format': 'mp3-320', 'ext': 'mp3', 'br': '320kmp3', 'abr': 320, 'preference': 80},
        {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70},
        {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60},
        {'format': 'wma', 'ext': 'wma', 'preference': 20},
        {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
    ]

    def _get_formats(self, song_id):
        """Query the URL-conversion endpoint once per known format.

        Returns the sorted list of format dicts for every format whose
        response body is an http(s) URL; other responses are ignored.
        """
        available = []
        for spec in self._FORMATS:
            label = spec['format']
            query_values = (spec['ext'], spec.get('br', ''), song_id)
            song_url = self._download_webpage(
                'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' %
                query_values,
                song_id, note='Download %s url info' % label,
            )
            # Anything that is not an absolute http(s) URL is not usable.
            if not song_url.startswith(('http://', 'https://')):
                continue
            available.append({
                'url': song_url,
                'format_id': label,
                'format': label,
                'preference': spec['preference'],
                'abr': spec.get('abr'),
            })
        self._sort_formats(available)
        return available
|
||||
|
||||
|
||||
class KuwoIE(KuwoBaseIE):
    """Extractor for a single song page on www.kuwo.cn."""
    IE_NAME = 'kuwo:song'
    IE_DESC = '酷我音乐'
    # The slash after the numeric id is optional so that song URLs lacking
    # it (as found in some playlist-page links) are still accepted.
    _VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.kuwo.cn/yinyue/635632/',
        'info_dict': {
            'id': '635632',
            'ext': 'ape',
            'title': '爱我别走',
            'creator': '张震岳',
            'upload_date': '20080122',
            'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c'
        },
    }, {
        'url': 'http://www.kuwo.cn/yinyue/6446136/',
        'info_dict': {
            'id': '6446136',
            'ext': 'mp3',
            'title': '心',
            'creator': 'IU',
            'upload_date': '20150518',
        },
        'params': {
            'format': 'mp3-320'
        },
    }]

    def _real_extract(self, url):
        """Extract song metadata, lyrics and formats for the song in *url*.

        The publish date is taken from the album page when the song page
        links to an album; otherwise ``upload_date`` is ``None``.
        """
        song_id = self._match_id(url)
        webpage = self._download_webpage(
            url, song_id, note='Download song detail info',
            errnote='Unable to get song detail info')

        song_name = self._html_search_regex(
            r'<h1[^>]+title="([^"]+)">', webpage, 'song name')
        singer_name = self._html_search_regex(
            r'<div[^>]+class="s_img">\s*<a[^>]+title="([^>]+)"',
            webpage, 'singer name', fatal=False)
        lrc_content = clean_html(get_element_by_id('lrcContent', webpage))
        if lrc_content == '暂无':  # indicates no lyrics
            lrc_content = None

        formats = self._get_formats(song_id)

        album_id = self._html_search_regex(
            r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
            webpage, 'album id', fatal=False)

        publish_time = None
        if album_id is not None:
            album_info_page = self._download_webpage(
                'http://www.kuwo.cn/album/%s/' % album_id, song_id,
                note='Download album detail info',
                errnote='Unable to get album detail info')

            publish_time = self._html_search_regex(
                r'发行时间:(\d{4}-\d{2}-\d{2})', album_info_page,
                'publish time', fatal=False)
            if publish_time:
                # YYYY-MM-DD -> YYYYMMDD
                publish_time = publish_time.replace('-', '')

        return {
            'id': song_id,
            'title': song_name,
            'creator': singer_name,
            'upload_date': publish_time,
            'description': lrc_content,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class KuwoAlbumIE(InfoExtractor):
    """Extractor turning a www.kuwo.cn album page into a playlist of songs."""
    IE_NAME = 'kuwo:album'
    IE_DESC = '酷我音乐 - 专辑'
    _VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/'
    _TEST = {
        'url': 'http://www.kuwo.cn/album/502294/',
        'info_dict': {
            'id': '502294',
            'title': 'M',
            'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c',
        },
        'playlist_count': 2,
    }

    def _real_extract(self, url):
        """Collect the album name, introduction text and linked song pages."""
        album_id = self._match_id(url)

        page = self._download_webpage(
            url, album_id, note='Download album info',
            errnote='Unable to get album info')

        album_name = self._html_search_regex(
            r'<div[^>]+class="comm"[^<]+<h1[^>]+title="([^"]+)"', page,
            'album name')

        # The intro element starts with "<album name>简介:"; strip that label.
        intro_text = clean_html(get_element_by_id('intro', page))
        album_intro = remove_start(intro_text, '%s简介:' % album_name)

        song_links = re.findall(
            r'<p[^>]+class="listen"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"',
            page)
        entries = []
        for song_url in song_links:
            entries.append(self.url_result(song_url, 'Kuwo'))

        return self.playlist_result(entries, album_id, album_name, album_intro)
|
||||
|
||||
|
||||
class KuwoChartIE(InfoExtractor):
    """Extractor turning a yinyue.kuwo.cn billboard page into a playlist."""
    IE_NAME = 'kuwo:chart'
    IE_DESC = '酷我音乐 - 排行榜'
    # The dot before "htm" is escaped so only a literal ".htm" suffix matches.
    _VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+)\.htm'
    _TEST = {
        'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
        'info_dict': {
            'id': '香港中文龙虎榜',
            'title': '香港中文龙虎榜',
            'description': 're:\d{4}第\d{2}期',
        },
        'playlist_mincount': 10,
    }

    def _real_extract(self, url):
        """Collect the chart name, issue label and linked song pages."""
        chart_id = self._match_id(url)
        webpage = self._download_webpage(
            url, chart_id, note='Download chart info',
            errnote='Unable to get chart info')

        chart_name = self._html_search_regex(
            r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name')

        chart_desc = self._html_search_regex(
            r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc')

        # Keep the trailing slash inside the captured URL so that the result
        # has the canonical ".../yinyue/<id>/" song-page form.
        entries = [
            self.url_result(song_url, 'Kuwo') for song_url in re.findall(
                r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+/)"', webpage)
        ]
        return self.playlist_result(entries, chart_id, chart_name, chart_desc)
|
||||
|
||||
|
||||
class KuwoSingerIE(InfoExtractor):
    """Extractor for Kuwo singer pages; returns the singer's songs as a playlist."""

    IE_NAME = 'kuwo:singer'
    IE_DESC = '酷我音乐 - 歌手'
    _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
        'info_dict': {
            'id': 'bruno+mars',
            'title': 'Bruno Mars',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
        'info_dict': {
            'id': 'Ali',
            'title': 'Ali',
        },
        'playlist_mincount': 95,
    }]

    def _real_extract(self, url):
        """Collect song entries for the singer identified by ``url``.

        When ``url`` points at a ``music.htm`` / ``music_<n>.htm`` listing,
        every listing page is walked until no "下一页" (next page) link is
        found.  Otherwise only the first listing page is fetched and at
        most ten songs are kept from it.
        """
        singer_id = self._match_id(url)
        singer_page = self._download_webpage(
            url, singer_id,
            note='Download singer info', errnote='Unable to get singer info')
        singer_name = self._html_search_regex(
            r'<div class="title clearfix">\s*<h1>([^<]+)<span',
            singer_page, 'singer name')

        first_page_only = not re.search(r'/music(?:_\d+)?\.htm', url)
        # None as a slice bound keeps every item.
        per_page_limit = 10 if first_page_only else None

        entries = []
        for page_num in itertools.count(1):
            list_url = 'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (
                singer_id, page_num)
            list_page = self._download_webpage(
                list_url, singer_id,
                note='Download song list page #%d' % page_num,
                errnote='Unable to get song list page #%d' % page_num)

            song_urls = re.findall(
                r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/',
                list_page)
            for song_url in song_urls[:per_page_limit]:
                entries.append(self.url_result(song_url, 'Kuwo'))

            if first_page_only:
                break
            if not re.search(r'<a[^>]+href="[^"]+">下一页</a>', list_page):
                break

        return self.playlist_result(entries, singer_id, singer_name)
|
||||
|
||||
|
||||
class KuwoCategoryIE(InfoExtractor):
    """Extractor for Kuwo category pages.

    The song list is read from the ``jsonm`` JavaScript variable embedded
    in the page and returned as a playlist of 'Kuwo' song URLs.
    """

    IE_NAME = 'kuwo:category'
    IE_DESC = '酷我音乐 - 分类'
    # The dot in front of "htm" is escaped so only a literal ".htm" matches.
    _VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?)\.htm'
    _TEST = {
        'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
        'info_dict': {
            'id': '86375',
            'title': '八十年代精选',
            'description': '这些都是属于八十年代的回忆!',
        },
        'playlist_count': 30,
    }

    def _real_extract(self, url):
        """Download the category page at ``url`` and return it as a playlist.

        The description is optional: it stays ``None`` when the page has no
        element with id ``intro`` instead of failing the whole extraction.
        """
        category_id = self._match_id(url)
        webpage = self._download_webpage(
            url, category_id, note='Download category info',
            errnote='Unable to get category info')

        category_name = self._html_search_regex(
            r'<h1[^>]+title="([^<>]+?)">[^<>]+?</h1>', webpage, 'category name')

        # NOTE(review): get_element_by_id presumably returns None when the
        # element is missing; calling .strip() on that would raise
        # AttributeError, so guard before touching the value.
        intro = get_element_by_id('intro', webpage)
        category_desc = None
        if intro is not None:
            category_desc = remove_start(
                intro.strip(), '%s简介:' % category_name)

        jsonm = self._parse_json(self._html_search_regex(
            r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)

        entries = [
            self.url_result('http://www.kuwo.cn/yinyue/%s/' % song['musicrid'], 'Kuwo')
            for song in jsonm['musiclist']
        ]
        return self.playlist_result(entries, category_id, category_name, category_desc)
|
||||
|
||||
|
||||
class KuwoMvIE(KuwoBaseIE):
    """Extractor for Kuwo music-video pages."""

    IE_NAME = 'kuwo:mv'
    IE_DESC = '酷我音乐 - MV'
    _VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
    _TEST = {
        'url': 'http://www.kuwo.cn/mv/6480076/',
        'info_dict': {
            'id': '6480076',
            'ext': 'mkv',
            'title': '我们家MV',
            'creator': '2PM',
        },
    }
    # Video formats are appended to the formats inherited from the base class.
    _FORMATS = KuwoBaseIE._FORMATS + [
        {'format': 'mkv', 'ext': 'mkv', 'preference': 250},
        {'format': 'mp4', 'ext': 'mp4', 'preference': 200},
    ]

    def _real_extract(self, url):
        """Return the info dict for the music video at ``url``.

        Raises ExtractorError when the song and singer names cannot be
        located in the page.
        """
        song_id = self._match_id(url)
        page = self._download_webpage(
            url, song_id,
            note='Download mv detail info: %s' % song_id,
            errnote='Unable to get mv detail info: %s' % song_id)

        names = re.search(
            r'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"',
            page)
        if not names:
            raise ExtractorError('Unable to find song or singer names')

        return {
            'id': song_id,
            'title': names.group('song'),
            'creator': names.group('singer'),
            'formats': self._get_formats(song_id),
        }
|
62
youtube_dl/extractor/lecture2go.py
Normal file
62
youtube_dl/extractor/lecture2go.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class Lecture2GoIE(InfoExtractor):
    """Extractor for lecture recordings on lecture2go.uni-hamburg.de."""

    _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
    _TEST = {
        'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
        'md5': 'ac02b570883020d208d405d5a3fd2f7f',
        'info_dict': {
            'id': '17473',
            'ext': 'flv',
            'title': '2 - Endliche Automaten und reguläre Sprachen',
            'creator': 'Frank Heitmann',
            'duration': 5220,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the lecture page at ``url``.

        Only the title is mandatory; creator, duration and view count are
        looked up with ``fatal=False``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')

        # set() drops duplicate source URLs found in the page.
        formats = []
        for media_url in set(re.findall(r'"src","([^"]+)"', webpage)):
            media_ext = determine_ext(media_url)
            if media_ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(media_url, video_id))
            elif media_ext == 'f4m':
                formats.extend(self._extract_f4m_formats(media_url, video_id))
            else:
                formats.append({'url': media_url})
        self._sort_formats(formats)

        creator = self._html_search_regex(
            r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)

        duration_text = self._html_search_regex(
            r'Duration:\s*</em>\s*<em[^>]*>([^<]+)</em>', webpage, 'duration', fatal=False)
        duration = parse_duration(duration_text)

        views_text = self._html_search_regex(
            r'Views:\s*</em>\s*<em[^>]+>(\d+)</em>', webpage, 'view count', fatal=False)
        view_count = int_or_none(views_text)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'creator': creator,
            'duration': duration,
            'view_count': view_count,
        }
|
@@ -15,10 +15,12 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class LetvIE(InfoExtractor):
|
||||
IE_DESC = '乐视网'
|
||||
_VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+).html'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -133,7 +135,7 @@ class LetvIE(InfoExtractor):
|
||||
}
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
url_info_dict['height'] = format_id[:-1]
|
||||
url_info_dict['height'] = int_or_none(format_id[:-1])
|
||||
|
||||
urls.append(url_info_dict)
|
||||
|
||||
|
@@ -8,9 +8,9 @@ from ..utils import unified_strdate
|
||||
|
||||
|
||||
class LibsynIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
|
||||
'md5': '443360ee1b58007bc3dcf09b41d093bb',
|
||||
'info_dict': {
|
||||
@@ -19,12 +19,24 @@ class LibsynIE(InfoExtractor):
|
||||
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
|
||||
'md5': '6c5cb21acd622d754d3b1a92b582ce42',
|
||||
'info_dict': {
|
||||
'id': '3727166',
|
||||
'ext': 'mp3',
|
||||
'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
|
||||
'upload_date': '20150818',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
url = m.group('mainurl')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = [{
|
||||
@@ -32,20 +44,18 @@ class LibsynIE(InfoExtractor):
|
||||
} for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
|
||||
|
||||
podcast_title = self._search_regex(
|
||||
r'<h2>([^<]+)</h2>', webpage, 'title')
|
||||
r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
|
||||
episode_title = self._search_regex(
|
||||
r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
|
||||
r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')
|
||||
|
||||
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'<div id="info_text_body">(.+?)</div>', webpage,
|
||||
'description', fatal=False)
|
||||
|
||||
'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
release_date = unified_strdate(self._search_regex(
|
||||
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
||||
|
||||
|
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -39,7 +40,6 @@ class LifeNewsIE(InfoExtractor):
|
||||
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
||||
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
||||
'upload_date': '20150402',
|
||||
'uploader': 'embed.life.ru',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/news/153461',
|
||||
@@ -50,7 +50,6 @@ class LifeNewsIE(InfoExtractor):
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
'uploader': 'embed.life.ru',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/video/13035',
|
||||
@@ -72,20 +71,20 @@ class LifeNewsIE(InfoExtractor):
|
||||
if not videos and not iframe_link:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
|
||||
if title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage),
|
||||
' - Первый по срочным новостям — LIFE | NEWS')
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
|
||||
comment_count = self._html_search_regex(
|
||||
r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)
|
||||
r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
|
||||
webpage, 'comment count', fatal=False)
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False)
|
||||
r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
|
@@ -11,13 +11,13 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class LyndaBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
|
||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||
_NETRC_MACHINE = 'lynda'
|
||||
|
||||
@@ -30,18 +30,18 @@ class LyndaBaseIE(InfoExtractor):
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'username': username,
|
||||
'password': password,
|
||||
'username': username.encode('utf-8'),
|
||||
'password': password.encode('utf-8'),
|
||||
'remember': 'false',
|
||||
'stayPut': 'false'
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
# Not (yet) logged in
|
||||
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
|
||||
m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page)
|
||||
if m is not None:
|
||||
response = m.group('json')
|
||||
response_json = json.loads(response)
|
||||
@@ -65,12 +65,21 @@ class LyndaBaseIE(InfoExtractor):
|
||||
'stayPut': 'false',
|
||||
}
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8'))
|
||||
login_page = self._download_webpage(
|
||||
request, None,
|
||||
'Confirming log in and log out from another device')
|
||||
|
||||
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
||||
if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
|
||||
if 'login error' in login_page:
|
||||
mobj = re.search(
|
||||
r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>',
|
||||
login_page)
|
||||
if mobj:
|
||||
raise ExtractorError(
|
||||
'lynda returned error: %s - %s'
|
||||
% (mobj.group('title'), clean_html(mobj.group('description'))),
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
|
@@ -2,9 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class MalemotionIE(InfoExtractor):
|
||||
@@ -24,7 +22,7 @@ class MalemotionIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = compat_urllib_parse.unquote(self._search_regex(
|
||||
video_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(.*?)</title', webpage, 'title')
|
||||
|
@@ -29,7 +29,7 @@ class MDRIE(InfoExtractor):
|
||||
doc = self._download_xml(domain + xmlurl, video_id)
|
||||
formats = []
|
||||
for a in doc.findall('./assets/asset'):
|
||||
url_el = a.find('.//progressiveDownloadUrl')
|
||||
url_el = a.find('./progressiveDownloadUrl')
|
||||
if url_el is None:
|
||||
continue
|
||||
abr = int(a.find('bitrateAudio').text) // 1000
|
||||
|
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -155,7 +156,7 @@ class MetacafeIE(InfoExtractor):
|
||||
video_url = None
|
||||
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
|
||||
if mobj is not None:
|
||||
mediaURL = compat_urllib_parse.unquote(mobj.group(1))
|
||||
mediaURL = compat_urllib_parse_unquote(mobj.group(1))
|
||||
video_ext = mediaURL[-3:]
|
||||
|
||||
# Extract gdaKey if available
|
||||
|
@@ -5,6 +5,7 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -48,7 +49,7 @@ class MiTeleIE(InfoExtractor):
|
||||
domain = 'http://' + domain
|
||||
info_url = compat_urlparse.urljoin(
|
||||
domain,
|
||||
compat_urllib_parse.unquote(embed_data['flashvars']['host'])
|
||||
compat_urllib_parse_unquote(embed_data['flashvars']['host'])
|
||||
)
|
||||
info_el = self._download_xml(info_url, episode).find('./video/info')
|
||||
|
||||
|
@@ -3,9 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
@@ -60,7 +58,7 @@ class MixcloudIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader = mobj.group(1)
|
||||
cloudcast_name = mobj.group(2)
|
||||
track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))
|
||||
track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
|
||||
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
|
@@ -5,9 +5,9 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ class MofosexIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
|
||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
||||
video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
|
@@ -9,7 +9,10 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class MonikerIE(InfoExtractor):
|
||||
@@ -24,6 +27,14 @@ class MonikerIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
|
||||
'md5': '710883dee1bfc370ecf9fa6a89307c88',
|
||||
'info_dict': {
|
||||
'id': 'jih3nce3x6wn',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vidspot.net/l2ngsmhs8ci5',
|
||||
'md5': '710883dee1bfc370ecf9fa6a89307c88',
|
||||
@@ -38,7 +49,10 @@ class MonikerIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
orig_video_id = self._match_id(url)
|
||||
video_id = remove_start(orig_video_id, 'embed-')
|
||||
url = url.replace(orig_video_id, video_id)
|
||||
assert re.match(self._VALID_URL, url) is not None
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>File Not Found<' in orig_webpage:
|
||||
|
58
youtube_dl/extractor/mwave.py
Normal file
58
youtube_dl/extractor/mwave.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class MwaveIE(InfoExtractor):
    """Extractor for Mnet TV clips hosted on mwave.interest.me."""

    _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
        'md5': 'c930e27b7720aaa3c9d0018dfc8ff6cc',
        'info_dict': {
            'id': '168859',
            'ext': 'flv',
            'title': '[M COUNTDOWN] SISTAR - SHAKE IT',
            # Raw string: "\." must reach the regex engine untouched.
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'M COUNTDOWN',
            'duration': 206,
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clip at ``url``.

        Fetches the clip's VOD description JSON, then, for every CDN entry
        that carries a URL, fetches the per-stream JSON and expands its
        ``fileurl`` into f4m formats.  Entries lacking a URL or a
        ``fileurl`` are skipped.
        """
        video_id = self._match_id(url)

        # The query string must spell "&sectorid=" literally; "&sect" is a
        # legacy HTML entity, so HTML-decoding this URL turns it into the
        # section sign and silently drops the parameter.
        vod_info = self._download_json(
            'http://mwave.interest.me/onair/vod_info.m?vodtype=CL&sectorid=&endinfo=Y&id=%s' % video_id,
            video_id, 'Download vod JSON')

        formats = []
        for num, cdn_info in enumerate(vod_info['cdn']):
            stream_url = cdn_info.get('url')
            if not stream_url:
                continue
            # Fall back to the positional index when the CDN entry is unnamed.
            stream_name = cdn_info.get('name') or compat_str(num)
            f4m_stream = self._download_json(
                stream_url, video_id,
                'Download %s stream JSON' % stream_name)
            f4m_url = f4m_stream.get('fileurl')
            if not f4m_url:
                continue
            formats.extend(
                self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': vod_info['title'],
            'thumbnail': vod_info.get('cover'),
            'uploader': vod_info.get('program_title'),
            'duration': parse_duration(vod_info.get('time')),
            'view_count': int_or_none(vod_info.get('hit')),
            'formats': formats,
        }
|
@@ -35,7 +35,8 @@ class MySpassIE(InfoExtractor):
|
||||
|
||||
# get metadata
|
||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
||||
metadata = self._download_xml(metadata_url, video_id)
|
||||
metadata = self._download_xml(
|
||||
metadata_url, video_id, transform_source=lambda s: s.strip())
|
||||
|
||||
# extract values from metadata
|
||||
url_flv_el = metadata.find('url_flv')
|
||||
|
60
youtube_dl/extractor/myvi.py
Normal file
60
youtube_dl/extractor/myvi.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .vimple import SprutoBaseIE
|
||||
|
||||
|
||||
class MyviIE(SprutoBaseIE):
    """Extractor for myvi.ru / myvi.tv player, embed and API URLs."""

    _VALID_URL = r'''(?x)
                    https?://
                        myvi\.(?:ru/player|tv)/
                        (?:
                            (?:
                                embed/html|
                                flash|
                                api/Video/Get
                            )/|
                            content/preloader\.swf\?.*\bid=
                        )
                        (?P<id>[\da-zA-Z_-]+)
                    '''
    _TESTS = [{
        'url': 'http://myvi.ru/player/embed/html/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
        'md5': '571bbdfba9f9ed229dc6d34cc0f335bf',
        'info_dict': {
            'id': 'f16b2bbd-cde8-481c-a981-7cd48605df43',
            'ext': 'mp4',
            'title': 'хозяин жизни',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 25,
        },
    }, {
        'url': 'http://myvi.ru/player/content/preloader.swf?id=oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wOYf1WFpPfc_bWTKGVf_Zafr0',
        'only_matching': True,
    }, {
        'url': 'http://myvi.ru/player/api/Video/Get/oOy4euHA6LVwNNAjhD9_Jq5Ha2Qf0rtVMVFMAZav8wObeRTZaCATzucDQIDph8hQU0',
        'only_matching': True,
    }, {
        'url': 'http://myvi.tv/embed/html/oTGTNWdyz4Zwy_u1nraolwZ1odenTd9WkTnRfIL9y8VOgHYqOHApE575x4_xxS9Vn0?ap=0',
        'only_matching': True,
    }, {
        'url': 'http://myvi.ru/player/flash/ocp2qZrHI-eZnHKQBK4cZV60hslH8LALnk0uBfKsB-Q4WnY26SeGoYPi8HWHxu0O30',
        'only_matching': True,
    }]

    @classmethod
    def _extract_url(cls, webpage):
        """Return the src of the first myvi iframe in ``webpage``, or None."""
        iframe = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//myvi\.(?:ru/player|tv)/(?:embed/html|flash)/[^"]+)\1', webpage)
        if not iframe:
            return None
        return iframe.group('url')

    def _real_extract(self, url):
        """Fetch the player API JSON for ``url`` and extract its 'sprutoData'."""
        video_id = self._match_id(url)

        api_url = 'http://myvi.ru/player/api/Video/Get/%s?sig' % video_id
        api_response = self._download_json(api_url, video_id)

        return self._extract_spruto(api_response['sprutoData'], video_id)
|
@@ -10,6 +10,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_ord,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -107,7 +108,7 @@ class MyVideoIE(InfoExtractor):
|
||||
if not a == '_encxml':
|
||||
params[a] = b
|
||||
else:
|
||||
encxml = compat_urllib_parse.unquote(b)
|
||||
encxml = compat_urllib_parse_unquote(b)
|
||||
if not params.get('domain'):
|
||||
params['domain'] = 'www.myvideo.de'
|
||||
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
|
||||
@@ -135,7 +136,7 @@ class MyVideoIE(InfoExtractor):
|
||||
video_url = None
|
||||
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
|
||||
if mobj:
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
||||
video_url = compat_urllib_parse_unquote(mobj.group(1))
|
||||
if 'myvideo2flash' in video_url:
|
||||
self.report_warning(
|
||||
'Rewriting URL to use unencrypted rtmp:// ...',
|
||||
@@ -147,10 +148,10 @@ class MyVideoIE(InfoExtractor):
|
||||
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
|
||||
if mobj is None:
|
||||
raise ExtractorError('unable to extract url')
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
|
||||
video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
|
||||
|
||||
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
|
||||
video_file = compat_urllib_parse.unquote(video_file)
|
||||
video_file = compat_urllib_parse_unquote(video_file)
|
||||
|
||||
if not video_file.endswith('f4m'):
|
||||
ppath, prefix = video_file.split('.')
|
||||
@@ -159,7 +160,7 @@ class MyVideoIE(InfoExtractor):
|
||||
video_playpath = ''
|
||||
|
||||
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
||||
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
|
||||
|
||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||
webpage, 'title')
|
||||
|
@@ -8,25 +8,40 @@ from ..utils import (
|
||||
|
||||
|
||||
class NationalGeographicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
|
||||
_VALID_URL = r'http://video\.nationalgeographic\.com/.*?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||
'info_dict': {
|
||||
'id': '4DmDACA6Qtk_',
|
||||
'ext': 'flv',
|
||||
'title': 'Mating Crabs Busted by Sharks',
|
||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||
'info_dict': {
|
||||
'id': '4DmDACA6Qtk_',
|
||||
'ext': 'flv',
|
||||
'title': 'Mating Crabs Busted by Sharks',
|
||||
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
{
|
||||
'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws',
|
||||
'info_dict': {
|
||||
'id': '_JeBD_D7PlS5',
|
||||
'ext': 'flv',
|
||||
'title': 'The Real Jaws',
|
||||
'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = url_basename(url)
|
||||
|
||||
webpage = self._download_webpage(url, name)
|
||||
feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||
guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
|
||||
feed_url = self._search_regex(
|
||||
r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||
guid = self._search_regex(
|
||||
r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"',
|
||||
webpage, 'guid')
|
||||
|
||||
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
|
||||
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
|
||||
@@ -34,5 +49,6 @@ class NationalGeographicIE(InfoExtractor):
|
||||
|
||||
return self.url_result(smuggle_url(
|
||||
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||
# For some reason, the normal links don't work and we must force the use of f4m
|
||||
# For some reason, the normal links don't work and we must force
|
||||
# the use of f4m
|
||||
{'force_smil_url': True}))
|
||||
|
@@ -124,7 +124,7 @@ class NBCSportsIE(InfoExtractor):
|
||||
class NBCNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||
(?:video/.+?/(?P<id>\d+)|
|
||||
(?:feature|nightly-news)/[^/]+/(?P<title>.+))
|
||||
(?:watch|feature|nightly-news)/[^/]+/(?P<title>.+))
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
@@ -169,6 +169,10 @@ class NBCNewsIE(InfoExtractor):
|
||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -232,3 +236,28 @@ class NBCNewsIE(InfoExtractor):
|
||||
'url': info['videoAssets'][-1]['publicUrl'],
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
|
||||
class MSNBCIE(InfoExtractor):
    """Extractor for msnbc.com "watch" pages; delegates to the embedded URL."""

    # https URLs redirect to corresponding http ones
    _VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
        'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
        'info_dict': {
            'id': 'n_hayes_Aimm_140801_272214',
            'ext': 'mp4',
            'title': 'The chaotic GOP immigration vote',
            'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1406937606,
            'upload_date': '20140802',
            'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
        },
    }

    def _real_extract(self, url):
        """Hand the page's ``embedURL`` meta value to ``url_result``."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        return self.url_result(self._html_search_meta('embedURL', page))
|
||||
|
459
youtube_dl/extractor/neteasemusic.py
Normal file
459
youtube_dl/extractor/neteasemusic.py
Normal file
@@ -0,0 +1,459 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from hashlib import md5
|
||||
from base64 import b64encode
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_str,
|
||||
compat_itertools_count,
|
||||
)
|
||||
|
||||
|
||||
class NetEaseMusicBaseIE(InfoExtractor):
    """Shared helpers for the NetEase Music (music.163.com) extractors."""

    # Keys of the per-quality detail dicts looked up in a song info dict.
    _FORMATS = ['bMusic', 'mMusic', 'hMusic']
    _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
    _API_BASE = 'http://music.163.com/api/'

    @classmethod
    def _encrypt(cls, dfsid):
        """Return the URL path token for ``dfsid``.

        The ASCII digits of ``dfsid`` are XOR-ed with the salt (repeated as
        needed), the result is MD5-hashed, and the digest is base64-encoded
        with '/' replaced by '_' and '+' by '-'.
        """
        salt = bytearray(cls._NETEASE_SALT.encode('utf-8'))
        salt_len = len(salt)
        plain = bytearray(compat_str(dfsid).encode('ascii'))
        mixed = bytearray(
            byte ^ salt[pos % salt_len] for pos, byte in enumerate(plain))
        token = b64encode(md5(bytes(mixed)).digest()).decode('ascii')
        return token.replace('/', '_').replace('+', '-')

    @classmethod
    def extract_formats(cls, info):
        """Build a format dict for each quality in ``_FORMATS`` present in ``info``.

        Qualities whose entry is missing or falsy are skipped.
        """
        formats = []
        for format_id in cls._FORMATS:
            details = info.get(format_id)
            if details:
                dfs_id = details['dfsId']
                media_url = 'http://m1.music.126.net/%s/%s.%s' % (
                    cls._encrypt(dfs_id), dfs_id, details['extension'])
                formats.append({
                    'url': media_url,
                    'ext': details.get('extension'),
                    'abr': details.get('bitrate', 0) / 1000,
                    'format_id': format_id,
                    'filesize': details.get('size'),
                    'asr': details.get('sr')
                })
        return formats

    @classmethod
    def convert_milliseconds(cls, ms):
        """Convert ``ms`` milliseconds to seconds, rounded to an int."""
        seconds = ms / 1000.0
        return int(round(seconds))

    def query_api(self, endpoint, video_id, note):
        """Download JSON from ``_API_BASE + endpoint``, sending a Referer header."""
        api_url = '%s%s' % (self._API_BASE, endpoint)
        request = compat_urllib_request.Request(api_url)
        request.add_header('Referer', self._API_BASE)
        return self._download_json(request, video_id, note)
|
||||
|
||||
|
||||
class NetEaseMusicIE(NetEaseMusicBaseIE):
    """Extractor for single songs on music.163.com."""

    IE_NAME = 'netease:song'
    IE_DESC = '网易云音乐'
    _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/song?id=32102397',
        'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
        'info_dict': {
            'id': '32102397',
            'ext': 'mp3',
            'title': 'Bad Blood (feat. Kendrick Lamar)',
            'creator': 'Taylor Swift / Kendrick Lamar',
            'upload_date': '20150517',
            'timestamp': 1431878400,
            'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
        },
    }, {
        'note': 'No lyrics translation.',
        'url': 'http://music.163.com/#/song?id=29822014',
        'info_dict': {
            'id': '29822014',
            'ext': 'mp3',
            'title': '听见下雨的声音',
            'creator': '周杰伦',
            'upload_date': '20141225',
            'timestamp': 1419523200,
            'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
        },
    }, {
        'note': 'No lyrics.',
        'url': 'http://music.163.com/song?id=17241424',
        'info_dict': {
            'id': '17241424',
            'ext': 'mp3',
            'title': 'Opus 28',
            'creator': 'Dustin O\'Halloran',
            'upload_date': '20080211',
            'timestamp': 1202745600,
        },
    }, {
        'note': 'Has translated name.',
        'url': 'http://music.163.com/#/song?id=22735043',
        'info_dict': {
            'id': '22735043',
            'ext': 'mp3',
            'title': '소원을 말해봐 (Genie)',
            'creator': '少女时代',
            'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
            'upload_date': '20100127',
            'timestamp': 1264608000,
            'alt_title': '说出愿望吧(Genie)',
        }
    }]

    def _process_lyrics(self, lyrics_info):
        """Return the lyrics text, merged with its translation if present.

        Without a translation the original lyrics are returned untouched
        (possibly None). With one, every timestamped line of the original is
        emitted as ``<timestamp><text> / <translation>``, the translation
        being looked up by identical timestamp and left empty if absent.
        """
        # 'lrc' / 'tlyric' may be missing or null; treat both as "no lyrics".
        original = (lyrics_info.get('lrc') or {}).get('lyric')
        translated = (lyrics_info.get('tlyric') or {}).get('lyric')

        # Nothing to merge when either side is absent.
        if not original or not translated:
            return original

        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
        original_ts_texts = re.findall(lyrics_expr, original)
        translation_ts_dict = dict(
            (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
        )
        lyrics = '\n'.join([
            '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
            for time_stamp, text in original_ts_texts
        ])
        return lyrics

    def _real_extract(self, url):
        """Query the song detail and lyric endpoints and build the info dict."""
        song_id = self._match_id(url)

        params = {
            'id': song_id,
            'ids': '[%s]' % song_id
        }
        info = self.query_api(
            'song/detail?' + compat_urllib_parse.urlencode(params),
            song_id, 'Downloading song info')['songs'][0]

        formats = self.extract_formats(info)
        self._sort_formats(formats)

        lyrics_info = self.query_api(
            'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
            song_id, 'Downloading lyrics data')
        lyrics = self._process_lyrics(lyrics_info)

        alt_title = None
        if info.get('transNames'):
            alt_title = '/'.join(info.get('transNames'))

        album = info.get('album') or {}
        # The publish time is optional; only convert it when it is there so
        # a song without one does not abort the extraction.
        publish_time = album.get('publishTime')
        timestamp = None
        if publish_time is not None:
            timestamp = self.convert_milliseconds(publish_time)

        return {
            'id': song_id,
            'title': info['name'],
            'alt_title': alt_title,
            'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
            'timestamp': timestamp,
            'thumbnail': album.get('picUrl'),
            'duration': self.convert_milliseconds(info.get('duration', 0)),
            'description': lyrics,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
    """Extractor turning a music.163.com album page into a playlist of songs."""

    IE_NAME = 'netease:album'
    IE_DESC = '网易云音乐 - 专辑'
    _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/album?id=220780',
        'info_dict': {
            'id': '220780',
            'title': 'B\'day',
        },
        'playlist_count': 23,
    }

    def _real_extract(self, url):
        """Fetch the album data and return one url_result per listed song."""
        album_id = self._match_id(url)

        endpoint = 'album/%s?id=%s' % (album_id, album_id)
        response = self.query_api(endpoint, album_id, 'Downloading album data')
        album = response['album']

        title = album['name']
        description = album.get('description')

        entries = []
        for track in album['songs']:
            track_url = 'http://music.163.com/#/song?id=%s' % track['id']
            entries.append(
                self.url_result(track_url, 'NetEaseMusic', track['id']))

        return self.playlist_result(entries, album_id, title, description)
|
||||
|
||||
|
||||
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
    """Extractor turning a music.163.com artist page into a playlist."""

    IE_NAME = 'netease:singer'
    IE_DESC = '网易云音乐 - 歌手'
    _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'note': 'Singer has aliases.',
        'url': 'http://music.163.com/#/artist?id=10559',
        'info_dict': {
            'id': '10559',
            'title': '张惠妹 - aMEI;阿密特',
        },
        'playlist_count': 50,
    }, {
        'note': 'Singer has translated name.',
        'url': 'http://music.163.com/#/artist?id=124098',
        'info_dict': {
            'id': '124098',
            'title': '李昇基 - 이승기',
        },
        'playlist_count': 50,
    }]

    def _real_extract(self, url):
        """Return a playlist of the artist's ``hotSongs``.

        The playlist title is the artist name, followed by the translated
        name and the ';'-joined aliases whenever those are non-empty.
        """
        singer_id = self._match_id(url)

        endpoint = 'artist/%s?id=%s' % (singer_id, singer_id)
        info = self.query_api(endpoint, singer_id, 'Downloading singer data')

        artist = info['artist']
        name = artist['name']
        translated_name = artist['trans']
        if translated_name:
            name = '%s - %s' % (name, translated_name)
        aliases = artist['alias']
        if aliases:
            name = '%s - %s' % (name, ';'.join(aliases))

        entries = []
        for track in info['hotSongs']:
            track_url = 'http://music.163.com/#/song?id=%s' % track['id']
            entries.append(
                self.url_result(track_url, 'NetEaseMusic', track['id']))

        return self.playlist_result(entries, singer_id, name)
|
||||
|
||||
|
||||
class NetEaseMusicListIE(NetEaseMusicBaseIE):
    """Extractor for music.163.com playlists and toplists (charts)."""

    IE_NAME = 'netease:playlist'
    IE_DESC = '网易云音乐 - 歌单'
    _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/playlist?id=79177352',
        'info_dict': {
            'id': '79177352',
            'title': 'Billboard 2007 Top 100',
            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
        },
        'playlist_count': 99,
    }, {
        'note': 'Toplist/Charts sample',
        'url': 'http://music.163.com/#/discover/toplist?id=3733003',
        'info_dict': {
            'id': '3733003',
            'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
            'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
        },
        'playlist_count': 50,
    }]

    def _real_extract(self, url):
        """Return a playlist with one url_result per entry in ``tracks``.

        For lists whose ``specialType`` is 10 the title gets the list's
        ``updateTime`` appended, formatted as YYYY-MM-DD.
        """
        list_id = self._match_id(url)

        response = self.query_api(
            'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
            list_id, 'Downloading playlist data')
        info = response['result']

        name = info['name']
        desc = info.get('description')

        is_toplist = info.get('specialType') == 10  # is a chart/toplist
        if is_toplist:
            updated_seconds = self.convert_milliseconds(info['updateTime'])
            updated = datetime.fromtimestamp(updated_seconds)
            name = '%s %s' % (name, updated.strftime('%Y-%m-%d'))

        entries = []
        for track in info['tracks']:
            track_url = 'http://music.163.com/#/song?id=%s' % track['id']
            entries.append(
                self.url_result(track_url, 'NetEaseMusic', track['id']))

        return self.playlist_result(entries, list_id, name, desc)
|
||||
|
||||
|
||||
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
    """Extractor for music videos (MV) on music.163.com."""

    IE_NAME = 'netease:mv'
    IE_DESC = '网易云音乐 - MV'
    _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/mv?id=415350',
        'info_dict': {
            'id': '415350',
            'ext': 'mp4',
            'title': '이럴거면 그러지말지',
            'description': '白雅言自作曲唱甜蜜爱情',
            'creator': '白雅言',
            'upload_date': '20150520',
        },
    }

    def _real_extract(self, url):
        """Build the info dict from the ``mv/detail`` API response.

        Each item of the response's ``brs`` mapping becomes one mp4 format
        whose key is used as the height.
        """
        mv_id = self._match_id(url)

        response = self.query_api(
            'mv/detail?id=%s&type=mp4' % mv_id,
            mv_id, 'Downloading mv info')
        info = response['data']

        formats = []
        for brs, mv_url in info['brs'].items():
            formats.append({
                'url': mv_url,
                'ext': 'mp4',
                'format_id': '%sp' % brs,
                'height': int(brs),
            })
        self._sort_formats(formats)

        title = info['name']
        description = info.get('desc') or info.get('briefDesc')
        creator = info['artistName']
        # publishTime is turned into YYYYMMDD by dropping its dashes.
        upload_date = info['publishTime'].replace('-', '')

        return {
            'id': mv_id,
            'title': title,
            'description': description,
            'creator': creator,
            'upload_date': upload_date,
            'formats': formats,
            'thumbnail': info.get('cover'),
            'duration': self.convert_milliseconds(info.get('duration', 0)),
        }
|
||||
|
||||
|
||||
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    """Extractor for radio programs on music.163.com.

    A program has a main audio and may list accompanying songs. With
    accompanying songs (and without --no-playlist) a playlist is returned,
    otherwise only the main audio.
    """

    IE_NAME = 'netease:program'
    IE_DESC = '网易云音乐 - 电台节目'
    # The '#/' fragment prefix is optional, as for the other music.163.com
    # extractors; '#program' (hash without slash) stays accepted as before.
    _VALID_URL = r'https?://music\.163\.com/(#/?)?program\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/program?id=10109055',
        'info_dict': {
            'id': '10109055',
            'ext': 'mp3',
            'title': '不丹足球背后的故事',
            'description': '喜马拉雅人的足球梦 ...',
            'creator': '大话西藏',
            'timestamp': 1434179342,
            'upload_date': '20150613',
            'duration': 900,
        },
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
        },
        'playlist_count': 4,
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'ext': 'mp3',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
            'timestamp': 1434450841,
            'upload_date': '20150616',
        },
        'params': {
            'noplaylist': True
        }
    }]

    def _real_extract(self, url):
        """Return the main audio alone, or a playlist including its songs."""
        program_id = self._match_id(url)

        info = self.query_api(
            'dj/program/detail?id=%s' % program_id,
            program_id, 'Downloading program info')['program']

        name = info['name']
        description = info['description']

        if not info['songs'] or self._downloader.params.get('noplaylist'):
            # Only announce the --no-playlist effect when there actually
            # were accompanying songs to leave out.
            if info['songs']:
                self.to_screen(
                    'Downloading just the main audio %s because of --no-playlist'
                    % info['mainSong']['id'])

            formats = self.extract_formats(info['mainSong'])
            self._sort_formats(formats)

            return {
                'id': program_id,
                'title': name,
                'description': description,
                'creator': info['dj']['brand'],
                'timestamp': self.convert_milliseconds(info['createTime']),
                'thumbnail': info['coverUrl'],
                'duration': self.convert_milliseconds(info.get('duration', 0)),
                'formats': formats,
            }

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download the main audio %s'
            % (program_id, info['mainSong']['id']))

        # The main audio comes first, followed by the accompanying songs.
        song_ids = [info['mainSong']['id']]
        song_ids.extend([song['id'] for song in info['songs']])
        entries = [
            self.url_result('http://music.163.com/#/song?id=%s' % song_id,
                            'NetEaseMusic', song_id)
            for song_id in song_ids
        ]
        return self.playlist_result(entries, program_id, name, description)
|
||||
|
||||
|
||||
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
    """Extractor turning a music.163.com DJ radio into a playlist of programs."""

    IE_NAME = 'netease:djradio'
    IE_DESC = '网易云音乐 - 电台'
    _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/djradio?id=42',
        'info_dict': {
            'id': '42',
            'title': '声音蔓延',
            'description': 'md5:766220985cbd16fdd552f64c578a6b15'
        },
        'playlist_mincount': 40,
    }
    # Number of programs requested per API call.
    _PAGE_SIZE = 1000

    def _real_extract(self, url):
        """Page through the radio's programs and return them as a playlist.

        Pages are requested until the API reports ``more`` as false. The
        playlist name and description are taken from the ``radio`` entry of
        the first program seen; both stay None if no program is returned.
        """
        dj_id = self._match_id(url)

        name = None
        desc = None
        entries = []
        for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
            info = self.query_api(
                'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
                % (self._PAGE_SIZE, dj_id, offset),
                dj_id, 'Downloading dj programs - %d' % offset)

            programs = info['programs']
            entries.extend([
                self.url_result(
                    'http://music.163.com/#/program?id=%s' % program['id'],
                    'NetEaseMusicProgram', program['id'])
                for program in programs
            ])

            # An empty page has no program to read the radio metadata from;
            # skip it rather than fail with IndexError.
            if name is None and programs:
                radio = programs[0]['radio']
                name = radio['name']
                desc = radio['desc']

            if not info['more']:
                break

        return self.playlist_result(entries, dj_id, name, desc)
|
@@ -31,7 +31,7 @@ class NewstubeIE(InfoExtractor):
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
video_guid = self._html_search_regex(
|
||||
r'<meta property="og:video" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||
r'<meta property="og:video:url" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||
page, 'video GUID')
|
||||
|
||||
player = self._download_xml(
|
||||
|
@@ -6,6 +6,7 @@ from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class NextMediaIE(InfoExtractor):
|
||||
IE_DESC = '蘋果日報'
|
||||
_VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
|
||||
@@ -66,6 +67,7 @@ class NextMediaIE(InfoExtractor):
|
||||
|
||||
|
||||
class NextMediaActionNewsIE(NextMediaIE):
|
||||
IE_DESC = '蘋果日報 - 動新聞'
|
||||
_VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
|
||||
_TESTS = [{
|
||||
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
|
||||
@@ -90,6 +92,7 @@ class NextMediaActionNewsIE(NextMediaIE):
|
||||
|
||||
|
||||
class AppleDailyIE(NextMediaIE):
|
||||
IE_DESC = '臺灣蘋果日報'
|
||||
_VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
||||
|
@@ -195,7 +195,7 @@ class NocoIE(InfoExtractor):
|
||||
if episode_number:
|
||||
title += ' #' + compat_str(episode_number)
|
||||
if episode:
|
||||
title += ' - ' + episode
|
||||
title += ' - ' + compat_str(episode)
|
||||
|
||||
description = show.get('show_resume') or show.get('family_resume')
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user