Compare commits
804 Commits
2013.02.02
...
2013.07.25
Author | SHA1 | Date | |
---|---|---|---|
|
d0866f0bb4 | ||
|
09eeb75130 | ||
|
0a99956f71 | ||
|
12ef6aefa8 | ||
|
755eb0320e | ||
|
43ba5456b1 | ||
|
156d5ad6da | ||
|
c626a3d9fa | ||
|
b2e8bc1b20 | ||
|
771822ebb8 | ||
|
eb6a41ba0f | ||
|
7d2392691c | ||
|
c216c1894d | ||
|
3e1ad508eb | ||
|
a052c1d785 | ||
|
16484d4923 | ||
|
32a09b4382 | ||
|
870a7e6156 | ||
|
239e3e0cca | ||
|
b1ca5e3ffa | ||
|
b9a1252c96 | ||
|
fc492de31d | ||
|
a9c0f9bc63 | ||
|
b7cc9f5026 | ||
|
252580c561 | ||
|
acc47c1a3f | ||
|
70fa830e4d | ||
|
a7af0ebaf5 | ||
|
67ae7b4760 | ||
|
de48addae2 | ||
|
ddbfd0f0c5 | ||
|
d7ae0639b4 | ||
|
0382435990 | ||
|
b390d85d95 | ||
|
be925dc64c | ||
|
de7a91bfe3 | ||
|
a4358cbabd | ||
|
177ed935a9 | ||
|
c364f15ff1 | ||
|
e1f6e61e6a | ||
|
0932300e3a | ||
|
3f40217704 | ||
|
f631c3311a | ||
|
ad433bb372 | ||
|
3e0b3a1428 | ||
|
444b116597 | ||
|
2aea08eda1 | ||
|
8e5e059d7d | ||
|
2b1b511f6b | ||
|
233ad24ecf | ||
|
c4949c50f9 | ||
|
b6ef402905 | ||
|
ccf365475a | ||
|
e1fb245690 | ||
|
5a76c6517e | ||
|
1bb9568776 | ||
|
ecd1c2f7e9 | ||
|
466de68801 | ||
|
88d4111cfa | ||
|
51fb64bab1 | ||
|
be547e1d3b | ||
|
bf85454116 | ||
|
5910724b11 | ||
|
7e24b09da9 | ||
|
f085f960e7 | ||
|
f38de77f6e | ||
|
58e7d46d1b | ||
|
2a5201638d | ||
|
fe6fad1242 | ||
|
ec00e1d8a0 | ||
|
de29c4144e | ||
|
f3bab0044e | ||
|
ffd1833b87 | ||
|
896d5b63e8 | ||
|
67de24e449 | ||
|
66400c470c | ||
|
7665010267 | ||
|
5d9b75051a | ||
|
ab2f744b90 | ||
|
300fcad8a6 | ||
|
f7e025958a | ||
|
0ab5531363 | ||
|
b4444d5ca2 | ||
|
b9d3e1635f | ||
|
aa6b734e02 | ||
|
73b57f0ccb | ||
|
3c4e6d8337 | ||
|
36034aecc2 | ||
|
ffca4b5c32 | ||
|
b0e72bcf34 | ||
|
7fd930c0c8 | ||
|
2e78b2bead | ||
|
44dbe89035 | ||
|
2d5a8b5512 | ||
|
159736c1b8 | ||
|
46720279c2 | ||
|
d8269e1dfb | ||
|
cbdbb76665 | ||
|
6543f0dca5 | ||
|
232eb88bfe | ||
|
a95967f8b7 | ||
|
2ef648d3d3 | ||
|
33f6830fd5 | ||
|
606d7e67fd | ||
|
fd87ff26b9 | ||
|
85347e1cb6 | ||
|
41897817cc | ||
|
45ff2d51d0 | ||
|
5de3ece225 | ||
|
df50a41289 | ||
|
59ae56fad5 | ||
|
690e872c51 | ||
|
81082e046e | ||
|
3fa9550837 | ||
|
b1082f01a6 | ||
|
f35b84c807 | ||
|
117adb0f0f | ||
|
abb285fb1b | ||
|
a431154706 | ||
|
cfe50f04ed | ||
|
a7055eb956 | ||
|
0a1be1e997 | ||
|
c93898dae9 | ||
|
ebdf2af727 | ||
|
c108eb73cc | ||
|
3a1375dacf | ||
|
41bece30b4 | ||
|
16ea58cbda | ||
|
99e350d902 | ||
|
13e06d298c | ||
|
81f0259b9e | ||
|
fefcb5d314 | ||
|
345b0c9b46 | ||
|
20c3893f0e | ||
|
29293c1e09 | ||
|
5fe3a3c3fb | ||
|
b04621d155 | ||
|
b227060388 | ||
|
d93e4dcbb7 | ||
|
73e79f2a1b | ||
|
fc79158de2 | ||
|
7763b04e5f | ||
|
9d7b44b4cc | ||
|
897f36d179 | ||
|
94c3637f6d | ||
|
04cc96173c | ||
|
fbaaad49d7 | ||
|
b29f3b250d | ||
|
fa343954d4 | ||
|
2491f5898e | ||
|
b27c856fbc | ||
|
9941ceb331 | ||
|
c536d38059 | ||
|
8de64cac98 | ||
|
6d6d286539 | ||
|
5d2eac9eba | ||
|
9826925a20 | ||
|
24a267b562 | ||
|
d4da3d6116 | ||
|
d5a62e4f5f | ||
|
9a82b2389f | ||
|
8dba13f7e8 | ||
|
deacef651f | ||
|
2e1b3afeca | ||
|
652e776893 | ||
|
d055fe4cb0 | ||
|
131842bb0b | ||
|
59fc531f78 | ||
|
5c44c15438 | ||
|
62067cb9b8 | ||
|
0f81866329 | ||
|
2db67bc0f4 | ||
|
7dba9cd039 | ||
|
75dff0eef7 | ||
|
d828f3a550 | ||
|
bcd6e4bd07 | ||
|
53936f3d57 | ||
|
0beb3add18 | ||
|
f9bd64c098 | ||
|
d7f44b5bdb | ||
|
48bfb5f238 | ||
|
97ebe8dcaf | ||
|
d4409747ba | ||
|
37b6a6617f | ||
|
ca1c9cfe11 | ||
|
adeb4d7469 | ||
|
50587ee8ec | ||
|
8244288dfe | ||
|
6ffe72835a | ||
|
8ba5e990a5 | ||
|
9afb1afcc6 | ||
|
0e21093a8f | ||
|
9c5cd0948f | ||
|
1083705fe8 | ||
|
f3d294617f | ||
|
de33a30858 | ||
|
887a227953 | ||
|
705f6f35bc | ||
|
e648b22dbd | ||
|
257a2501fa | ||
|
99afb3ddd4 | ||
|
a3c776203f | ||
|
53f350c165 | ||
|
f46d31f948 | ||
|
bf64ff72db | ||
|
bc2884afc1 | ||
|
023fa8c440 | ||
|
427023a1e6 | ||
|
a924876fed | ||
|
3f223f7b2e | ||
|
fc2c063e1e | ||
|
20db33e299 | ||
|
c0109aa497 | ||
|
ba7a1de04d | ||
|
4269e78a80 | ||
|
6f5ac90cf3 | ||
|
de282fc217 | ||
|
ddbd903576 | ||
|
0c56a3f773 | ||
|
9d069c4778 | ||
|
0d843f796b | ||
|
67f51b3d8c | ||
|
5c5de1c79a | ||
|
0821771466 | ||
|
83f6f68e79 | ||
|
27473d18da | ||
|
0c6c096c20 | ||
|
52c8ade4ad | ||
|
0e853ca4c4 | ||
|
41beccbab0 | ||
|
2eb88d953f | ||
|
1f0483b4b1 | ||
|
6b47c7f24e | ||
|
d798e1c7a9 | ||
|
3a8736bd74 | ||
|
c8c5163618 | ||
|
500f3d2432 | ||
|
ed4a915e08 | ||
|
b8f7b1579a | ||
|
ed54491c60 | ||
|
e4decf2750 | ||
|
c90f13d106 | ||
|
62008f69c1 | ||
|
e88f5e0b4e | ||
|
769fda3c5a | ||
|
23300d7149 | ||
|
f5756f388a | ||
|
ee313cdcbf | ||
|
8b50fed04b | ||
|
5b66de8859 | ||
|
e38af9e00c | ||
|
6b37f0be55 | ||
|
6e5d5f2fc1 | ||
|
75c9481224 | ||
|
5746f9da99 | ||
|
112da0a0ce | ||
|
bcd606c0fe | ||
|
ed92bc9f6e | ||
|
9b0756f8f2 | ||
|
aa0c87391c | ||
|
b1dfdc51b1 | ||
|
2e32528012 | ||
|
f64e7695a1 | ||
|
5abeaf0650 | ||
|
8bcc355972 | ||
|
6b4642fae3 | ||
|
d1bd37deac | ||
|
405ec05cb2 | ||
|
52e8e1dc88 | ||
|
b98a6b2f72 | ||
|
0ca45b233f | ||
|
65cceef8f4 | ||
|
b004821fa9 | ||
|
81b42336ad | ||
|
c6c1974672 | ||
|
a545d1d262 | ||
|
037fcd0047 | ||
|
318452bc0c | ||
|
d746cd88c2 | ||
|
9c42603b5a | ||
|
ea93cce4f6 | ||
|
f4daa18152 | ||
|
9caa687d81 | ||
|
3b58c6fb54 | ||
|
5926c10690 | ||
|
df725153d2 | ||
|
d662896090 | ||
|
db241e8645 | ||
|
ead28ff30a | ||
|
515d7a5e73 | ||
|
14fbdc9cdd | ||
|
98bcd2834a | ||
|
f7ab6cbe16 | ||
|
28ef06f7c2 | ||
|
577d02370d | ||
|
50be92c11c | ||
|
d18596baf4 | ||
|
7ce7e39476 | ||
|
93eb15c573 | ||
|
9f4d83e3b1 | ||
|
1c251cd948 | ||
|
70d1924f8b | ||
|
7b4948b05f | ||
|
878b5d9f0d | ||
|
2bc1820660 | ||
|
8bf8b5a577 | ||
|
8222d8de88 | ||
|
c7253e2e8c | ||
|
d69cf69a6a | ||
|
d02ecdefab | ||
|
bc857bfce0 | ||
|
f8bf74575a | ||
|
964ac8b584 | ||
|
a3522dfddd | ||
|
d3a8613b6e | ||
|
200b388752 | ||
|
dabcaf3b06 | ||
|
e646ffe795 | ||
|
b0dcc3c47f | ||
|
b07d9c23c5 | ||
|
d71cae62cc | ||
|
633a50cf4b | ||
|
825e0984e2 | ||
|
d1cade5ade | ||
|
190717e31f | ||
|
0824c28c8b | ||
|
c59b4aaeef | ||
|
f9c6cbf002 | ||
|
b8fe71ab86 | ||
|
cb10cded2a | ||
|
cd8b830292 | ||
|
1ac4004f3a | ||
|
e17d368ae2 | ||
|
27110b0567 | ||
|
9fe4de3471 | ||
|
d26d440e19 | ||
|
9f5daf0006 | ||
|
eb1634cbf8 | ||
|
01c10ca26e | ||
|
45aef47281 | ||
|
ae287755b7 | ||
|
a37f27ae99 | ||
|
49f5f315fd | ||
|
97d2db017c | ||
|
2c64df0399 | ||
|
828400422a | ||
|
c3c77cec30 | ||
|
1183b85f50 | ||
|
0143dc029c | ||
|
e10e576fed | ||
|
78af8eb1d1 | ||
|
79e93125d0 | ||
|
48db0b1f4a | ||
|
8f0578f0fc | ||
|
250f557872 | ||
|
462dc88b17 | ||
|
570fa151fc | ||
|
9c286cfa00 | ||
|
80cbb6ddbb | ||
|
9fd5ce0cbe | ||
|
1736dec629 | ||
|
b8a360837a | ||
|
fc28721960 | ||
|
51ce3a75c9 | ||
|
335056663a | ||
|
5b286728de | ||
|
291a168bcc | ||
|
fda7d31aa0 | ||
|
cbf46c737c | ||
|
7beb36a529 | ||
|
153697660d | ||
|
60a72e8d45 | ||
|
426ff04282 | ||
|
a50e1b32e4 | ||
|
9eae41ddef | ||
|
aad0d6d5ba | ||
|
7aca14a1ec | ||
|
d1596ef439 | ||
|
ea63e4998b | ||
|
a08dfd27a8 | ||
|
f58848011e | ||
|
934858ad86 | ||
|
3c25b9abae | ||
|
3fc03845a1 | ||
|
9b122384e9 | ||
|
9f4e6bbaeb | ||
|
b05654f0e3 | ||
|
9b3a760bbb | ||
|
d5822b96b0 | ||
|
b3d14cbfa7 | ||
|
d6039175e5 | ||
|
97d6faaced | ||
|
219b8130df | ||
|
38cbc40a64 | ||
|
93d3a642a9 | ||
|
c5e8d7af0e | ||
|
d6983cb460 | ||
|
dd9829292e | ||
|
89cb0eb0b6 | ||
|
9b5fffb149 | ||
|
1f90438025 | ||
|
a130adb25b | ||
|
8756c5fe7a | ||
|
828dba2983 | ||
|
6b3f5a329b | ||
|
63ef586b05 | ||
|
383a6a61b1 | ||
|
4fdd4e6f6f | ||
|
01ba4b80a7 | ||
|
de66764e4e | ||
|
1037d53988 | ||
|
c3ab8f866c | ||
|
94eb2dd1fe | ||
|
346b5ce8fd | ||
|
b37fbb990b | ||
|
ef75f76f5c | ||
|
e296100005 | ||
|
953dd93a48 | ||
|
e704f4d378 | ||
|
77d0f05f71 | ||
|
50d2376769 | ||
|
759d525301 | ||
|
fcfa188548 | ||
|
f4c8bbcfc2 | ||
|
31eead52e7 | ||
|
038a3a1a61 | ||
|
587c68b2cd | ||
|
377fdf5dde | ||
|
5c67601931 | ||
|
68f54207a3 | ||
|
bb47437686 | ||
|
213b715893 | ||
|
449d5c910c | ||
|
0251f9c9c0 | ||
|
8bc7c3d858 | ||
|
af44c94862 | ||
|
36ed7177f0 | ||
|
32aa88bcae | ||
|
51090d636b | ||
|
31513ea6b9 | ||
|
88cebbd7b8 | ||
|
fb8f7280bc | ||
|
f380401bbd | ||
|
9abc6c8b31 | ||
|
8cd252f115 | ||
|
53f72b11e5 | ||
|
ee55fcbe12 | ||
|
78d3442b12 | ||
|
979a9dd4c4 | ||
|
d5979c5d55 | ||
|
8027175600 | ||
|
3054ff0cbe | ||
|
cd453d38bb | ||
|
f5a290eed9 | ||
|
ecb3e676a5 | ||
|
8b59a98610 | ||
|
8409501206 | ||
|
be95cac157 | ||
|
476203d025 | ||
|
468e2e926b | ||
|
ac3e9394e7 | ||
|
868d62a509 | ||
|
157b864a01 | ||
|
951b9dfd94 | ||
|
1142d31164 | ||
|
9131bde941 | ||
|
1132c10dc2 | ||
|
c978a96c02 | ||
|
71e458d437 | ||
|
57bde0d9c7 | ||
|
50b4d25980 | ||
|
eda60e8251 | ||
|
c794cbbb19 | ||
|
4a76d1dbe5 | ||
|
418f734a58 | ||
|
dc1c355b72 | ||
|
1b2b22ed9f | ||
|
f2cd958c0a | ||
|
57adeaea87 | ||
|
8f3f1aef05 | ||
|
51d2453c7a | ||
|
45014296be | ||
|
afef36c950 | ||
|
b31756c18e | ||
|
f008688520 | ||
|
5b68ea215b | ||
|
b1d568f0bc | ||
|
17bd1b2f41 | ||
|
5b0d3cc0cd | ||
|
d4f76f1674 | ||
|
340fa21198 | ||
|
de5d66d431 | ||
|
7bdb17d4d5 | ||
|
419c64b107 | ||
|
99a5ae3f8e | ||
|
c7563c528b | ||
|
e30e9318da | ||
|
5c51028d38 | ||
|
c1d58e1c67 | ||
|
02030ff7fe | ||
|
f45c185fa9 | ||
|
1bd96c3a60 | ||
|
929f85d851 | ||
|
98d4a4e6bc | ||
|
fb2f83360c | ||
|
3c5e7729e1 | ||
|
5a853e1423 | ||
|
2f58b12dad | ||
|
59f4fd4dc6 | ||
|
5738240ee8 | ||
|
86fd453ea8 | ||
|
c83411b9ee | ||
|
057c9938a1 | ||
|
9259966132 | ||
|
b08980412e | ||
|
532a1e0429 | ||
|
2a36c352a0 | ||
|
1a2adf3f49 | ||
|
43b62accbb | ||
|
be74864ace | ||
|
0ae456f08a | ||
|
0f75d25991 | ||
|
67129e4a15 | ||
|
dfb9323cf9 | ||
|
7f5bd09baf | ||
|
02d5eb935f | ||
|
94ca71b7cc | ||
|
b338f1b154 | ||
|
486f0c9476 | ||
|
d96680f58d | ||
|
f8602d3242 | ||
|
0c021ad171 | ||
|
086d7b4500 | ||
|
891629c84a | ||
|
ea6d901e51 | ||
|
4539dd30e6 | ||
|
c43e57242e | ||
|
db8fd71ca9 | ||
|
f4f316881d | ||
|
0e16f09474 | ||
|
09dd418f53 | ||
|
decd1d1737 | ||
|
180e689f7e | ||
|
7da5556ac2 | ||
|
f23a03a89b | ||
|
84e4682f0e | ||
|
1f99511210 | ||
|
0d94f2474c | ||
|
480b6c1e8b | ||
|
95464f14d1 | ||
|
c34407d16c | ||
|
5e34d2ebbf | ||
|
815dd2ffa8 | ||
|
ecd5fb49c5 | ||
|
b86174e7a3 | ||
|
2e2038dc35 | ||
|
46bfb42258 | ||
|
feecf22511 | ||
|
4c4f15eb78 | ||
|
104ccdb8b4 | ||
|
6ccff79594 | ||
|
aed523ecc1 | ||
|
d496a75d0a | ||
|
5c01dd1e73 | ||
|
11d9224e3b | ||
|
34c29ba1d7 | ||
|
6cd657f9f2 | ||
|
4ae9e55822 | ||
|
8749b71273 | ||
|
dbc50fdf82 | ||
|
b1d2ef9255 | ||
|
5fb16555af | ||
|
ba7c775a04 | ||
|
fe348844d9 | ||
|
767e00277f | ||
|
6ce533a220 | ||
|
08b2ac745a | ||
|
46a127eecb | ||
|
fc63faf070 | ||
|
9665577802 | ||
|
434aca5b14 | ||
|
e31852aba9 | ||
|
37254abc36 | ||
|
a11ea50319 | ||
|
81df121dd3 | ||
|
50f6412eb8 | ||
|
bf50b0383e | ||
|
bd55852517 | ||
|
4c9f7a9988 | ||
|
aba8df23ed | ||
|
3820df0106 | ||
|
e74c504f91 | ||
|
fa70605db2 | ||
|
0d173446ff | ||
|
320e26a0af | ||
|
a3d689cfb3 | ||
|
59cc5d9380 | ||
|
28535652ab | ||
|
7b670a4483 | ||
|
69fc019f26 | ||
|
613bf66939 | ||
|
9edb0916f4 | ||
|
f4b659f782 | ||
|
c70446c7df | ||
|
c76cb6d548 | ||
|
71f37e90ef | ||
|
75b5c590a8 | ||
|
4469666780 | ||
|
c15e024141 | ||
|
8cb94542f4 | ||
|
c681a03918 | ||
|
30f2999962 | ||
|
74e3452b9e | ||
|
9e1cf0c200 | ||
|
e11eb11906 | ||
|
c04bca6f60 | ||
|
b0936ef423 | ||
|
41a6eb949a | ||
|
f17ce13a92 | ||
|
8c416ad29a | ||
|
c72938240e | ||
|
e905b6f80e | ||
|
6de8f1afb7 | ||
|
9341212642 | ||
|
f7a9721e16 | ||
|
089e843b0f | ||
|
c8056d866a | ||
|
49da66e459 | ||
|
fb6c319904 | ||
|
5a8d13199c | ||
|
dce9027045 | ||
|
feba604e92 | ||
|
d22f65413a | ||
|
0599ef8c08 | ||
|
bfdf469295 | ||
|
32c96387c1 | ||
|
c8c5443bb5 | ||
|
a60b854d90 | ||
|
b8ad4f02a2 | ||
|
d281274bf2 | ||
|
b625bc2c31 | ||
|
f4381ab88a | ||
|
744435f2a4 | ||
|
855703e55e | ||
|
927c8c4924 | ||
|
0ba994e9e3 | ||
|
af9ad45cd4 | ||
|
e0fee250c3 | ||
|
72ca05016d | ||
|
844d1f9fa1 | ||
|
213c31ae16 | ||
|
04f3d551a0 | ||
|
e8600d69fd | ||
|
b03d65c237 | ||
|
8743974189 | ||
|
dc36bc9434 | ||
|
bce878a7c1 | ||
|
532d797824 | ||
|
146c12a2da | ||
|
d39919c03e | ||
|
df2dedeefb | ||
|
adb029ed81 | ||
|
43ff1a347d | ||
|
14294236bf | ||
|
c2b293ba30 | ||
|
37cd9f522f | ||
|
f33154cd39 | ||
|
bafeed9f5d | ||
|
ef767f9fd5 | ||
|
bc97f6d60c | ||
|
90a99c1b5e | ||
|
f375d4b7de | ||
|
fa41fbd318 | ||
|
6a205c8876 | ||
|
0fb3756409 | ||
|
fbbdf475b1 | ||
|
c238be3e3a | ||
|
1bf2801e6a | ||
|
c9c8402093 | ||
|
6060788083 | ||
|
e3700fc9e4 | ||
|
b693216d8d | ||
|
46b9d8295d | ||
|
7decf8951c | ||
|
1f46c15262 | ||
|
0cd358676c | ||
|
43113d92cc | ||
|
7eab8dc750 | ||
|
44e939514e | ||
|
95506f1235 | ||
|
a91556fd74 | ||
|
1447f728b5 | ||
|
d2c690828a | ||
|
cfa90f4adc | ||
|
898280a056 | ||
|
59b4a2f0e4 | ||
|
1ee9778405 | ||
|
db74c11d2b | ||
|
5011cded16 | ||
|
f10b2a9c14 | ||
|
5cb3c0b319 | ||
|
b9fc428494 | ||
|
c0ba104674 | ||
|
2a4093eaf3 | ||
|
9e62bc4439 | ||
|
553d097442 | ||
|
ae608b8076 | ||
|
c397187061 | ||
|
e32b06e977 | ||
|
8c42c506cd | ||
|
8cc83b8dbe | ||
|
51af426d89 | ||
|
08ec0af7c6 | ||
|
3b221c5406 | ||
|
3d3423574d | ||
|
e5edd51de4 | ||
|
64c78d50cc | ||
|
b3bcca0844 | ||
|
61e40c88a9 | ||
|
40634747f7 | ||
|
c2e21f2f0d | ||
|
47dcd621c0 | ||
|
a0d6fe7b92 | ||
|
c9fa1cbab6 | ||
|
8a38a194fb | ||
|
6ac7f082c4 | ||
|
f6e6da9525 | ||
|
597cc8a455 | ||
|
3370abd509 | ||
|
631f73978c | ||
|
e5f30ade10 | ||
|
6622d22c79 | ||
|
4e1582f372 | ||
|
967897fd22 | ||
|
f918ec7ea2 | ||
|
a2ae43a55f | ||
|
7ae153ee9c | ||
|
f7b567ff84 | ||
|
f2e237adc8 | ||
|
2e5457be1d | ||
|
7f9d41a55e | ||
|
8207626bbe | ||
|
df8db1aa21 | ||
|
691db5ba02 | ||
|
acb8752f80 | ||
|
679790eee1 | ||
|
6bf48bd866 | ||
|
790d4fcbe1 | ||
|
89de9eb125 | ||
|
6324fd1d74 | ||
|
9e07cf2955 | ||
|
f03b88b3fb | ||
|
97d0365f49 | ||
|
12887875a2 | ||
|
450e709972 | ||
|
9befce2b8c | ||
|
cb99797798 | ||
|
f82b28146a | ||
|
4dc72b830c | ||
|
ea05129ebd | ||
|
35d217133f | ||
|
d1b7a24354 | ||
|
c85538dba1 | ||
|
60bd48b175 | ||
|
4be0aa3539 | ||
|
f636c34481 | ||
|
3bf79c752e | ||
|
cdb130b09a | ||
|
2e5d60b7db | ||
|
8271226a55 | ||
|
1013186a17 | ||
|
7c038b3c32 | ||
|
c8cd8e5f55 | ||
|
471cf47796 | ||
|
d8f64574a4 | ||
|
e711babbd1 | ||
|
a72b0f2b6f | ||
|
434eb6f26b | ||
|
197080b10b | ||
|
7796e8c2cb | ||
|
6d4363368a | ||
|
414638cd50 | ||
|
2a9983b78f | ||
|
b17c974a88 | ||
|
5717d91ab7 | ||
|
79eb0287ab | ||
|
58994225bc | ||
|
59d4c2fe1b | ||
|
3a468f2d8b | ||
|
1ad5d872b9 | ||
|
355fc8e944 | ||
|
380a29dbf7 | ||
|
1528d6642d | ||
|
7311fef854 | ||
|
906417c7c5 | ||
|
6aabe82035 | ||
|
f0877a445e | ||
|
da06e2daf8 | ||
|
d3f5f9f6b9 | ||
|
bfc6ea7935 | ||
|
8edc2cf8ca | ||
|
fb778e66df | ||
|
3a9918d37f | ||
|
ccb0cae134 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -17,3 +17,4 @@ youtube-dl.tar.gz
|
||||
.coverage
|
||||
cover/
|
||||
updates_key.pem
|
||||
*.egg-info
|
@@ -8,6 +8,8 @@ notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
- phihag@phihag.de
|
||||
- jaime.marquinez.ferrandiz+travis@gmail.com
|
||||
- yasoob.khld@gmail.com
|
||||
# irc:
|
||||
# channels:
|
||||
# - "irc.freenode.org#youtube-dl"
|
||||
|
@@ -1,3 +1,5 @@
|
||||
include README.md
|
||||
include test/*.py
|
||||
include test/*.json
|
||||
include test/*.json
|
||||
include youtube-dl.bash-completion
|
||||
include youtube-dl.1
|
||||
|
31
Makefile
31
Makefile
@@ -1,14 +1,27 @@
|
||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
||||
rm -rf youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
||||
|
||||
cleanall: clean
|
||||
rm -f youtube-dl youtube-dl.exe
|
||||
|
||||
PREFIX=/usr/local
|
||||
BINDIR=$(PREFIX)/bin
|
||||
MANDIR=$(PREFIX)/man
|
||||
SYSCONFDIR=/etc
|
||||
PYTHON=/usr/bin/env python
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
ifeq ($(PREFIX),/usr)
|
||||
SYSCONFDIR=/etc
|
||||
else
|
||||
ifeq ($(PREFIX),/usr/local)
|
||||
SYSCONFDIR=/etc
|
||||
else
|
||||
SYSCONFDIR=$(PREFIX)/etc
|
||||
endif
|
||||
endif
|
||||
|
||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
||||
install -d $(DESTDIR)$(BINDIR)
|
||||
install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
|
||||
@@ -23,17 +36,19 @@ test:
|
||||
|
||||
tar: youtube-dl.tar.gz
|
||||
|
||||
.PHONY: all clean install test tar
|
||||
.PHONY: all clean install test tar bash-completion pypi-files
|
||||
|
||||
youtube-dl: youtube_dl/*.py
|
||||
zip --quiet youtube-dl youtube_dl/*.py
|
||||
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
|
||||
|
||||
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||
zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
|
||||
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
|
||||
echo '#!$(PYTHON)' > youtube-dl
|
||||
cat youtube-dl.zip >> youtube-dl
|
||||
rm youtube-dl.zip
|
||||
chmod a+x youtube-dl
|
||||
|
||||
README.md: youtube_dl/*.py
|
||||
README.md: youtube_dl/*.py youtube_dl/*/*.py
|
||||
COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
|
||||
|
||||
README.txt: README.md
|
||||
@@ -42,9 +57,11 @@ README.txt: README.md
|
||||
youtube-dl.1: README.md
|
||||
pandoc -s -f markdown -t man README.md -o youtube-dl.1
|
||||
|
||||
youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in
|
||||
youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
|
||||
python devscripts/bash-completion.py
|
||||
|
||||
bash-completion: youtube-dl.bash-completion
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
|
244
README.md
244
README.md
@@ -1,7 +1,7 @@
|
||||
% YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
youtube-dl
|
||||
youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
**youtube-dl** [OPTIONS] URL [URL...]
|
||||
@@ -14,112 +14,145 @@ your Unix box, on Windows or on Mac OS X. It is released to the public domain,
|
||||
which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# OPTIONS
|
||||
-h, --help print this help text and exit
|
||||
--version print program version and exit
|
||||
-U, --update update this program to latest version
|
||||
-i, --ignore-errors continue on download errors
|
||||
-r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m)
|
||||
-R, --retries RETRIES number of retries (default is 10)
|
||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16k) (default
|
||||
is 1024)
|
||||
--no-resize-buffer do not automatically adjust the buffer size. By
|
||||
default, the buffer size is automatically resized
|
||||
from an initial value of SIZE.
|
||||
--dump-user-agent display the current browser identification
|
||||
--user-agent UA specify a custom user agent
|
||||
--list-extractors List all supported extractors and the URLs they
|
||||
would handle
|
||||
-h, --help print this help text and exit
|
||||
--version print program version and exit
|
||||
-U, --update update this program to latest version. Make sure
|
||||
that you have sufficient permissions (run with
|
||||
sudo if needed)
|
||||
-i, --ignore-errors continue on download errors
|
||||
--dump-user-agent display the current browser identification
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer REF specify a custom referer, use if the video access
|
||||
is restricted to one domain
|
||||
--list-extractors List all supported extractors and the URLs they
|
||||
would handle
|
||||
--extractor-descriptions Output descriptions of all supported extractors
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER playlist video to end at (default is last)
|
||||
--match-title REGEX download only matching titles (regex or caseless
|
||||
sub-string)
|
||||
--reject-title REGEX skip download for matching titles (regex or
|
||||
caseless sub-string)
|
||||
--max-downloads NUMBER Abort after downloading NUMBER files
|
||||
--min-filesize SIZE Do not download any videos smaller than SIZE (e.g.
|
||||
50k or 44.6m)
|
||||
--max-filesize SIZE Do not download any videos larger than SIZE (e.g.
|
||||
50k or 44.6m)
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER playlist video to end at (default is last)
|
||||
--match-title REGEX download only matching titles (regex or caseless
|
||||
sub-string)
|
||||
--reject-title REGEX skip download for matching titles (regex or
|
||||
caseless sub-string)
|
||||
--max-downloads NUMBER Abort after downloading NUMBER files
|
||||
--min-filesize SIZE Do not download any videos smaller than SIZE
|
||||
(e.g. 50k or 44.6m)
|
||||
--max-filesize SIZE Do not download any videos larger than SIZE (e.g.
|
||||
50k or 44.6m)
|
||||
--date DATE download only videos uploaded on this date
|
||||
--datebefore DATE download only videos uploaded before this date
|
||||
--dateafter DATE download only videos uploaded after this date
|
||||
|
||||
## Download Options:
|
||||
-r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m)
|
||||
-R, --retries RETRIES number of retries (default is 10)
|
||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16k)
|
||||
(default is 1024)
|
||||
--no-resize-buffer do not automatically adjust the buffer size. By
|
||||
default, the buffer size is automatically resized
|
||||
from an initial value of SIZE.
|
||||
|
||||
## Filesystem Options:
|
||||
-t, --title use title in file name
|
||||
--id use video ID in file name
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-A, --auto-number number downloaded files starting from 00000
|
||||
-o, --output TEMPLATE output filename template. Use %(title)s to get the
|
||||
title, %(uploader)s for the uploader name,
|
||||
%(uploader_id)s for the uploader nickname if
|
||||
different, %(autonumber)s to get an automatically
|
||||
incremented number, %(ext)s for the filename
|
||||
extension, %(upload_date)s for the upload date
|
||||
(YYYYMMDD), %(extractor)s for the provider
|
||||
(youtube, metacafe, etc), %(id)s for the video id
|
||||
and %% for a literal percent. Use - to output to
|
||||
stdout. Can also be used to download to a different
|
||||
directory, for example with -o '/my/downloads/%(upl
|
||||
oader)s/%(title)s-%(id)s.%(ext)s' .
|
||||
--restrict-filenames Restrict filenames to only ASCII characters, and
|
||||
avoid "&" and spaces in filenames
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
||||
-w, --no-overwrites do not overwrite files
|
||||
-c, --continue resume partially downloaded files
|
||||
--no-continue do not resume partially downloaded files (restart
|
||||
from beginning)
|
||||
--cookies FILE file to read cookies from and dump cookie jar in
|
||||
--no-part do not use .part files
|
||||
--no-mtime do not use the Last-modified header to set the file
|
||||
modification time
|
||||
--write-description write video description to a .description file
|
||||
--write-info-json write video metadata to a .info.json file
|
||||
-t, --title use title in file name (default)
|
||||
--id use only video ID in file name
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-A, --auto-number number downloaded files starting from 00000
|
||||
-o, --output TEMPLATE output filename template. Use %(title)s to get
|
||||
the title, %(uploader)s for the uploader name,
|
||||
%(uploader_id)s for the uploader nickname if
|
||||
different, %(autonumber)s to get an automatically
|
||||
incremented number, %(ext)s for the filename
|
||||
extension, %(upload_date)s for the upload date
|
||||
(YYYYMMDD), %(extractor)s for the provider
|
||||
(youtube, metacafe, etc), %(id)s for the video id
|
||||
, %(playlist)s for the playlist the video is in,
|
||||
%(playlist_index)s for the position in the
|
||||
playlist and %% for a literal percent. Use - to
|
||||
output to stdout. Can also be used to download to
|
||||
a different directory, for example with -o '/my/d
|
||||
ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
|
||||
when it is present in output filename template or
|
||||
--autonumber option is given
|
||||
--restrict-filenames Restrict filenames to only ASCII characters, and
|
||||
avoid "&" and spaces in filenames
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
||||
-w, --no-overwrites do not overwrite files
|
||||
-c, --continue resume partially downloaded files
|
||||
--no-continue do not resume partially downloaded files (restart
|
||||
from beginning)
|
||||
--cookies FILE file to read cookies from and dump cookie jar in
|
||||
--no-part do not use .part files
|
||||
--no-mtime do not use the Last-modified header to set the
|
||||
file modification time
|
||||
--write-description write video description to a .description file
|
||||
--write-info-json write video metadata to a .info.json file
|
||||
--write-thumbnail write thumbnail image to disk
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet activates quiet mode
|
||||
-s, --simulate do not download the video and do not write anything
|
||||
to disk
|
||||
--skip-download do not download the video
|
||||
-g, --get-url simulate, quiet but print URL
|
||||
-e, --get-title simulate, quiet but print title
|
||||
--get-thumbnail simulate, quiet but print thumbnail URL
|
||||
--get-description simulate, quiet but print video description
|
||||
--get-filename simulate, quiet but print output filename
|
||||
--get-format simulate, quiet but print output format
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
-v, --verbose print various debugging information
|
||||
-q, --quiet activates quiet mode
|
||||
-s, --simulate do not download the video and do not write
|
||||
anything to disk
|
||||
--skip-download do not download the video
|
||||
-g, --get-url simulate, quiet but print URL
|
||||
-e, --get-title simulate, quiet but print title
|
||||
--get-id simulate, quiet but print id
|
||||
--get-thumbnail simulate, quiet but print thumbnail URL
|
||||
--get-description simulate, quiet but print video description
|
||||
--get-filename simulate, quiet but print output filename
|
||||
--get-format simulate, quiet but print output format
|
||||
--newline output progress bar as new lines
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
-v, --verbose print various debugging information
|
||||
--dump-intermediate-pages print downloaded pages to debug problems (very
|
||||
verbose)
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code
|
||||
--all-formats download all available video formats
|
||||
--prefer-free-formats prefer free video formats unless a specific one is
|
||||
requested
|
||||
--max-quality FORMAT highest quality format to download
|
||||
-F, --list-formats list all available formats (currently youtube only)
|
||||
--write-srt write video closed captions to a .srt file
|
||||
(currently youtube only)
|
||||
--srt-lang LANG language of the closed captions to download
|
||||
(optional) use IETF language tags like 'en'
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: "-f 22/17/18"
|
||||
--all-formats download all available video formats
|
||||
--prefer-free-formats prefer free video formats unless a specific one
|
||||
is requested
|
||||
--max-quality FORMAT highest quality format to download
|
||||
-F, --list-formats list all available formats (currently youtube
|
||||
only)
|
||||
--write-sub write subtitle file (currently youtube only)
|
||||
--write-auto-sub write automatic subtitle file (currently youtube
|
||||
only)
|
||||
--only-sub [deprecated] alias of --skip-download
|
||||
--all-subs downloads all the available subtitles of the
|
||||
video (currently youtube only)
|
||||
--list-subs lists all available subtitles for the video
|
||||
(currently youtube only)
|
||||
--sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt)
|
||||
(currently youtube only)
|
||||
--sub-lang LANG language of the subtitles to download (optional)
|
||||
use IETF language tags like 'en'
|
||||
|
||||
## Authentication Options:
|
||||
-u, --username USERNAME account username
|
||||
-p, --password PASSWORD account password
|
||||
-n, --netrc use .netrc authentication data
|
||||
-u, --username USERNAME account username
|
||||
-p, --password PASSWORD account password
|
||||
-n, --netrc use .netrc authentication data
|
||||
--video-password PASSWORD video password (vimeo only)
|
||||
|
||||
## Post-processing Options:
|
||||
-x, --extract-audio convert video files to audio-only files (requires
|
||||
ffmpeg or avconv and ffprobe or avprobe)
|
||||
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or
|
||||
"wav"; best by default
|
||||
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a
|
||||
value between 0 (better) and 9 (worse) for VBR or a
|
||||
specific bitrate like 128K (default 5)
|
||||
--recode-video FORMAT Encode the video to another format if necessary
|
||||
(currently supported: mp4|flv|ogg|webm)
|
||||
-k, --keep-video keeps the video file on disk after the post-
|
||||
processing; the video is erased by default
|
||||
--no-post-overwrites do not overwrite post-processed files; the post-
|
||||
processed files are overwritten by default
|
||||
-x, --extract-audio convert video files to audio-only files (requires
|
||||
ffmpeg or avconv and ffprobe or avprobe)
|
||||
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or
|
||||
"wav"; best by default
|
||||
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert
|
||||
a value between 0 (better) and 9 (worse) for VBR
|
||||
or a specific bitrate like 128K (default 5)
|
||||
--recode-video FORMAT Encode the video to another format if necessary
|
||||
(currently supported: mp4|flv|ogg|webm)
|
||||
-k, --keep-video keeps the video file on disk after the post-
|
||||
processing; the video is erased by default
|
||||
--no-post-overwrites do not overwrite post-processed files; the post-
|
||||
processed files are overwritten by default
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
@@ -137,8 +170,10 @@ The `-o` option allows users to indicate a template for the output file names. T
|
||||
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
|
||||
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
|
||||
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
||||
- `playlist`: The name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The index of the video in the playlist, a five-digit number.
|
||||
|
||||
The current default template is `%(id)s.%(ext)s`, but that will be switchted to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment).
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
In some cases, you don't want special characters such as 中, spaces, or &, for example when transferring the downloaded file to a Windows system or passing the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||
|
||||
@@ -147,15 +182,28 @@ In some cases, you don't want special characters such as 中, spaces, or &, such
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
||||
youtube-dl_test_video_.mp4 # A simple file name
|
||||
|
||||
# VIDEO SELECTION
|
||||
|
||||
Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`. They accept dates in two formats:
|
||||
|
||||
- Absolute dates: Dates in the format `YYYYMMDD`.
|
||||
- Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?`
|
||||
|
||||
Examples:
|
||||
|
||||
$ youtube-dl --dateafter now-6months #will only download the videos uploaded in the last 6 months
|
||||
$ youtube-dl --date 19700101 #will only download the videos uploaded on January 1, 1970
|
||||
$ youtube-dl --dateafter 20000101 --datebefore 20100101 #will only download the videos uploaded between 2000 and 2010
|
||||
|
||||
# FAQ
|
||||
|
||||
### Can you please put the -b option back?
|
||||
|
||||
Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you''re interested in. In that case, simply request it with the -f option and youtube-dl will try to download it.
|
||||
Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it.
|
||||
|
||||
### I get HTTP error 402 when trying to download a video. What's this?
|
||||
|
||||
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We''re [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
|
||||
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering providing a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the YouTube URL, solving the CAPTCHA, and restarting youtube-dl.
|
||||
|
||||
### I have downloaded a video but how can I play it?
|
||||
|
||||
|
57
devscripts/gh-pages/update-feed.py
Executable file
57
devscripts/gh-pages/update-feed.py
Executable file
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import datetime
|
||||
|
||||
import textwrap
|
||||
|
||||
import json
|
||||
|
||||
atom_template=textwrap.dedent("""\
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<atom:feed xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
<atom:title>youtube-dl releases</atom:title>
|
||||
<atom:id>youtube-dl-updates-feed</atom:id>
|
||||
<atom:updated>@TIMESTAMP@</atom:updated>
|
||||
@ENTRIES@
|
||||
</atom:feed>""")
|
||||
|
||||
entry_template=textwrap.dedent("""
|
||||
<atom:entry>
|
||||
<atom:id>youtube-dl-@VERSION@</atom:id>
|
||||
<atom:title>New version @VERSION@</atom:title>
|
||||
<atom:link href="http://rg3.github.io/youtube-dl" />
|
||||
<atom:content type="xhtml">
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">
|
||||
Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a>
|
||||
</div>
|
||||
</atom:content>
|
||||
<atom:author>
|
||||
<atom:name>The youtube-dl maintainers</atom:name>
|
||||
</atom:author>
|
||||
<atom:updated>@TIMESTAMP@</atom:updated>
|
||||
</atom:entry>
|
||||
""")
|
||||
|
||||
now = datetime.datetime.now()
|
||||
now_iso = now.isoformat()
|
||||
|
||||
atom_template = atom_template.replace('@TIMESTAMP@',now_iso)
|
||||
|
||||
entries=[]
|
||||
|
||||
# Load the release index. The context manager closes the file as soon as it
# has been parsed instead of leaving the handle open, and the encoding is
# stated explicitly to match the UTF-8 output written at the end of the script.
with open('update/versions.json', encoding='utf-8') as versions_file:
    versions_info = json.load(versions_file)

# Sorted list of the keys of the "versions" mapping; one feed entry is
# generated per key, in this order.
versions = sorted(versions_info['versions'].keys())
|
||||
|
||||
for v in versions:
|
||||
entry = entry_template.replace('@TIMESTAMP@',v.replace('.','-'))
|
||||
entry = entry.replace('@VERSION@',v)
|
||||
entries.append(entry)
|
||||
|
||||
entries_str = textwrap.indent(''.join(entries), '\t')
|
||||
atom_template = atom_template.replace('@ENTRIES@', entries_str)
|
||||
|
||||
with open('update/releases.atom','w',encoding='utf-8') as atom_file:
|
||||
atom_file.write(atom_template)
|
||||
|
||||
|
@@ -14,25 +14,35 @@
|
||||
|
||||
set -e
|
||||
|
||||
skip_tests=false
|
||||
if [ "$1" = '--skip-test' ]; then
|
||||
skip_tests=true
|
||||
shift
|
||||
fi
|
||||
|
||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
||||
version="$1"
|
||||
if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
|
||||
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
|
||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
||||
|
||||
echo "\n### First of all, testing..."
|
||||
make clean
|
||||
nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1
|
||||
/bin/echo -e "\n### First of all, testing..."
|
||||
make cleanall
|
||||
if $skip_tests ; then
|
||||
echo 'SKIPPING TESTS'
|
||||
else
|
||||
nosetests --verbose --with-coverage --cover-package=youtube_dl --cover-html test --stop || exit 1
|
||||
fi
|
||||
|
||||
echo "\n### Changing version in version.py..."
|
||||
/bin/echo -e "\n### Changing version in version.py..."
|
||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||
|
||||
echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
|
||||
/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
|
||||
make README.md
|
||||
git add CHANGELOG README.md youtube_dl/version.py
|
||||
git commit -m "release $version"
|
||||
|
||||
echo "\n### Now tagging, signing and pushing..."
|
||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||
git tag -s -m "Release $version" "$version"
|
||||
git show "$version"
|
||||
read -p "Is it good, can I push? (y/n) " -n 1
|
||||
@@ -42,7 +52,7 @@ MASTER=$(git rev-parse --abbrev-ref HEAD)
|
||||
git push origin $MASTER:master
|
||||
git push origin "$version"
|
||||
|
||||
echo "\n### OK, now it is time to build the binaries..."
|
||||
/bin/echo -e "\n### OK, now it is time to build the binaries..."
|
||||
REV=$(git rev-parse HEAD)
|
||||
make youtube-dl youtube-dl.tar.gz
|
||||
wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
|
||||
@@ -57,11 +67,13 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
|
||||
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
|
||||
git checkout HEAD -- youtube-dl youtube-dl.exe
|
||||
|
||||
echo "\n### Signing and uploading the new binaries to youtube-dl.org..."
|
||||
/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
|
||||
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
|
||||
scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/
|
||||
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
|
||||
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
|
||||
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
||||
|
||||
echo "\n### Now switching to gh-pages..."
|
||||
/bin/echo -e "\n### Now switching to gh-pages..."
|
||||
git clone --branch gh-pages --single-branch . build/gh-pages
|
||||
ROOT=$(pwd)
|
||||
(
|
||||
@@ -69,6 +81,7 @@ ROOT=$(pwd)
|
||||
ORIGIN_URL=$(git config --get remote.origin.url)
|
||||
cd build/gh-pages
|
||||
"$ROOT/devscripts/gh-pages/add-version.py" $version
|
||||
"$ROOT/devscripts/gh-pages/update-feed.py"
|
||||
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
|
||||
"$ROOT/devscripts/gh-pages/generate-download.py"
|
||||
"$ROOT/devscripts/gh-pages/update-copyright.py"
|
||||
@@ -83,7 +96,9 @@ ROOT=$(pwd)
|
||||
)
|
||||
rm -rf build
|
||||
|
||||
make pypi-files
|
||||
echo "Uploading to PyPi ..."
|
||||
pip sdist upload
|
||||
python setup.py sdist upload
|
||||
make clean
|
||||
|
||||
echo "\n### DONE!"
|
||||
/bin/echo -e "\n### DONE!"
|
||||
|
@@ -40,7 +40,7 @@ raw_input()
|
||||
|
||||
filename = sys.argv[0]
|
||||
|
||||
UPDATE_URL = "http://rg3.github.com/youtube-dl/update/"
|
||||
UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
|
||||
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
|
||||
JSON_URL = UPDATE_URL + 'versions.json'
|
||||
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
|
||||
|
92
devscripts/youtube_genalgo.py
Normal file
92
devscripts/youtube_genalgo.py
Normal file
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Generate youtube signature algorithm from test cases
|
||||
|
||||
import sys
|
||||
|
||||
tests = [
|
||||
# 92 - vflQw-fB4 2013/07/17
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
|
||||
"mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
|
||||
# 90
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
|
||||
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
|
||||
# 88
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
|
||||
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
|
||||
# 87 - vflART1Nf 2013/07/24
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
||||
"tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
|
||||
# 86 - vfl_ymO4Z 2013/06/27
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
|
||||
# 85 - vflSAFCP9 2013/07/19
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
|
||||
# 84
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
|
||||
# 83 - vflcaqGO8 2013/07/11
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||
"urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
|
||||
# 82
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
|
||||
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
|
||||
# 81
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
|
||||
"urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."),
|
||||
]
|
||||
|
||||
def find_matching(wrong, right):
    """Describe how to rebuild *right* out of the characters of *wrong*.

    For every character of *right*, the position of its first occurrence in
    *wrong* is looked up; the resulting index list is handed to ``compress``
    and its result is returned unchanged.

    Raises ValueError (from ``str.index``) if a character of *right* does not
    occur in *wrong*.
    """
    idxs = [wrong.index(c) for c in right]
    return compress(idxs)
|
||||
|
||||
def compress(idxs):
    """Collapse a sequence of indices into index/slice expressions on ``s``.

    Neighbouring indices that keep rising or keep falling by exactly one are
    merged into a single slice expression such as ``s[:3]`` or ``s[7:5:-1]``;
    any other index becomes a plain ``s[i]``.  The expressions are yielded as
    strings, in input order.  An empty sequence yields nothing and a
    one-element sequence yields a single ``s[i]``.

    *idxs* must support slicing (for example a list).
    """
    def _genslice(start, end, step):
        # Render the run start..end (both inclusive) as slice source text.
        starts = '' if start == 0 else str(start)
        if step < 0 and end == 0:
            # A descending run that reaches index 0 would need a stop of -1,
            # which Python reads as "last element" and therefore yields an
            # empty slice.  Leaving the stop out gives e.g. s[2::-1].
            ends = ':'
        else:
            ends = ':%d' % (end+step)
        steps = '' if step == 1 else (':%d' % step)
        return 's[%s%s%s]' % (starts, ends, steps)

    if not idxs:
        return

    step = None   # direction (+1/-1) of the run in progress, None if no run
    start = None  # first index of the run in progress
    i = idxs[0]   # keeps the final flush valid for a one-element input
    for i, prev in zip(idxs[1:], idxs[:-1]):
        if step is not None:
            if i - prev == step:
                continue
            # The run ended at prev; i is handled as prev of the next pair.
            yield _genslice(start, prev, step)
            step = None
            continue
        if i - prev in [-1, 1]:
            step = i - prev
            start = prev
            continue
        else:
            yield 's[%d]' % prev
    # Flush whatever is still pending: a lone last index or an open run.
    if step is None:
        yield 's[%d]' % i
    else:
        yield _genslice(start, i, step)
|
||||
|
||||
def _assert_compress(inp, exp):
    """Check that ``compress(inp)`` produces exactly the list *exp*.

    On a mismatch the actual and expected lists are printed before the
    assertion fails.
    """
    actual = list(compress(inp))
    if actual == exp:
        return
    print('Got %r, expected %r' % (actual, exp))
    assert actual == exp


# Self-checks, executed every time the module is loaded.
_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]'])
_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]'])
_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]'])
|
||||
|
||||
def gen(wrong, right, indent):
    """Return the source text of one ``if len(s) == N:`` branch.

    N is ``len(wrong)``; the branch returns the expressions produced by
    ``find_matching(wrong, right)`` joined with `` + ``.  *indent* is placed
    in front of the ``return`` line.
    """
    expression = ' + '.join(find_matching(wrong, right))
    template = 'if len(s) == %d:\n%s return %s\n'
    return template % (len(wrong), indent, expression)
|
||||
|
||||
def genall(tests):
    """Return the source text of an ``if``/``elif`` chain covering *tests*.

    *tests* is an iterable of ``(wrong, right)`` pairs; each pair becomes one
    branch generated by ``gen``.  Every branch after the first is prefixed
    with ``el`` so that its ``if`` turns into ``elif``.
    """
    indent = ' ' * 8
    branches = [gen(wrong, right, indent) for wrong, right in tests]
    separator = indent + 'el'
    return indent + separator.join(branches)
|
||||
|
||||
def main():
    """Print the code generated from the module-level ``tests`` table."""
    generated = genall(tests)
    print(generated)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
46
setup.py
46
setup.py
@@ -2,6 +2,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import pkg_resources
|
||||
import sys
|
||||
|
||||
@@ -11,8 +12,9 @@ except ImportError:
|
||||
from distutils.core import setup
|
||||
|
||||
try:
|
||||
# This will create an exe that needs Microsoft Visual C++ 2008
|
||||
# Redistributable Package
|
||||
import py2exe
|
||||
"""This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""
|
||||
except ImportError:
|
||||
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
||||
print("Cannot import py2exe", file=sys.stderr)
|
||||
@@ -23,15 +25,17 @@ py2exe_options = {
|
||||
"compressed": 1,
|
||||
"optimize": 2,
|
||||
"dist_dir": '.',
|
||||
"dll_excludes": ['w9xpopen.exe']
|
||||
"dll_excludes": ['w9xpopen.exe'],
|
||||
}
|
||||
|
||||
py2exe_console = [{
|
||||
"script": "./youtube_dl/__main__.py",
|
||||
"dest_base": "youtube-dl",
|
||||
}]
|
||||
|
||||
py2exe_params = {
|
||||
'console': py2exe_console,
|
||||
'options': { "py2exe": py2exe_options },
|
||||
'options': {"py2exe": py2exe_options},
|
||||
'zipfile': None
|
||||
}
|
||||
|
||||
@@ -40,30 +44,34 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
||||
else:
|
||||
params = {
|
||||
'scripts': ['bin/youtube-dl'],
|
||||
'data_files': [('etc/bash_completion.d', ['youtube-dl.bash-completion']), # Installing system-wide would require sudo...
|
||||
('share/doc/youtube_dl', ['README.txt']),
|
||||
('share/man/man1/', ['youtube-dl.1'])]
|
||||
'data_files': [ # Installing system-wide would require sudo...
|
||||
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
||||
('share/doc/youtube_dl', ['README.txt']),
|
||||
('share/man/man1/', ['youtube-dl.1'])
|
||||
]
|
||||
}
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec'))
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
'youtube_dl/version.py', 'exec'))
|
||||
|
||||
setup(
|
||||
name = 'youtube_dl',
|
||||
version = __version__,
|
||||
description = 'YouTube video downloader',
|
||||
long_description = 'Small command-line program to download videos from YouTube.com and other video sites.',
|
||||
url = 'https://github.com/rg3/youtube-dl',
|
||||
author = 'Ricardo Garcia',
|
||||
maintainer = 'Philipp Hagemeister',
|
||||
maintainer_email = 'phihag@phihag.de',
|
||||
packages = ['youtube_dl'],
|
||||
name='youtube_dl',
|
||||
version=__version__,
|
||||
description='YouTube video downloader',
|
||||
long_description='Small command-line program to download videos from'
|
||||
' YouTube.com and other video sites.',
|
||||
url='https://github.com/rg3/youtube-dl',
|
||||
author='Ricardo Garcia',
|
||||
maintainer='Philipp Hagemeister',
|
||||
maintainer_email='phihag@phihag.de',
|
||||
packages=['youtube_dl', 'youtube_dl.extractor'],
|
||||
|
||||
# Provokes warning on most systems (why?!)
|
||||
#test_suite = 'nose.collector',
|
||||
#test_requires = ['nosetest'],
|
||||
# test_suite = 'nose.collector',
|
||||
# test_requires = ['nosetest'],
|
||||
|
||||
classifiers = [
|
||||
classifiers=[
|
||||
"Topic :: Multimedia :: Video",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Environment :: Console",
|
||||
|
44
test/helper.py
Normal file
44
test/helper.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import io
|
||||
import json
|
||||
import os.path
|
||||
|
||||
import youtube_dl.extractor
|
||||
from youtube_dl import YoutubeDL, YoutubeDLHandler
|
||||
from youtube_dl.utils import (
|
||||
compat_cookiejar,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
# General configuration (from __init__, not very elegant...)
|
||||
jar = compat_cookiejar.CookieJar()
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
proxy_handler = compat_urllib_request.ProxyHandler()
|
||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
compat_urllib_request.install_opener(opener)
|
||||
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
parameters = json.load(pf)
|
||||
|
||||
class FakeYDL(YoutubeDL):
    """YoutubeDL replacement for tests that records instead of downloading.

    The base-class initializer is not called; only ``params`` and ``result``
    are set up.
    """

    def __init__(self):
        # Every instance works on its own copy of the shared parameters:
        # some tests set the "sublang" parameter, and sharing one dictionary
        # between downloader instances would break the md5 checks.
        self.params = dict(parameters)
        # Collects everything that is passed to download().
        self.result = []

    def to_screen(self, s):
        """Print the message *s*."""
        print(s)

    def trouble(self, s, tb=None):
        """Raise an Exception carrying the message *s*; *tb* is ignored."""
        raise Exception(s)

    def download(self, x):
        """Record *x* in ``self.result`` instead of downloading anything."""
        self.result.append(x)
|
||||
|
||||
def get_testcases():
    """Yield the test-case dicts attached to the available extractors.

    For each extractor returned by ``youtube_dl.extractor.gen_extractors()``
    a truthy ``_TEST`` attribute is yielded first, followed by every item of
    its ``_TESTS`` attribute.  Each yielded dict gets a ``'name'`` key set to
    the extractor's class name with its last two characters (the length of
    ``'IE'``) removed; the dicts are modified in place.
    """
    for extractor in youtube_dl.extractor.gen_extractors():
        case_name = type(extractor).__name__[:-len('IE')]

        single = getattr(extractor, '_TEST', None)
        if single:
            single['name'] = case_name
            yield single

        for case in getattr(extractor, '_TESTS', []):
            case['name'] = case_name
            yield case
|
@@ -29,6 +29,7 @@
|
||||
"simulate": false,
|
||||
"skip_download": false,
|
||||
"subtitleslang": null,
|
||||
"subtitlesformat": "srt",
|
||||
"test": true,
|
||||
"updatetime": true,
|
||||
"usenetrc": false,
|
||||
@@ -36,5 +37,8 @@
|
||||
"verbose": true,
|
||||
"writedescription": false,
|
||||
"writeinfojson": true,
|
||||
"writesubtitles": false
|
||||
}
|
||||
"writesubtitles": false,
|
||||
"onlysubtitles": false,
|
||||
"allsubtitles": false,
|
||||
"listssubtitles": false
|
||||
}
|
||||
|
@@ -7,21 +7,71 @@ import unittest
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE
|
||||
from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors
|
||||
from helper import get_testcases
|
||||
|
||||
class TestAllURLsMatching(unittest.TestCase):
|
||||
def test_youtube_playlist_matching(self):
|
||||
self.assertTrue(YoutubePlaylistIE().suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
|
||||
self.assertTrue(YoutubePlaylistIE().suitable(u'PL63F0C78739B09958'))
|
||||
self.assertFalse(YoutubePlaylistIE().suitable(u'PLtS2H6bU1M'))
|
||||
self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
|
||||
self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
|
||||
self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
|
||||
self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
|
||||
self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
|
||||
self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
|
||||
self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||
self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
|
||||
|
||||
def test_youtube_matching(self):
|
||||
self.assertTrue(YoutubeIE().suitable(u'PLtS2H6bU1M'))
|
||||
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
||||
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
||||
|
||||
def test_youtube_channel_matching(self):
|
||||
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
|
||||
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
|
||||
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
|
||||
|
||||
def test_justin_tv_channelid_matching(self):
|
||||
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
||||
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
||||
self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv"))
|
||||
self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv"))
|
||||
self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv"))
|
||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv"))
|
||||
self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/"))
|
||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/"))
|
||||
|
||||
def test_justintv_videoid_matching(self):
|
||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483"))
|
||||
|
||||
def test_justin_tv_chapterid_matching(self):
|
||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
||||
|
||||
def test_youtube_extract(self):
|
||||
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
for tc in get_testcases():
|
||||
url = tc['url']
|
||||
for ie in ies:
|
||||
if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']:
|
||||
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
|
||||
else:
|
||||
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
|
||||
|
||||
def test_keywords(self):
|
||||
ies = gen_extractors()
|
||||
matching_ies = lambda url: [ie.IE_NAME for ie in ies
|
||||
if ie.suitable(url) and ie.IE_NAME != 'generic']
|
||||
self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
|
||||
self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
|
||||
self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
|
||||
self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
|
||||
self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
|
||||
self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -7,19 +7,19 @@ import os
|
||||
import json
|
||||
import unittest
|
||||
import sys
|
||||
import hashlib
|
||||
import socket
|
||||
import binascii
|
||||
|
||||
# Allow direct execution
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import youtube_dl.FileDownloader
|
||||
import youtube_dl.InfoExtractors
|
||||
import youtube_dl.YoutubeDL
|
||||
from youtube_dl.utils import *
|
||||
|
||||
DEF_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tests.json')
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
|
||||
RETRIES = 3
|
||||
|
||||
# General configuration (from __init__, not very elegant...)
|
||||
jar = compat_cookiejar.CookieJar()
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
@@ -36,26 +36,33 @@ def _try_rm(filename):
|
||||
if ose.errno != errno.ENOENT:
|
||||
raise
|
||||
|
||||
class FileDownloader(youtube_dl.FileDownloader):
|
||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
class YoutubeDL(youtube_dl.YoutubeDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.to_stderr = self.to_screen
|
||||
self.processed_info_dicts = []
|
||||
return youtube_dl.FileDownloader.__init__(self, *args, **kwargs)
|
||||
super(YoutubeDL, self).__init__(*args, **kwargs)
|
||||
def report_warning(self, message):
|
||||
# Don't accept warnings during tests
|
||||
raise ExtractorError(message)
|
||||
def process_info(self, info_dict):
|
||||
self.processed_info_dicts.append(info_dict)
|
||||
return youtube_dl.FileDownloader.process_info(self, info_dict)
|
||||
return super(YoutubeDL, self).process_info(info_dict)
|
||||
|
||||
def _file_md5(fn):
|
||||
with open(fn, 'rb') as f:
|
||||
return hashlib.md5(f.read()).hexdigest()
|
||||
|
||||
with io.open(DEF_FILE, encoding='utf-8') as deff:
|
||||
defs = json.load(deff)
|
||||
from helper import get_testcases
|
||||
defs = get_testcases()
|
||||
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
parameters = json.load(pf)
|
||||
|
||||
|
||||
class TestDownload(unittest.TestCase):
|
||||
maxDiff = None
|
||||
def setUp(self):
|
||||
self.parameters = parameters
|
||||
self.defs = defs
|
||||
@@ -64,29 +71,29 @@ class TestDownload(unittest.TestCase):
|
||||
def generator(test_case):
|
||||
|
||||
def test_template(self):
|
||||
ie = getattr(youtube_dl.InfoExtractors, test_case['name'] + 'IE')
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||
def print_skipping(reason):
|
||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||
if not ie._WORKING:
|
||||
print('Skipping: IE marked as not _WORKING')
|
||||
print_skipping('IE marked as not _WORKING')
|
||||
return
|
||||
if 'playlist' not in test_case and not test_case['file']:
|
||||
print('Skipping: No output file specified')
|
||||
print_skipping('No output file specified')
|
||||
return
|
||||
if 'skip' in test_case:
|
||||
print('Skipping: {0}'.format(test_case['skip']))
|
||||
print_skipping(test_case['skip'])
|
||||
return
|
||||
|
||||
params = self.parameters.copy()
|
||||
params.update(test_case.get('params', {}))
|
||||
|
||||
fd = FileDownloader(params)
|
||||
fd.add_info_extractor(ie())
|
||||
for ien in test_case.get('add_ie', []):
|
||||
fd.add_info_extractor(getattr(youtube_dl.InfoExtractors, ien + 'IE')())
|
||||
ydl = YoutubeDL(params)
|
||||
ydl.add_default_info_extractors()
|
||||
finished_hook_called = set()
|
||||
def _hook(status):
|
||||
if status['status'] == 'finished':
|
||||
finished_hook_called.add(status['filename'])
|
||||
fd.add_progress_hook(_hook)
|
||||
ydl.fd.add_progress_hook(_hook)
|
||||
|
||||
test_cases = test_case.get('playlist', [test_case])
|
||||
for tc in test_cases:
|
||||
@@ -94,7 +101,19 @@ def generator(test_case):
|
||||
_try_rm(tc['file'] + '.part')
|
||||
_try_rm(tc['file'] + '.info.json')
|
||||
try:
|
||||
fd.download([test_case['url']])
|
||||
for retry in range(1, RETRIES + 1):
|
||||
try:
|
||||
ydl.download([test_case['url']])
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
if retry == RETRIES: raise
|
||||
|
||||
# Check if the exception is not a network related one
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
||||
raise
|
||||
|
||||
print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
|
||||
else:
|
||||
break
|
||||
|
||||
for tc in test_cases:
|
||||
if not test_case.get('params', {}).get('skip_download', False):
|
||||
@@ -106,8 +125,25 @@ def generator(test_case):
|
||||
self.assertEqual(md5_for_file, tc['md5'])
|
||||
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
|
||||
info_dict = json.load(infof)
|
||||
for (info_field, value) in tc.get('info_dict', {}).items():
|
||||
self.assertEqual(value, info_dict.get(info_field))
|
||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
|
||||
else:
|
||||
got = info_dict.get(info_field)
|
||||
self.assertEqual(
|
||||
expected, got,
|
||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
|
||||
# If checkable fields are missing from the test case, print the info_dict
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
for key, value in info_dict.items()
|
||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
|
||||
if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
|
||||
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')
|
||||
|
||||
# Check for the presence of mandatory fields
|
||||
for key in ('id', 'url', 'title', 'ext'):
|
||||
self.assertTrue(key in info_dict.keys() and info_dict[key])
|
||||
finally:
|
||||
for tc in test_cases:
|
||||
_try_rm(tc['file'])
|
||||
@@ -117,9 +153,14 @@ def generator(test_case):
|
||||
return test_template
|
||||
|
||||
### And add them to TestDownload
|
||||
for test_case in defs:
|
||||
for n, test_case in enumerate(defs):
|
||||
test_method = generator(test_case)
|
||||
test_method.__name__ = "test_{0}".format(test_case["name"])
|
||||
tname = 'test_' + str(test_case['name'])
|
||||
i = 1
|
||||
while hasattr(TestDownload, tname):
|
||||
tname = 'test_' + str(test_case['name']) + '_' + str(i)
|
||||
i += 1
|
||||
test_method.__name__ = tname
|
||||
setattr(TestDownload, test_method.__name__, test_method)
|
||||
del test_method
|
||||
|
||||
|
@@ -4,6 +4,7 @@
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
import xml.etree.ElementTree
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
@@ -14,6 +15,9 @@ from youtube_dl.utils import timeconvert
|
||||
from youtube_dl.utils import sanitize_filename
|
||||
from youtube_dl.utils import unescapeHTML
|
||||
from youtube_dl.utils import orderedSet
|
||||
from youtube_dl.utils import DateRange
|
||||
from youtube_dl.utils import unified_strdate
|
||||
from youtube_dl.utils import find_xpath_attr
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
_compat_str = lambda b: b.decode('unicode-escape')
|
||||
@@ -95,6 +99,33 @@ class TestUtil(unittest.TestCase):
|
||||
|
||||
def test_unescape_html(self):
|
||||
self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;'))
|
||||
|
||||
def test_daterange(self):
|
||||
_20century = DateRange("19000101","20000101")
|
||||
self.assertFalse("17890714" in _20century)
|
||||
_ac = DateRange("00010101")
|
||||
self.assertTrue("19690721" in _ac)
|
||||
_firstmilenium = DateRange(end="10000101")
|
||||
self.assertTrue("07110427" in _firstmilenium)
|
||||
|
||||
def test_unified_dates(self):
|
||||
self.assertEqual(unified_strdate('December 21, 2010'), '20101221')
|
||||
self.assertEqual(unified_strdate('8/7/2009'), '20090708')
|
||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = u'''<root>
|
||||
<node/>
|
||||
<node x="a"/>
|
||||
<node x="a" y="c" />
|
||||
<node x="b" y="d" />
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
|
||||
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
|
||||
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -9,8 +9,8 @@ import unittest
|
||||
# Allow direct execution
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import youtube_dl.FileDownloader
|
||||
import youtube_dl.InfoExtractors
|
||||
import youtube_dl.YoutubeDL
|
||||
import youtube_dl.extractor
|
||||
from youtube_dl.utils import *
|
||||
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
@@ -22,9 +22,9 @@ proxy_handler = compat_urllib_request.ProxyHandler()
|
||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
compat_urllib_request.install_opener(opener)
|
||||
|
||||
class FileDownloader(youtube_dl.FileDownloader):
|
||||
class YoutubeDL(youtube_dl.YoutubeDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
youtube_dl.FileDownloader.__init__(self, *args, **kwargs)
|
||||
super(YoutubeDL, self).__init__(*args, **kwargs)
|
||||
self.to_stderr = self.to_screen
|
||||
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
@@ -48,10 +48,10 @@ class TestInfoJSON(unittest.TestCase):
|
||||
self.tearDown()
|
||||
|
||||
def test_info_json(self):
|
||||
ie = youtube_dl.InfoExtractors.YoutubeIE()
|
||||
fd = FileDownloader(params)
|
||||
fd.add_info_extractor(ie)
|
||||
fd.download([TEST_ID])
|
||||
ie = youtube_dl.extractor.YoutubeIE()
|
||||
ydl = YoutubeDL(params)
|
||||
ydl.add_info_extractor(ie)
|
||||
ydl.download([TEST_ID])
|
||||
self.assertTrue(os.path.exists(INFO_JSON_FILE))
|
||||
with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
|
||||
jd = json.load(jsonf)
|
||||
|
@@ -8,66 +8,91 @@ import json
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.InfoExtractors import YoutubeUserIE,YoutubePlaylistIE
|
||||
from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE
|
||||
from youtube_dl.utils import *
|
||||
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
parameters = json.load(pf)
|
||||
|
||||
# General configuration (from __init__, not very elegant...)
|
||||
jar = compat_cookiejar.CookieJar()
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
proxy_handler = compat_urllib_request.ProxyHandler()
|
||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
compat_urllib_request.install_opener(opener)
|
||||
|
||||
class FakeDownloader(object):
|
||||
def __init__(self):
|
||||
self.result = []
|
||||
self.params = parameters
|
||||
def to_screen(self, s):
|
||||
print(s)
|
||||
def trouble(self, s):
|
||||
raise Exception(s)
|
||||
def download(self, x):
|
||||
self.result.append(x)
|
||||
from helper import FakeYDL
|
||||
|
||||
class TestYoutubeLists(unittest.TestCase):
|
||||
def assertIsPlaylist(self,info):
|
||||
"""Make sure the info has '_type' set to 'playlist'"""
|
||||
self.assertEqual(info['_type'], 'playlist')
|
||||
|
||||
def test_youtube_playlist(self):
|
||||
DL = FakeDownloader()
|
||||
IE = YoutubePlaylistIE(DL)
|
||||
IE.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertEqual(DL.result, [
|
||||
['http://www.youtube.com/watch?v=bV9L5Ht9LgY'],
|
||||
['http://www.youtube.com/watch?v=FXxLjLQi3Fg'],
|
||||
['http://www.youtube.com/watch?v=tU3Bgo5qJZE']
|
||||
])
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'ytdl test PL')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
|
||||
|
||||
def test_issue_673(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('PLBB231211A4F62143')[0]
|
||||
self.assertTrue(len(result['entries']) > 25)
|
||||
|
||||
def test_youtube_playlist_long(self):
|
||||
DL = FakeDownloader()
|
||||
IE = YoutubePlaylistIE(DL)
|
||||
IE.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
self.assertTrue(len(DL.result) >= 799)
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertTrue(len(result['entries']) >= 799)
|
||||
|
||||
def test_youtube_playlist_with_deleted(self):
|
||||
#651
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||
|
||||
def test_youtube_playlist_empty(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(len(result['entries']), 0)
|
||||
|
||||
def test_youtube_course(self):
|
||||
DL = FakeDownloader()
|
||||
IE = YoutubePlaylistIE(DL)
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
IE.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
self.assertEqual(DL.result[0], ['http://www.youtube.com/watch?v=j9WZyLZCBzs'])
|
||||
self.assertEqual(len(DL.result), 25)
|
||||
self.assertEqual(DL.result[-1], ['http://www.youtube.com/watch?v=rYefUsYuEp0'])
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
|
||||
entries = result['entries']
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
||||
def test_youtube_channel(self):
|
||||
# I give up, please find a channel that does paginate and test this like test_youtube_playlist_long
|
||||
pass # TODO
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeChannelIE(dl)
|
||||
#test paginated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
|
||||
self.assertTrue(len(result['entries']) > 90)
|
||||
#test autogenerated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
|
||||
self.assertTrue(len(result['entries']) >= 18)
|
||||
|
||||
def test_youtube_user(self):
|
||||
DL = FakeDownloader()
|
||||
IE = YoutubeUserIE(DL)
|
||||
IE.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
||||
self.assertTrue(len(DL.result) >= 320)
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeUserIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
|
||||
self.assertTrue(len(result['entries']) >= 320)
|
||||
|
||||
def test_youtube_safe_search(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
|
||||
self.assertEqual(len(result['entries']), 2)
|
||||
|
||||
def test_youtube_show(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeShowIE(dl)
|
||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
||||
self.assertTrue(len(result) >= 4)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
67
test/test_youtube_sig.py
Normal file
67
test/test_youtube_sig.py
Normal file
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest
|
||||
import sys
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor.youtube import YoutubeIE
|
||||
from helper import FakeYDL
|
||||
|
||||
sig = YoutubeIE(FakeYDL())._decrypt_signature
|
||||
|
||||
class TestYoutubeSig(unittest.TestCase):
|
||||
def test_92(self):
|
||||
wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
|
||||
right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_90(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
|
||||
right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_88(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
|
||||
right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_87(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
|
||||
right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_86(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
|
||||
right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_85(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
|
||||
right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_84(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
|
||||
right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_83(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
||||
right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_82(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
|
||||
right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
def test_81(self):
|
||||
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
|
||||
right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
|
||||
self.assertEqual(sig(wrong), right)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -10,48 +10,86 @@ import hashlib
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.InfoExtractors import YoutubeIE
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.utils import *
|
||||
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
parameters = json.load(pf)
|
||||
|
||||
# General configuration (from __init__, not very elegant...)
|
||||
jar = compat_cookiejar.CookieJar()
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
proxy_handler = compat_urllib_request.ProxyHandler()
|
||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
compat_urllib_request.install_opener(opener)
|
||||
|
||||
class FakeDownloader(object):
|
||||
def __init__(self):
|
||||
self.result = []
|
||||
self.params = parameters
|
||||
def to_screen(self, s):
|
||||
print(s)
|
||||
def trouble(self, s):
|
||||
raise Exception(s)
|
||||
def download(self, x):
|
||||
self.result.append(x)
|
||||
from helper import FakeYDL
|
||||
|
||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
class TestYoutubeSubtitles(unittest.TestCase):
|
||||
def setUp(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['allsubtitles'] = False
|
||||
DL.params['writesubtitles'] = False
|
||||
DL.params['subtitlesformat'] = 'srt'
|
||||
DL.params['listsubtitles'] = False
|
||||
def test_youtube_no_subtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = False
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
subtitles = info_dict[0]['subtitles']
|
||||
self.assertEqual(subtitles, None)
|
||||
def test_youtube_subtitles(self):
|
||||
DL = FakeDownloader()
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
self.assertEqual(md5(info_dict[0]['subtitles']), 'c3228550d59116f3c29fba370b55d033')
|
||||
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
|
||||
def test_youtube_subtitles_it(self):
|
||||
DL = FakeDownloader()
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['subtitleslang'] = 'it'
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
self.assertEqual(md5(info_dict[0]['subtitles']), '132a88a0daf8e1520f393eb58f1f646a')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d')
|
||||
def test_youtube_onlysubtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['onlysubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
|
||||
def test_youtube_allsubtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['allsubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
subtitles = info_dict[0]['subtitles']
|
||||
self.assertEqual(len(subtitles), 13)
|
||||
def test_youtube_subtitles_sbv_format(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['subtitlesformat'] = 'sbv'
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||
def test_youtube_subtitles_vtt_format(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writesubtitles'] = True
|
||||
DL.params['subtitlesformat'] = 'vtt'
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7')
|
||||
def test_youtube_list_subtitles(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['listsubtitles'] = True
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('QRS8MkLhQmM')
|
||||
self.assertEqual(info_dict, None)
|
||||
def test_youtube_automatic_captions(self):
|
||||
DL = FakeYDL()
|
||||
DL.params['writeautomaticsub'] = True
|
||||
DL.params['subtitleslang'] = 'it'
|
||||
IE = YoutubeIE(DL)
|
||||
info_dict = IE.extract('8YoUxe5ncPo')
|
||||
sub = info_dict[0]['subtitles'][0]
|
||||
self.assertTrue(sub[2] is not None)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
279
test/tests.json
279
test/tests.json
@@ -1,279 +0,0 @@
|
||||
[
|
||||
{
|
||||
"name": "Youtube",
|
||||
"url": "http://www.youtube.com/watch?v=BaW_jenozKc",
|
||||
"file": "BaW_jenozKc.mp4",
|
||||
"info_dict": {
|
||||
"title": "youtube-dl test video \"'/\\ä↭𝕐",
|
||||
"uploader": "Philipp Hagemeister",
|
||||
"uploader_id": "phihag",
|
||||
"upload_date": "20121002",
|
||||
"description": "test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Dailymotion",
|
||||
"md5": "392c4b85a60a90dc4792da41ce3144eb",
|
||||
"url": "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech",
|
||||
"file": "x33vw9.mp4"
|
||||
},
|
||||
{
|
||||
"name": "Metacafe",
|
||||
"add_ie": ["Youtube"],
|
||||
"url": "http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
|
||||
"file": "_aUehQsCQtM.flv"
|
||||
},
|
||||
{
|
||||
"name": "BlipTV",
|
||||
"md5": "b2d849efcf7ee18917e4b4d9ff37cafe",
|
||||
"url": "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352",
|
||||
"file": "5779306.m4v"
|
||||
},
|
||||
{
|
||||
"name": "XVideos",
|
||||
"md5": "1d0c835822f0a71a7bf011855db929d0",
|
||||
"url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1",
|
||||
"file": "939581.flv"
|
||||
},
|
||||
{
|
||||
"name": "YouPorn",
|
||||
"md5": "c37ddbaaa39058c76a7e86c6813423c1",
|
||||
"url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/",
|
||||
"file": "505835.mp4"
|
||||
},
|
||||
{
|
||||
"name": "Pornotube",
|
||||
"md5": "374dd6dcedd24234453b295209aa69b6",
|
||||
"url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing",
|
||||
"file": "1689755.flv"
|
||||
},
|
||||
{
|
||||
"name": "YouJizz",
|
||||
"md5": "07e15fa469ba384c7693fd246905547c",
|
||||
"url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html",
|
||||
"file": "2189178.flv"
|
||||
},
|
||||
{
|
||||
"name": "Vimeo",
|
||||
"md5": "8879b6cc097e987f02484baf890129e5",
|
||||
"url": "http://vimeo.com/56015672",
|
||||
"file": "56015672.mp4",
|
||||
"info_dict": {
|
||||
"title": "youtube-dl test video - ★ \" ' 幸 / \\ ä ↭ 𝕐",
|
||||
"uploader": "Filippo Valsorda",
|
||||
"uploader_id": "user7108434",
|
||||
"upload_date": "20121220",
|
||||
"description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: ★ \" ' 幸 / \\ ä ↭ 𝕐"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Soundcloud",
|
||||
"md5": "ebef0a451b909710ed1d7787dddbf0d7",
|
||||
"url": "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy",
|
||||
"file": "62986583.mp3"
|
||||
},
|
||||
{
|
||||
"name": "StanfordOpenClassroom",
|
||||
"md5": "544a9468546059d4e80d76265b0443b8",
|
||||
"url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
|
||||
"file": "PracticalUnix_intro-environment.mp4",
|
||||
"skip": "Currently offline"
|
||||
},
|
||||
{
|
||||
"name": "XNXX",
|
||||
"md5": "0831677e2b4761795f68d417e0b7b445",
|
||||
"url": "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_",
|
||||
"file": "1135332.flv"
|
||||
},
|
||||
{
|
||||
"name": "Youku",
|
||||
"url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
|
||||
"file": "XNDgyMDQ2NTQw_part00.flv",
|
||||
"md5": "ffe3f2e435663dc2d1eea34faeff5b5b",
|
||||
"params": { "test": false }
|
||||
},
|
||||
{
|
||||
"name": "NBA",
|
||||
"url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html",
|
||||
"file": "0021200253-okc-bkn-recap.nba.mp4",
|
||||
"md5": "c0edcfc37607344e2ff8f13c378c88a4"
|
||||
},
|
||||
{
|
||||
"name": "JustinTV",
|
||||
"url": "http://www.twitch.tv/thegamedevhub/b/296128360",
|
||||
"file": "296128360.flv",
|
||||
"md5": "ecaa8a790c22a40770901460af191c9a"
|
||||
},
|
||||
{
|
||||
"name": "MyVideo",
|
||||
"url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win",
|
||||
"file": "8229274.flv",
|
||||
"md5": "2d2753e8130479ba2cb7e0a37002053e"
|
||||
},
|
||||
{
|
||||
"name": "Escapist",
|
||||
"url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate",
|
||||
"file": "6618-Breaking-Down-Baldurs-Gate.flv",
|
||||
"md5": "c6793dbda81388f4264c1ba18684a74d"
|
||||
},
|
||||
{
|
||||
"name": "GooglePlus",
|
||||
"url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
|
||||
"file": "ZButuJc6CtH.flv"
|
||||
},
|
||||
{
|
||||
"name": "FunnyOrDie",
|
||||
"url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version",
|
||||
"file": "0732f586d7.mp4",
|
||||
"md5": "f647e9e90064b53b6e046e75d0241fbd"
|
||||
},
|
||||
{
|
||||
"name": "TweetReel",
|
||||
"url": "http://tweetreel.com/?77smq",
|
||||
"file": "77smq.mov",
|
||||
"md5": "56b4d9ca9de467920f3f99a6d91255d6",
|
||||
"info_dict": {
|
||||
"uploader": "itszero",
|
||||
"uploader_id": "itszero",
|
||||
"upload_date": "20091225",
|
||||
"description": "Installing Gentoo Linux on Powerbook G4, it turns out the sleep indicator becomes HDD activity indicator :D"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Steam",
|
||||
"url": "http://store.steampowered.com/video/105600/",
|
||||
"playlist": [
|
||||
{
|
||||
"file": "81300.flv",
|
||||
"md5": "f870007cee7065d7c76b88f0a45ecc07",
|
||||
"info_dict": {
|
||||
"title": "Terraria 1.1 Trailer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"file": "80859.flv",
|
||||
"md5": "61aaf31a5c5c3041afb58fb83cbb5751",
|
||||
"info_dict": {
|
||||
"title": "Terraria Trailer"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Ustream",
|
||||
"url": "http://www.ustream.tv/recorded/20274954",
|
||||
"file": "20274954.flv",
|
||||
"md5": "088f151799e8f572f84eb62f17d73e5c",
|
||||
"info_dict": {
|
||||
"title": "Young Americans for Liberty February 7, 2012 2:28 AM"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "InfoQ",
|
||||
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
||||
"file": "12-jan-pythonthings.mp4",
|
||||
"info_dict": {
|
||||
"title": "A Few of My Favorite [Python] Things"
|
||||
},
|
||||
"params": {
|
||||
"skip_download": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "ComedyCentral",
|
||||
"url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart",
|
||||
"file": "422212.mp4",
|
||||
"md5": "4e2f5cb088a83cd8cdb7756132f9739d",
|
||||
"info_dict": {
|
||||
"title": "thedailyshow-kristen-stewart part 1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "RBMARadio",
|
||||
"url": "http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011",
|
||||
"file": "ford-lopatin-live-at-primavera-sound-2011.mp3",
|
||||
"md5": "6bc6f9bcb18994b4c983bc3bf4384d95",
|
||||
"info_dict": {
|
||||
"title": "Live at Primavera Sound 2011",
|
||||
"description": "Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
||||
"uploader": "Ford & Lopatin",
|
||||
"uploader_id": "ford-lopatin",
|
||||
"location": "Spain"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Facebook",
|
||||
"url": "https://www.facebook.com/photo.php?v=120708114770723",
|
||||
"file": "120708114770723.mp4",
|
||||
"md5": "48975a41ccc4b7a581abd68651c1a5a8",
|
||||
"info_dict": {
|
||||
"title": "PEOPLE ARE AWESOME 2013",
|
||||
"duration": 279
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "EightTracks",
|
||||
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
||||
"playlist": [
|
||||
{
|
||||
"file": "11885610.m4a",
|
||||
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
|
||||
"info_dict": {
|
||||
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
}
|
||||
},
|
||||
{
|
||||
"file": "11885608.m4a",
|
||||
"md5": "4ab26f05c1f7291ea460a3920be8021f",
|
||||
"info_dict": {
|
||||
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
|
||||
}
|
||||
},
|
||||
{
|
||||
"file": "11885679.m4a",
|
||||
"md5": "d30b5b5f74217410f4689605c35d1fd7",
|
||||
"info_dict": {
|
||||
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
|
||||
}
|
||||
},
|
||||
{
|
||||
"file": "11885680.m4a",
|
||||
"md5": "4eb0a669317cd725f6bbd336a29f923a",
|
||||
"info_dict": {
|
||||
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
|
||||
}
|
||||
},
|
||||
{
|
||||
"file": "11885682.m4a",
|
||||
"md5": "1893e872e263a2705558d1d319ad19e8",
|
||||
"info_dict": {
|
||||
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
|
||||
}
|
||||
},
|
||||
{
|
||||
"file": "11885683.m4a",
|
||||
"md5": "b673c46f47a216ab1741ae8836af5899",
|
||||
"info_dict": {
|
||||
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
|
||||
}
|
||||
},
|
||||
{
|
||||
"file": "11885684.m4a",
|
||||
"md5": "1d74534e95df54986da7f5abf7d842b7",
|
||||
"info_dict": {
|
||||
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
|
||||
}
|
||||
},
|
||||
{
|
||||
"file": "11885685.m4a",
|
||||
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
|
||||
"info_dict": {
|
||||
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
@@ -38,7 +38,7 @@ def rsa_verify(message, signature, key):
|
||||
|
||||
sys.stderr.write(u'Hi! We changed distribution method and now youtube-dl needs to update itself one more time.\n')
|
||||
sys.stderr.write(u'This will only happen once. Simply press enter to go on. Sorry for the trouble!\n')
|
||||
sys.stderr.write(u'From now on, get the binaries from http://rg3.github.com/youtube-dl/download.html, not from the git repository.\n\n')
|
||||
sys.stderr.write(u'From now on, get the binaries from http://rg3.github.io/youtube-dl/download.html, not from the git repository.\n\n')
|
||||
|
||||
try:
|
||||
raw_input()
|
||||
@@ -47,7 +47,7 @@ except NameError: # Python 3
|
||||
|
||||
filename = sys.argv[0]
|
||||
|
||||
UPDATE_URL = "http://rg3.github.com/youtube-dl/update/"
|
||||
UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
|
||||
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
|
||||
JSON_URL = UPDATE_URL + 'versions.json'
|
||||
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
|
||||
|
@@ -1,13 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import math
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
@@ -23,89 +16,39 @@ class FileDownloader(object):
|
||||
"""File Downloader class.
|
||||
|
||||
File downloader objects are the ones responsible of downloading the
|
||||
actual video file and writing it to disk if the user has requested
|
||||
it, among some other tasks. In most cases there should be one per
|
||||
program. As, given a video URL, the downloader doesn't know how to
|
||||
extract all the needed information, task that InfoExtractors do, it
|
||||
has to pass the URL to one of them.
|
||||
|
||||
For this, file downloader objects have a method that allows
|
||||
InfoExtractors to be registered in a given order. When it is passed
|
||||
a URL, the file downloader handles it to the first InfoExtractor it
|
||||
finds that reports being able to handle it. The InfoExtractor extracts
|
||||
all the information about the video or videos the URL refers to, and
|
||||
asks the FileDownloader to process the video information, possibly
|
||||
downloading the video.
|
||||
actual video file and writing it to disk.
|
||||
|
||||
File downloaders accept a lot of parameters. In order not to saturate
|
||||
the object constructor with arguments, it receives a dictionary of
|
||||
options instead. These options are available through the params
|
||||
attribute for the InfoExtractors to use. The FileDownloader also
|
||||
registers itself as the downloader in charge for the InfoExtractors
|
||||
that are added to it, so this is a "mutual registration".
|
||||
options instead.
|
||||
|
||||
Available options:
|
||||
|
||||
username: Username for authentication purposes.
|
||||
password: Password for authentication purposes.
|
||||
usenetrc: Use netrc for authentication instead.
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
forceurl: Force printing final URL.
|
||||
forcetitle: Force printing title.
|
||||
forcethumbnail: Force printing thumbnail URL.
|
||||
forcedescription: Force printing description.
|
||||
forcefilename: Force printing final filename.
|
||||
simulate: Do not download the video files.
|
||||
format: Video format code.
|
||||
format_limit: Highest quality format to try.
|
||||
outtmpl: Template for output names.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
ignoreerrors: Do not stop on download errors.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
nooverwrites: Prevent overwriting files.
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
continuedl: Try to continue downloads if possible.
|
||||
noprogress: Do not print the progress bar.
|
||||
playliststart: Playlist item to start at.
|
||||
playlistend: Playlist item to end at.
|
||||
matchtitle: Download only matching titles.
|
||||
rejecttitle: Reject downloads for matching titles.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
consoletitle: Display progress in console window's titlebar.
|
||||
nopart: Do not use temporary .part files.
|
||||
updatetime: Use the Last-modified header to set output file timestamps.
|
||||
writedescription: Write the video description to a .description file
|
||||
writeinfojson: Write the video description to a .info.json file
|
||||
writesubtitles: Write the video subtitles to a .srt file
|
||||
subtitleslang: Language of the subtitles to download
|
||||
test: Download only first bytes to test the downloader.
|
||||
keepvideo: Keep the video file after post-processing
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
"""
|
||||
|
||||
params = None
|
||||
_ies = []
|
||||
_pps = []
|
||||
_download_retcode = None
|
||||
_num_downloads = None
|
||||
_screen_file = None
|
||||
|
||||
def __init__(self, params):
|
||||
def __init__(self, ydl, params):
|
||||
"""Create a FileDownloader object with the given options."""
|
||||
self._ies = []
|
||||
self._pps = []
|
||||
self.ydl = ydl
|
||||
self._progress_hooks = []
|
||||
self._download_retcode = 0
|
||||
self._num_downloads = 0
|
||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||
self.params = params
|
||||
|
||||
if '%(stitle)s' in self.params['outtmpl']:
|
||||
self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
|
||||
@staticmethod
|
||||
def format_bytes(bytes):
|
||||
if bytes is None:
|
||||
@@ -116,7 +59,7 @@ class FileDownloader(object):
|
||||
exponent = 0
|
||||
else:
|
||||
exponent = int(math.log(bytes, 1024.0))
|
||||
suffix = 'bkMGTPEZY'[exponent]
|
||||
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
|
||||
converted = float(bytes) / float(1024 ** exponent)
|
||||
return '%.2f%s' % (converted, suffix)
|
||||
|
||||
@@ -170,34 +113,11 @@ class FileDownloader(object):
|
||||
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
|
||||
return int(round(number * multiplier))
|
||||
|
||||
def add_info_extractor(self, ie):
|
||||
"""Add an InfoExtractor object to the end of the list."""
|
||||
self._ies.append(ie)
|
||||
ie.set_downloader(self)
|
||||
|
||||
def add_post_processor(self, pp):
|
||||
"""Add a PostProcessor object to the end of the chain."""
|
||||
self._pps.append(pp)
|
||||
pp.set_downloader(self)
|
||||
|
||||
def to_screen(self, message, skip_eol=False):
|
||||
"""Print message to stdout if not in quiet mode."""
|
||||
assert type(message) == type(u'')
|
||||
if not self.params.get('quiet', False):
|
||||
terminator = [u'\n', u''][skip_eol]
|
||||
output = message + terminator
|
||||
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
||||
output = output.encode(preferredencoding(), 'ignore')
|
||||
self._screen_file.write(output)
|
||||
self._screen_file.flush()
|
||||
def to_screen(self, *args, **kargs):
|
||||
self.ydl.to_screen(*args, **kargs)
|
||||
|
||||
def to_stderr(self, message):
|
||||
"""Print message to stderr."""
|
||||
assert type(message) == type(u'')
|
||||
output = message + u'\n'
|
||||
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
||||
output = output.encode(preferredencoding())
|
||||
sys.stderr.write(output)
|
||||
self.ydl.to_screen(message)
|
||||
|
||||
def to_cons_title(self, message):
|
||||
"""Set console/terminal window title to message."""
|
||||
@@ -208,31 +128,16 @@ class FileDownloader(object):
|
||||
# already of type unicode()
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||
elif 'TERM' in os.environ:
|
||||
sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
|
||||
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
|
||||
|
||||
def fixed_template(self):
|
||||
"""Checks if the output template is fixed."""
|
||||
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
|
||||
def trouble(self, *args, **kargs):
|
||||
self.ydl.trouble(*args, **kargs)
|
||||
|
||||
def trouble(self, message=None, tb=None):
|
||||
"""Determine action to take when a download problem appears.
|
||||
def report_warning(self, *args, **kargs):
|
||||
self.ydl.report_warning(*args, **kargs)
|
||||
|
||||
Depending on if the downloader has been configured to ignore
|
||||
download errors or not, this method may throw an exception or
|
||||
not when errors are found, after printing the message.
|
||||
|
||||
tb, if given, is additional traceback information.
|
||||
"""
|
||||
if message is not None:
|
||||
self.to_stderr(message)
|
||||
if self.params.get('verbose'):
|
||||
if tb is None:
|
||||
tb_data = traceback.format_list(traceback.extract_stack())
|
||||
tb = u''.join(tb_data)
|
||||
self.to_stderr(tb)
|
||||
if not self.params.get('ignoreerrors', False):
|
||||
raise DownloadError(message)
|
||||
self._download_retcode = 1
|
||||
def report_error(self, *args, **kargs):
|
||||
self.ydl.report_error(*args, **kargs)
|
||||
|
||||
def slow_down(self, start_time, byte_counter):
|
||||
"""Sleep if the download speed is over the rate limit."""
|
||||
@@ -265,7 +170,7 @@ class FileDownloader(object):
|
||||
return
|
||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||
except (IOError, OSError) as err:
|
||||
self.trouble(u'ERROR: unable to rename file')
|
||||
self.report_error(u'unable to rename file')
|
||||
|
||||
def try_utime(self, filename, last_modified_hdr):
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
@@ -279,24 +184,15 @@ class FileDownloader(object):
|
||||
filetime = timeconvert(timestr)
|
||||
if filetime is None:
|
||||
return filetime
|
||||
# Ignore obviously invalid dates
|
||||
if filetime == 0:
|
||||
return
|
||||
try:
|
||||
os.utime(filename, (time.time(), filetime))
|
||||
except:
|
||||
pass
|
||||
return filetime
|
||||
|
||||
def report_writedescription(self, descfn):
|
||||
""" Report that the description file is being written """
|
||||
self.to_screen(u'[info] Writing video description to: ' + descfn)
|
||||
|
||||
def report_writesubtitles(self, srtfn):
|
||||
""" Report that the subtitles file is being written """
|
||||
self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
|
||||
|
||||
def report_writeinfojson(self, infofn):
|
||||
""" Report that the metadata file has been written """
|
||||
self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
|
||||
|
||||
def report_destination(self, filename):
|
||||
"""Report destination filename."""
|
||||
self.to_screen(u'[download] Destination: ' + filename)
|
||||
@@ -305,8 +201,13 @@ class FileDownloader(object):
|
||||
"""Report download progress."""
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
|
||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
else:
|
||||
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
|
||||
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
|
||||
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
|
||||
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
|
||||
|
||||
@@ -336,245 +237,31 @@ class FileDownloader(object):
|
||||
else:
|
||||
self.to_screen(u'')
|
||||
|
||||
def increment_downloads(self):
|
||||
"""Increment the ordinal that assigns a number to each file."""
|
||||
self._num_downloads += 1
|
||||
|
||||
def prepare_filename(self, info_dict):
|
||||
"""Generate the output filename."""
|
||||
try:
|
||||
template_dict = dict(info_dict)
|
||||
|
||||
template_dict['epoch'] = int(time.time())
|
||||
template_dict['autonumber'] = u'%05d' % self._num_downloads
|
||||
|
||||
sanitize = lambda k,v: sanitize_filename(
|
||||
u'NA' if v is None else compat_str(v),
|
||||
restricted=self.params.get('restrictfilenames'),
|
||||
is_id=(k==u'id'))
|
||||
template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
|
||||
|
||||
filename = self.params['outtmpl'] % template_dict
|
||||
return filename
|
||||
except (ValueError, KeyError) as err:
|
||||
self.trouble(u'ERROR: invalid system charset or erroneous output template')
|
||||
return None
|
||||
|
||||
def _match_entry(self, info_dict):
|
||||
""" Returns None iff the file should be downloaded """
|
||||
|
||||
title = info_dict['title']
|
||||
matchtitle = self.params.get('matchtitle', False)
|
||||
if matchtitle:
|
||||
matchtitle = matchtitle.decode('utf8')
|
||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||
rejecttitle = self.params.get('rejecttitle', False)
|
||||
if rejecttitle:
|
||||
rejecttitle = rejecttitle.decode('utf8')
|
||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||
return None
|
||||
|
||||
def process_info(self, info_dict):
|
||||
"""Process a single dictionary returned by an InfoExtractor."""
|
||||
|
||||
# Keep for backwards compatibility
|
||||
info_dict['stitle'] = info_dict['title']
|
||||
|
||||
if not 'format' in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
|
||||
reason = self._match_entry(info_dict)
|
||||
if reason is not None:
|
||||
self.to_screen(u'[download] ' + reason)
|
||||
return
|
||||
|
||||
max_downloads = self.params.get('max_downloads')
|
||||
if max_downloads is not None:
|
||||
if self._num_downloads > int(max_downloads):
|
||||
raise MaxDownloadsReached()
|
||||
|
||||
filename = self.prepare_filename(info_dict)
|
||||
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
compat_print(info_dict['title'])
|
||||
if self.params.get('forceurl', False):
|
||||
compat_print(info_dict['url'])
|
||||
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
|
||||
compat_print(info_dict['thumbnail'])
|
||||
if self.params.get('forcedescription', False) and 'description' in info_dict:
|
||||
compat_print(info_dict['description'])
|
||||
if self.params.get('forcefilename', False) and filename is not None:
|
||||
compat_print(filename)
|
||||
if self.params.get('forceformat', False):
|
||||
compat_print(info_dict['format'])
|
||||
|
||||
# Do nothing else if in simulate mode
|
||||
if self.params.get('simulate', False):
|
||||
return
|
||||
|
||||
if filename is None:
|
||||
return
|
||||
|
||||
try:
|
||||
dn = os.path.dirname(encodeFilename(filename))
|
||||
if dn != '' and not os.path.exists(dn): # dn is already encoded
|
||||
os.makedirs(dn)
|
||||
except (OSError, IOError) as err:
|
||||
self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
|
||||
return
|
||||
|
||||
if self.params.get('writedescription', False):
|
||||
try:
|
||||
descfn = filename + u'.description'
|
||||
self.report_writedescription(descfn)
|
||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||
descfile.write(info_dict['description'])
|
||||
except (OSError, IOError):
|
||||
self.trouble(u'ERROR: Cannot write description file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
# subtitles download errors are already managed as troubles in relevant IE
|
||||
# that way it will silently go on when used with unsupporting IE
|
||||
try:
|
||||
srtfn = filename.rsplit('.', 1)[0] + u'.srt'
|
||||
self.report_writesubtitles(srtfn)
|
||||
with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
|
||||
srtfile.write(info_dict['subtitles'])
|
||||
except (OSError, IOError):
|
||||
self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = filename + u'.info.json'
|
||||
self.report_writeinfojson(infofn)
|
||||
try:
|
||||
json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
|
||||
write_json_file(json_info_dict, encodeFilename(infofn))
|
||||
except (OSError, IOError):
|
||||
self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
|
||||
return
|
||||
|
||||
if not self.params.get('skip_download', False):
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
|
||||
success = True
|
||||
else:
|
||||
try:
|
||||
success = self._do_download(filename, info_dict)
|
||||
except (OSError, IOError) as err:
|
||||
raise UnavailableVideoError()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.trouble(u'ERROR: unable to download video data: %s' % str(err))
|
||||
return
|
||||
except (ContentTooShortError, ) as err:
|
||||
self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
|
||||
return
|
||||
|
||||
if success:
|
||||
try:
|
||||
self.post_process(filename, info_dict)
|
||||
except (PostProcessingError) as err:
|
||||
self.trouble(u'ERROR: postprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
def download(self, url_list):
|
||||
"""Download a given list of URLs."""
|
||||
if len(url_list) > 1 and self.fixed_template():
|
||||
raise SameFileError(self.params['outtmpl'])
|
||||
|
||||
for url in url_list:
|
||||
suitable_found = False
|
||||
for ie in self._ies:
|
||||
# Go to next InfoExtractor if not suitable
|
||||
if not ie.suitable(url):
|
||||
continue
|
||||
|
||||
# Warn if the _WORKING attribute is False
|
||||
if not ie.working():
|
||||
self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
|
||||
u'and will probably not work. If you want to go on, use the -i option.')
|
||||
|
||||
# Suitable InfoExtractor found
|
||||
suitable_found = True
|
||||
|
||||
# Extract information from URL and process it
|
||||
try:
|
||||
videos = ie.extract(url)
|
||||
except ExtractorError as de: # An error we somewhat expected
|
||||
self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
|
||||
break
|
||||
except Exception as e:
|
||||
if self.params.get('ignoreerrors', False):
|
||||
self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
|
||||
break
|
||||
else:
|
||||
raise
|
||||
|
||||
if len(videos or []) > 1 and self.fixed_template():
|
||||
raise SameFileError(self.params['outtmpl'])
|
||||
|
||||
for video in videos or []:
|
||||
video['extractor'] = ie.IE_NAME
|
||||
try:
|
||||
self.increment_downloads()
|
||||
self.process_info(video)
|
||||
except UnavailableVideoError:
|
||||
self.trouble(u'\nERROR: unable to download video')
|
||||
|
||||
# Suitable InfoExtractor had been found; go to next URL
|
||||
break
|
||||
|
||||
if not suitable_found:
|
||||
self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
|
||||
|
||||
return self._download_retcode
|
||||
|
||||
def post_process(self, filename, ie_info):
|
||||
"""Run all the postprocessors on the given file."""
|
||||
info = dict(ie_info)
|
||||
info['filepath'] = filename
|
||||
keep_video = None
|
||||
for pp in self._pps:
|
||||
try:
|
||||
keep_video_wish,new_info = pp.run(info)
|
||||
if keep_video_wish is not None:
|
||||
if keep_video_wish:
|
||||
keep_video = keep_video_wish
|
||||
elif keep_video is None:
|
||||
# No clear decision yet, let IE decide
|
||||
keep_video = keep_video_wish
|
||||
except PostProcessingError as e:
|
||||
self.to_stderr(u'ERROR: ' + e.msg)
|
||||
if keep_video is False and not self.params.get('keepvideo', False):
|
||||
try:
|
||||
self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
|
||||
os.remove(encodeFilename(filename))
|
||||
except (IOError, OSError):
|
||||
self.to_stderr(u'WARNING: Unable to remove downloaded video file')
|
||||
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url):
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
# Check for rtmpdump first
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
|
||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||
return False
|
||||
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrupted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
|
||||
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
|
||||
if player_url is not None:
|
||||
basic_args += ['-W', player_url]
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||
if self.params.get('verbose', False):
|
||||
try:
|
||||
import pipes
|
||||
@@ -608,9 +295,41 @@ class FileDownloader(object):
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'rtmpdump exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
def _download_with_mplayer(self, filename, url):
    """Download url into filename by dumping the stream with mplayer.

    Returns True when mplayer exits with code 0 (the temporary file is then
    renamed to filename and a 'finished' progress hook is fired), and False
    when mplayer cannot be run or exits with a non-zero code.
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
    # Check for mplayer first
    try:
        # Open the null device in a with-block so the handle is closed again
        # instead of being leaked once the probe call returns.
        with open(os.devnull, 'w') as devnull:
            subprocess.call(['mplayer', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
        return False

    # Download using mplayer.
    retval = subprocess.call(args)
    if retval == 0:
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'mplayer exited with code %d' % retval)
        return False
|
||||
|
||||
|
||||
def _do_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
|
||||
@@ -627,7 +346,13 @@ class FileDownloader(object):
|
||||
if url.startswith('rtmp'):
|
||||
return self._download_with_rtmpdump(filename, url,
|
||||
info_dict.get('player_url', None),
|
||||
info_dict.get('page_url', None))
|
||||
info_dict.get('page_url', None),
|
||||
info_dict.get('play_path', None),
|
||||
info_dict.get('tc_url', None))
|
||||
|
||||
# Attempt to download using mplayer
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return self._download_with_mplayer(filename, url)
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
@@ -708,7 +433,7 @@ class FileDownloader(object):
|
||||
self.report_retry(count, retries)
|
||||
|
||||
if count > retries:
|
||||
self.trouble(u'ERROR: giving up after %s retries' % retries)
|
||||
self.report_error(u'giving up after %s retries' % retries)
|
||||
return False
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
@@ -744,12 +469,13 @@ class FileDownloader(object):
|
||||
filename = self.undo_temp_name(tmpfilename)
|
||||
self.report_destination(filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
|
||||
self.report_error(u'unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
self.trouble(u'\nERROR: unable to write data: %s' % str(err))
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'unable to write data: %s' % str(err))
|
||||
return False
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
@@ -775,7 +501,8 @@ class FileDownloader(object):
|
||||
self.slow_down(start, byte_counter - resume_len)
|
||||
|
||||
if stream is None:
|
||||
self.trouble(u'\nERROR: Did not get any data blocks')
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
return False
|
||||
stream.close()
|
||||
self.report_finish()
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,3 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -85,8 +80,9 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout,stderr = p.communicate()
|
||||
if p.returncode != 0:
|
||||
stderr = stderr.decode('utf-8', 'replace')
|
||||
msg = stderr.strip().split('\n')[-1]
|
||||
raise FFmpegPostProcessorError(msg.decode('utf-8', 'replace'))
|
||||
raise FFmpegPostProcessorError(msg)
|
||||
|
||||
def _ffmpeg_filename_argument(self, fn):
|
||||
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
|
||||
@@ -188,6 +184,11 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
|
||||
prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
|
||||
new_path = prefix + sep + extension
|
||||
|
||||
# If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
|
||||
if new_path == path:
|
||||
self._nopostoverwrites = True
|
||||
|
||||
try:
|
||||
if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
|
||||
self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
|
||||
@@ -210,7 +211,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
|
||||
|
||||
information['filepath'] = new_path
|
||||
return False,information
|
||||
return self._nopostoverwrites,information
|
||||
|
||||
class FFmpegVideoConvertor(FFmpegPostProcessor):
|
||||
def __init__(self, downloader=None,preferedformat=None):
|
||||
|
603
youtube_dl/YoutubeDL.py
Normal file
603
youtube_dl/YoutubeDL.py
Normal file
@@ -0,0 +1,603 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from .utils import *
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .FileDownloader import FileDownloader
|
||||
|
||||
|
||||
class YoutubeDL(object):
|
||||
"""YoutubeDL class.
|
||||
|
||||
YoutubeDL objects are the ones responsible for downloading the
|
||||
actual video file and writing it to disk if the user has requested
|
||||
it, among some other tasks. In most cases there should be one per
|
||||
program. As, given a video URL, the downloader doesn't know how to
|
||||
extract all the needed information, task that InfoExtractors do, it
|
||||
has to pass the URL to one of them.
|
||||
|
||||
For this, YoutubeDL objects have a method that allows
|
||||
InfoExtractors to be registered in a given order. When it is passed
|
||||
a URL, the YoutubeDL object hands it to the first InfoExtractor it
|
||||
finds that reports being able to handle it. The InfoExtractor extracts
|
||||
all the information about the video or videos the URL refers to, and
|
||||
YoutubeDL processes the extracted information, possibly using a File
|
||||
Downloader to download the video.
|
||||
|
||||
YoutubeDL objects accept a lot of parameters. In order not to saturate
|
||||
the object constructor with arguments, it receives a dictionary of
|
||||
options instead. These options are available through the params
|
||||
attribute for the InfoExtractors to use. The YoutubeDL also
|
||||
registers itself as the downloader in charge for the InfoExtractors
|
||||
that are added to it, so this is a "mutual registration".
|
||||
|
||||
Available options:
|
||||
|
||||
username: Username for authentication purposes.
|
||||
password: Password for authentication purposes.
|
||||
videopassword: Password for accessing a video.
|
||||
usenetrc: Use netrc for authentication instead.
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
forceurl: Force printing final URL.
|
||||
forcetitle: Force printing title.
|
||||
forceid: Force printing ID.
|
||||
forcethumbnail: Force printing thumbnail URL.
|
||||
forcedescription: Force printing description.
|
||||
forcefilename: Force printing final filename.
|
||||
simulate: Do not download the video files.
|
||||
format: Video format code.
|
||||
format_limit: Highest quality format to try.
|
||||
outtmpl: Template for output names.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
ignoreerrors: Do not stop on download errors.
|
||||
nooverwrites: Prevent overwriting files.
|
||||
playliststart: Playlist item to start at.
|
||||
playlistend: Playlist item to end at.
|
||||
matchtitle: Download only matching titles.
|
||||
rejecttitle: Reject downloads for matching titles.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
writedescription: Write the video description to a .description file
|
||||
writeinfojson: Write the video metadata to a .info.json file
|
||||
writethumbnail: Write the thumbnail image to a file
|
||||
writesubtitles: Write the video subtitles to a file
|
||||
writeautomaticsub: Write the automatic subtitles to a file
|
||||
allsubtitles: Downloads all the subtitles of the video
|
||||
listsubtitles: Lists all available subtitles for the video
|
||||
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
|
||||
subtitleslang: Language of the subtitles to download
|
||||
keepvideo: Keep the video file after post-processing
|
||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||
skip_download: Skip the actual download of the video file
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||
noresizebuffer, retries, continuedl, noprogress, consoletitle
|
||||
"""
|
||||
|
||||
params = None
|
||||
_ies = []
|
||||
_pps = []
|
||||
_download_retcode = None
|
||||
_num_downloads = None
|
||||
_screen_file = None
|
||||
|
||||
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary; it is stored as self.params and is
    also passed to the FileDownloader instance created here (self.fd).
    """
    self._ies = []
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Screen messages go to stderr when 'logtostderr' is set, stdout otherwise.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self.params = params
    self.fd = FileDownloader(self, self.params)

    # A missing (or None) output template simply has nothing to warn about.
    if '%(stitle)s' in (self.params.get('outtmpl') or u''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
|
||||
def add_info_extractor(self, ie):
    """Append an InfoExtractor to the list and set this object as its downloader."""
    extractors = self._ies
    extractors.append(ie)
    ie.set_downloader(self)
|
||||
|
||||
def add_default_info_extractors(self):
    """
    Register every InfoExtractor returned by gen_extractors, in the order given
    """
    default_extractors = gen_extractors()
    for extractor in default_extractors:
        self.add_info_extractor(extractor)
|
||||
|
||||
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and set this object as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
|
||||
|
||||
def to_screen(self, message, skip_eol=False):
    """Write message to the screen file unless in quiet mode.

    The screen file is self._screen_file. A newline is appended unless
    skip_eol is true. message must be a unicode string.
    """
    assert isinstance(message, type(u''))
    if self.params.get('quiet', False):
        return
    # Any truthy skip_eol suppresses the newline (indexing a two-item list
    # with the flag failed for values other than True/False/0/1).
    output = message if skip_eol else message + u'\n'
    if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
        output = output.encode(preferredencoding(), 'ignore')
    self._screen_file.write(output)
    self._screen_file.flush()
|
||||
|
||||
def to_stderr(self, message):
    """Print message, followed by a newline, to stderr.

    message must be a unicode string.
    """
    assert isinstance(message, type(u''))
    output = message + u'\n'
    # Decide whether to encode from the stream that is actually written to
    # (sys.stderr), not from self._screen_file, which may be a different stream.
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
|
||||
|
||||
def fixed_template(self):
    """Return True when the output template contains no %(...)s field."""
    template = self.params['outtmpl']
    field_match = re.search(u'(?u)%\\(.+?\\)s', template)
    return field_match is None
|
||||
|
||||
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first; in verbose mode a
    traceback is printed as well. Then, unless the 'ignoreerrors' option is
    set, a DownloadError is raised; otherwise the return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # if .trouble has been called from an except block
                tb = u''
                # Exceptions carrying their own exc_info get that one printed first
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    tb += u''.join(traceback.format_exception(*exc_value.exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                stack_lines = traceback.format_list(traceback.extract_stack())
                tb = u''.join(stack_lines)
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
|
||||
|
||||
def report_warning(self, message):
    '''
    Send the message to stderr with a 'WARNING:' prefix.
    The prefix is colored when stderr is a tty and the OS is not 'nt'.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;33mWARNING:\033[0m' if colorize else u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
|
||||
|
||||
def report_error(self, message, tb=None):
    '''
    Forward to trouble() with the message prefixed by 'ERROR:'.
    The prefix is colored red when stderr is a tty and the OS is not 'nt'.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;31mERROR:\033[0m' if colorize else u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
|
||||
|
||||
def slow_down(self, start_time, byte_counter):
    """Pause the download when its average speed exceeds the 'ratelimit' option.

    Does nothing when no limit is set, no bytes were counted, or no time has
    elapsed since start_time.
    """
    limit = self.params.get('ratelimit', None)
    if limit is None or byte_counter == 0:
        return
    now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    if float(byte_counter) / elapsed > limit:
        # Sleep for the time by which the transfer is ahead of the limit
        excess_bytes = byte_counter - limit * (now - start_time)
        time.sleep(excess_bytes / limit)
|
||||
|
||||
def report_writedescription(self, descfn):
    """ Announce on screen that the description file descfn is being written """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
|
||||
|
||||
def report_writesubtitles(self, sub_filename):
    """ Announce on screen that the subtitles file sub_filename is being written """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
|
||||
|
||||
def report_writeinfojson(self, infofn):
    """ Announce on screen the JSON metadata file infofn """
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded.

    Falls back to a message without the file name when printing it raises
    UnicodeEncodeError.
    """
    generic_notice = u'[download] The file has already been downloaded'
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(generic_notice)
|
||||
|
||||
def increment_downloads(self):
    """Advance by one the counter that numbers the downloaded files."""
    self._num_downloads = self._num_downloads + 1
|
||||
|
||||
def prepare_filename(self, info_dict):
    """Generate the output filename.

    A copy of info_dict is extended with 'epoch' (current time as an int)
    and 'autonumber' (self._num_downloads zero-padded to
    params['autonumber_size'] digits, 5 when unset); 'playlist_index' is
    zero-padded to 5 digits when present. Every value is then passed
    through sanitize_filename() and the result is interpolated into
    params['outtmpl'].

    Returns the filename, or None after reporting an error when the
    template references an unknown field (KeyError) or a ValueError is
    raised while building the name.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # Use .get(): an info_dict without 'playlist_index' must not be
        # reported as an erroneous template unless the template uses it.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def sanitize(key, value):
            # Missing values become the literal 'NA'.
            return sanitize_filename(
                u'NA' if value is None else compat_str(value),
                restricted=restricted,
                is_id=(key == u'id'))
        template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

        filename = self.params['outtmpl'] % template_dict
        return filename
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError:
        self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
        return None
|
||||
|
||||
def _match_entry(self, info_dict):
|
||||
""" Returns None iff the file should be downloaded """
|
||||
|
||||
title = info_dict['title']
|
||||
matchtitle = self.params.get('matchtitle', False)
|
||||
if matchtitle:
|
||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||
rejecttitle = self.params.get('rejecttitle', False)
|
||||
if rejecttitle:
|
||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||
date = info_dict.get('upload_date', None)
|
||||
if date is not None:
|
||||
dateRange = self.params.get('daterange', DateRange())
|
||||
if date not in dateRange:
|
||||
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||
return None
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    '''
    Extract the information of url with the first suitable extractor.

    If 'download', also downloads the videos.
    ie_key selects a specific extractor (looked up with
    get_info_extractor) instead of trying every one in self._ies.
    extra_info is a dict containing the extra values to add to each result.

    Returns what process_ie_result() returns for the extracted result.
    Returns None when the extractor returns None, when an error has been
    reported, or when no extractor is suitable for the url.
    '''
    # Avoid a shared mutable default argument.
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ie = get_info_extractor(ie_key)()
        ie.set_downloader(self)
        ies = [ie]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                for result in ie_result:
                    result.update(extra_info)
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            else:
                ie_result.update(extra_info)
            if 'extractor' not in ie_result:
                ie_result['extractor'] = ie.IE_NAME
            return self.process_ie_result(ie_result, download=download)
        except ExtractorError as de: # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # Only reached when the loop was never left with 'break'/'return',
        # i.e. no extractor accepted the url.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
|
||||
|
||||
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is a dict of extra values merged into 'video' results and
    forwarded to extract_info() for 'url' results.
    Returns the resolved ie_result.
    Raises Exception when ie_result['_type'] is not one of 'video', 'url',
    'playlist' or 'compat_list'.
    """
    # Avoid a shared mutable default argument.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        ie_result.update(extra_info)
        if 'playlist' not in ie_result:
            # It isn't part of a playlist
            ie_result['playlist'] = None
            ie_result['playlist_index'] = None
        if download:
            self.process_info(ie_result)
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # 'playliststart' is 1-based, slicing is 0-based
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                # position within the whole playlist, not within the slice
                'playlist_index': i + playliststart,
            }
            if 'extractor' not in entry:
                # We set the extractor, if it's an url it will be set then to
                # the new extractor, but if it's already a video we must make
                # sure it's present: see issue #877
                entry['extractor'] = ie_result['extractor']
            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            r.setdefault('extractor', ie_result['extractor'])
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download=download)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
def process_info(self, info_dict):
    """Process a single resolved IE result.

    info_dict is modified in place: 'fulltitle' keeps the original title,
    'title' is cut to 200 characters, 'stitle' mirrors 'title' and
    'format' defaults to 'ext'. Depending on self.params the method then
    prints the forced fields, writes the description / subtitles /
    info JSON / thumbnail files next to the output file, downloads the
    video and runs the postprocessors.

    Returns None. Raises MaxDownloadsReached when the download counter
    exceeds params['max_downloads'] and UnavailableVideoError when the
    download fails with an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        compat_print(info_dict['url'])
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    # prepare_filename() already reported the error
    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + u'.description'
        try:
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitle = info_dict['subtitles'][0]
        (sub_error, sub_lang, sub) = subtitle
        sub_format = self.params.get('subtitlesformat')
        if sub_error:
            self.report_warning("Some error while getting the subtitles")
        else:
            sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Name the subtitles file: 'descfn' only exists when
                # 'writedescription' is set and is the wrong file anyway.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat')
        for subtitle in subtitles:
            (sub_error, sub_lang, sub) = subtitle
            if sub_error:
                self.report_warning("Some error while getting the subtitles")
            else:
                sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
                try:
                    self.report_writesubtitles(sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
                except (OSError, IOError):
                    self.report_error(u'Cannot write subtitles file ' + sub_filename)
                    return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the JSON dump
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
            # Encode the name like every other file written by this method
            with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                shutil.copyfileobj(uf, thumbf)
            self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                           (info_dict['extractor'], info_dict['id'], thumb_filename))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The existing file counts as a successful download
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (OSError, IOError):
                raise UnavailableVideoError()
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return
|
||||
|
||||
def download(self, url_list):
    """Download every URL of url_list and return self._download_retcode.

    Raises SameFileError when several URLs are given together with a
    fixed output template. MaxDownloadsReached is announced on screen
    and re-raised.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for target in url_list:
        try:
            # extract_info() also downloads the videos
            videos = self.extract_info(target)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
|
||||
|
||||
def post_process(self, filename, ie_info):
    """Run every postprocessor of self._pps on the downloaded file.

    Each postprocessor receives a copy of ie_info extended with
    'filepath' and returns a (keep_video_wish, new_info) pair. A truthy
    wish always wins; a falsy, non-None wish only counts while nothing
    has been decided yet. The original file is removed when the final
    decision is False and params['keepvideo'] is not set.
    """
    pp_info = dict(ie_info)
    pp_info['filepath'] = filename
    keep_decision = None
    for processor in self._pps:
        try:
            wish, new_info = processor.run(pp_info)
        except PostProcessingError as e:
            self.to_stderr(u'ERROR: ' + e.msg)
            continue
        if wish:
            keep_decision = wish
        elif wish is not None and keep_decision is None:
            # No clear decision yet, take the postprocessor's wish
            keep_decision = wish
    if keep_decision is not False or self.params.get('keepvideo', False):
        return
    try:
        self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
        os.remove(encodeFilename(filename))
    except (IOError, OSError):
        self.report_warning(u'Unable to remove downloaded video file')
|
@@ -1,9 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import with_statement
|
||||
from __future__ import absolute_import
|
||||
|
||||
__authors__ = (
|
||||
'Ricardo Garcia Gonzalez',
|
||||
'Danny Colligan',
|
||||
@@ -23,13 +20,22 @@ __authors__ = (
|
||||
'Dave Vasilevsky',
|
||||
'Jaime Marquínez Ferrándiz',
|
||||
'Jeff Crouse',
|
||||
)
|
||||
'Osama Khalid',
|
||||
'Michael Walter',
|
||||
'M. Yasoob Ullah Khalid',
|
||||
'Julien Fraichard',
|
||||
'Johny Mo Swag',
|
||||
'Axel Noack',
|
||||
'Albert Kim',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
||||
import codecs
|
||||
import getpass
|
||||
import optparse
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import shlex
|
||||
import socket
|
||||
@@ -42,10 +48,11 @@ from .utils import *
|
||||
from .update import update_self
|
||||
from .version import __version__
|
||||
from .FileDownloader import *
|
||||
from .InfoExtractors import gen_extractors
|
||||
from .extractor import gen_extractors
|
||||
from .YoutubeDL import YoutubeDL
|
||||
from .PostProcessor import *
|
||||
|
||||
def parseOpts():
|
||||
def parseOpts(overrideArguments=None):
|
||||
def _readOptions(filename_bytes):
|
||||
try:
|
||||
optionf = open(filename_bytes)
|
||||
@@ -112,6 +119,7 @@ def parseOpts():
|
||||
selection = optparse.OptionGroup(parser, 'Video Selection')
|
||||
authentication = optparse.OptionGroup(parser, 'Authentication Options')
|
||||
video_format = optparse.OptionGroup(parser, 'Video Format Options')
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
||||
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
|
||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||
@@ -121,27 +129,26 @@ def parseOpts():
|
||||
general.add_option('-v', '--version',
|
||||
action='version', help='print program version and exit')
|
||||
general.add_option('-U', '--update',
|
||||
action='store_true', dest='update_self', help='update this program to latest version')
|
||||
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
|
||||
general.add_option('-i', '--ignore-errors',
|
||||
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
|
||||
general.add_option('-r', '--rate-limit',
|
||||
dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
|
||||
general.add_option('-R', '--retries',
|
||||
dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
|
||||
general.add_option('--buffer-size',
|
||||
dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
|
||||
general.add_option('--no-resize-buffer',
|
||||
action='store_true', dest='noresizebuffer',
|
||||
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
|
||||
general.add_option('--dump-user-agent',
|
||||
action='store_true', dest='dump_user_agent',
|
||||
help='display the current browser identification', default=False)
|
||||
general.add_option('--user-agent',
|
||||
dest='user_agent', help='specify a custom user agent', metavar='UA')
|
||||
general.add_option('--referer',
|
||||
dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
|
||||
metavar='REF', default=None)
|
||||
general.add_option('--list-extractors',
|
||||
action='store_true', dest='list_extractors',
|
||||
help='List all supported extractors and the URLs they would handle', default=False)
|
||||
general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
|
||||
general.add_option('--extractor-descriptions',
|
||||
action='store_true', dest='list_extractor_descriptions',
|
||||
help='Output descriptions of all supported extractors', default=False)
|
||||
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||
|
||||
|
||||
selection.add_option('--playlist-start',
|
||||
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
|
||||
@@ -152,6 +159,9 @@ def parseOpts():
|
||||
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
|
||||
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
||||
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
||||
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
||||
|
||||
|
||||
authentication.add_option('-u', '--username',
|
||||
@@ -160,10 +170,13 @@ def parseOpts():
|
||||
dest='password', metavar='PASSWORD', help='account password')
|
||||
authentication.add_option('-n', '--netrc',
|
||||
action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
|
||||
authentication.add_option('--video-password',
|
||||
dest='videopassword', metavar='PASSWORD', help='video password (vimeo only)')
|
||||
|
||||
|
||||
video_format.add_option('-f', '--format',
|
||||
action='store', dest='format', metavar='FORMAT', help='video format code')
|
||||
action='store', dest='format', metavar='FORMAT',
|
||||
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"')
|
||||
video_format.add_option('--all-formats',
|
||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||
video_format.add_option('--prefer-free-formats',
|
||||
@@ -172,12 +185,38 @@ def parseOpts():
|
||||
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
|
||||
video_format.add_option('-F', '--list-formats',
|
||||
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
|
||||
video_format.add_option('--write-srt',
|
||||
video_format.add_option('--write-sub', '--write-srt',
|
||||
action='store_true', dest='writesubtitles',
|
||||
help='write video closed captions to a .srt file (currently youtube only)', default=False)
|
||||
video_format.add_option('--srt-lang',
|
||||
help='write subtitle file (currently youtube only)', default=False)
|
||||
video_format.add_option('--write-auto-sub', '--write-automatic-sub',
|
||||
action='store_true', dest='writeautomaticsub',
|
||||
help='write automatic subtitle file (currently youtube only)', default=False)
|
||||
video_format.add_option('--only-sub',
|
||||
action='store_true', dest='skip_download',
|
||||
help='[deprecated] alias of --skip-download', default=False)
|
||||
video_format.add_option('--all-subs',
|
||||
action='store_true', dest='allsubtitles',
|
||||
help='downloads all the available subtitles of the video (currently youtube only)', default=False)
|
||||
video_format.add_option('--list-subs',
|
||||
action='store_true', dest='listsubtitles',
|
||||
help='lists all available subtitles for the video (currently youtube only)', default=False)
|
||||
video_format.add_option('--sub-format',
|
||||
action='store', dest='subtitlesformat', metavar='FORMAT',
|
||||
help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
|
||||
video_format.add_option('--sub-lang', '--srt-lang',
|
||||
action='store', dest='subtitleslang', metavar='LANG',
|
||||
help='language of the closed captions to download (optional) use IETF language tags like \'en\'')
|
||||
help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
|
||||
|
||||
downloader.add_option('-r', '--rate-limit',
|
||||
dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
|
||||
downloader.add_option('-R', '--retries',
|
||||
dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
|
||||
downloader.add_option('--buffer-size',
|
||||
dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
|
||||
downloader.add_option('--no-resize-buffer',
|
||||
action='store_true', dest='noresizebuffer',
|
||||
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
|
||||
downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
|
||||
|
||||
verbosity.add_option('-q', '--quiet',
|
||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||
@@ -189,6 +228,8 @@ def parseOpts():
|
||||
action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
|
||||
verbosity.add_option('-e', '--get-title',
|
||||
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
|
||||
verbosity.add_option('--get-id',
|
||||
action='store_true', dest='getid', help='simulate, quiet but print id', default=False)
|
||||
verbosity.add_option('--get-thumbnail',
|
||||
action='store_true', dest='getthumbnail',
|
||||
help='simulate, quiet but print thumbnail URL', default=False)
|
||||
@@ -201,6 +242,8 @@ def parseOpts():
|
||||
verbosity.add_option('--get-format',
|
||||
action='store_true', dest='getformat',
|
||||
help='simulate, quiet but print output format', default=False)
|
||||
verbosity.add_option('--newline',
|
||||
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
||||
verbosity.add_option('--no-progress',
|
||||
action='store_true', dest='noprogress', help='do not print progress bar', default=False)
|
||||
verbosity.add_option('--console-title',
|
||||
@@ -208,19 +251,33 @@ def parseOpts():
|
||||
help='display progress in console titlebar', default=False)
|
||||
verbosity.add_option('-v', '--verbose',
|
||||
action='store_true', dest='verbose', help='print various debugging information', default=False)
|
||||
|
||||
verbosity.add_option('--dump-intermediate-pages',
|
||||
action='store_true', dest='dump_intermediate_pages', default=False,
|
||||
help='print downloaded pages to debug problems(very verbose)')
|
||||
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='use title in file name', default=False)
|
||||
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
|
||||
filesystem.add_option('--id',
|
||||
action='store_true', dest='useid', help='use video ID in file name', default=False)
|
||||
action='store_true', dest='useid', help='use only video ID in file name', default=False)
|
||||
filesystem.add_option('-l', '--literal',
|
||||
action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
|
||||
filesystem.add_option('-A', '--auto-number',
|
||||
action='store_true', dest='autonumber',
|
||||
help='number downloaded files starting from 00000', default=False)
|
||||
filesystem.add_option('-o', '--output',
|
||||
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')
|
||||
dest='outtmpl', metavar='TEMPLATE',
|
||||
help=('output filename template. Use %(title)s to get the title, '
|
||||
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
|
||||
'%(autonumber)s to get an automatically incremented number, '
|
||||
'%(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), '
|
||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||
'%(id)s for the video id , %(playlist)s for the playlist the video is in, '
|
||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||
'Use - to output to stdout. Can also be used to download to a different directory, '
|
||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||
filesystem.add_option('--autonumber-size',
|
||||
dest='autonumber_size', metavar='NUMBER',
|
||||
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
|
||||
filesystem.add_option('--restrict-filenames',
|
||||
action='store_true', dest='restrictfilenames',
|
||||
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
||||
@@ -246,6 +303,9 @@ def parseOpts():
|
||||
filesystem.add_option('--write-info-json',
|
||||
action='store_true', dest='writeinfojson',
|
||||
help='write video metadata to a .info.json file', default=False)
|
||||
filesystem.add_option('--write-thumbnail',
|
||||
action='store_true', dest='writethumbnail',
|
||||
help='write thumbnail image to disk', default=False)
|
||||
|
||||
|
||||
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
|
||||
@@ -264,24 +324,42 @@ def parseOpts():
|
||||
|
||||
parser.add_option_group(general)
|
||||
parser.add_option_group(selection)
|
||||
parser.add_option_group(downloader)
|
||||
parser.add_option_group(filesystem)
|
||||
parser.add_option_group(verbosity)
|
||||
parser.add_option_group(video_format)
|
||||
parser.add_option_group(authentication)
|
||||
parser.add_option_group(postproc)
|
||||
|
||||
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||
if xdg_config_home:
|
||||
userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
||||
if overrideArguments is not None:
|
||||
opts, args = parser.parse_args(overrideArguments)
|
||||
if opts.verbose:
|
||||
sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
||||
else:
|
||||
userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||
argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
|
||||
opts, args = parser.parse_args(argv)
|
||||
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||
if xdg_config_home:
|
||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
||||
else:
|
||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||
userConf = _readOptions(userConfFile)
|
||||
commandLineConf = sys.argv[1:]
|
||||
argv = systemConf + userConf + commandLineConf
|
||||
opts, args = parser.parse_args(argv)
|
||||
if opts.verbose:
|
||||
sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
|
||||
sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
|
||||
sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
|
||||
|
||||
return parser, opts, args
|
||||
|
||||
def _real_main():
|
||||
parser, opts, args = parseOpts()
|
||||
def _real_main(argv=None):
|
||||
# Compatibility fixes for Windows
|
||||
if sys.platform == 'win32':
|
||||
# https://github.com/rg3/youtube-dl/issues/820
|
||||
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
||||
|
||||
parser, opts, args = parseOpts(argv)
|
||||
|
||||
# Open appropriate CookieJar
|
||||
if opts.cookiefile is None:
|
||||
@@ -299,10 +377,14 @@ def _real_main():
|
||||
# Set user agent
|
||||
if opts.user_agent is not None:
|
||||
std_headers['User-Agent'] = opts.user_agent
|
||||
|
||||
# Set referer
|
||||
if opts.referer is not None:
|
||||
std_headers['Referer'] = opts.referer
|
||||
|
||||
# Dump user agent
|
||||
if opts.dump_user_agent:
|
||||
print(std_headers['User-Agent'])
|
||||
compat_print(std_headers['User-Agent'])
|
||||
sys.exit(0)
|
||||
|
||||
# Batch file verification
|
||||
@@ -323,27 +405,50 @@ def _real_main():
|
||||
|
||||
# General configuration
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
proxy_handler = compat_urllib_request.ProxyHandler()
|
||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
if opts.proxy is not None:
|
||||
if opts.proxy == '':
|
||||
proxies = {}
|
||||
else:
|
||||
proxies = {'http': opts.proxy, 'https': opts.proxy}
|
||||
else:
|
||||
proxies = compat_urllib_request.getproxies()
|
||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||
if 'http' in proxies and 'https' not in proxies:
|
||||
proxies['https'] = proxies['http']
|
||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||
https_handler = make_HTTPS_handler(opts)
|
||||
opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
compat_urllib_request.install_opener(opener)
|
||||
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
|
||||
|
||||
extractors = gen_extractors()
|
||||
|
||||
if opts.list_extractors:
|
||||
for ie in extractors:
|
||||
print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
||||
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
|
||||
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||
all_urls = [url for url in all_urls if url not in matchedUrls]
|
||||
for mu in matchedUrls:
|
||||
print(u' ' + mu)
|
||||
compat_print(u' ' + mu)
|
||||
sys.exit(0)
|
||||
if opts.list_extractor_descriptions:
|
||||
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
|
||||
if not ie._WORKING:
|
||||
continue
|
||||
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
||||
if hasattr(ie, 'SEARCH_KEY'):
|
||||
_SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
|
||||
_COUNTS = (u'', u'5', u'10', u'all')
|
||||
desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||
compat_print(desc)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
# Conflicting, missing and erroneous options
|
||||
if opts.usenetrc and (opts.username is not None or opts.password is not None):
|
||||
parser.error(u'using .netrc conflicts with giving username/password')
|
||||
if opts.password is not None and opts.username is None:
|
||||
parser.error(u'account username missing')
|
||||
parser.error(u' account username missing\n')
|
||||
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
|
||||
parser.error(u'using output template conflicts with using title, video ID or auto number')
|
||||
if opts.usetitle and opts.useid:
|
||||
@@ -397,6 +502,10 @@ def _real_main():
|
||||
if opts.recodevideo is not None:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
|
||||
parser.error(u'invalid video recode format specified')
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
else:
|
||||
date = DateRange(opts.dateafter, opts.datebefore)
|
||||
|
||||
if sys.version_info < (3,):
|
||||
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
||||
@@ -409,25 +518,29 @@ def _real_main():
|
||||
or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
|
||||
or (opts.useid and u'%(id)s.%(ext)s')
|
||||
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
|
||||
or u'%(id)s.%(ext)s')
|
||||
# File downloader
|
||||
fd = FileDownloader({
|
||||
or u'%(title)s-%(id)s.%(ext)s')
|
||||
|
||||
# YoutubeDL
|
||||
ydl = YoutubeDL({
|
||||
'usenetrc': opts.usenetrc,
|
||||
'username': opts.username,
|
||||
'password': opts.password,
|
||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'videopassword': opts.videopassword,
|
||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
'forceid': opts.getid,
|
||||
'forcethumbnail': opts.getthumbnail,
|
||||
'forcedescription': opts.getdescription,
|
||||
'forcefilename': opts.getfilename,
|
||||
'forceformat': opts.getformat,
|
||||
'simulate': opts.simulate,
|
||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'format': opts.format,
|
||||
'format_limit': opts.format_limit,
|
||||
'listformats': opts.listformats,
|
||||
'outtmpl': outtmpl,
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'ratelimit': opts.ratelimit,
|
||||
@@ -437,6 +550,7 @@ def _real_main():
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'continuedl': opts.continue_dl,
|
||||
'noprogress': opts.noprogress,
|
||||
'progress_with_newline': opts.progress_with_newline,
|
||||
'playliststart': opts.playliststart,
|
||||
'playlistend': opts.playlistend,
|
||||
'logtostderr': opts.outtmpl == '-',
|
||||
@@ -445,45 +559,56 @@ def _real_main():
|
||||
'updatetime': opts.updatetime,
|
||||
'writedescription': opts.writedescription,
|
||||
'writeinfojson': opts.writeinfojson,
|
||||
'writethumbnail': opts.writethumbnail,
|
||||
'writesubtitles': opts.writesubtitles,
|
||||
'writeautomaticsub': opts.writeautomaticsub,
|
||||
'allsubtitles': opts.allsubtitles,
|
||||
'listsubtitles': opts.listsubtitles,
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
'subtitleslang': opts.subtitleslang,
|
||||
'matchtitle': opts.matchtitle,
|
||||
'rejecttitle': opts.rejecttitle,
|
||||
'matchtitle': decodeOption(opts.matchtitle),
|
||||
'rejecttitle': decodeOption(opts.rejecttitle),
|
||||
'max_downloads': opts.max_downloads,
|
||||
'prefer_free_formats': opts.prefer_free_formats,
|
||||
'verbose': opts.verbose,
|
||||
'dump_intermediate_pages': opts.dump_intermediate_pages,
|
||||
'test': opts.test,
|
||||
'keepvideo': opts.keepvideo,
|
||||
'min_filesize': opts.min_filesize,
|
||||
'max_filesize': opts.max_filesize
|
||||
'max_filesize': opts.max_filesize,
|
||||
'daterange': date,
|
||||
})
|
||||
|
||||
if opts.verbose:
|
||||
fd.to_screen(u'[debug] youtube-dl version ' + __version__)
|
||||
sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||
try:
|
||||
sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||
sp = subprocess.Popen(
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||
out, err = sp.communicate()
|
||||
out = out.decode().strip()
|
||||
if re.match('[0-9a-f]+', out):
|
||||
fd.to_screen(u'[debug] Git HEAD: ' + out)
|
||||
sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n')
|
||||
except:
|
||||
pass
|
||||
fd.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()))
|
||||
fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
|
||||
try:
|
||||
sys.exc_clear()
|
||||
except:
|
||||
pass
|
||||
sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
|
||||
sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
|
||||
|
||||
for extractor in extractors:
|
||||
fd.add_info_extractor(extractor)
|
||||
ydl.add_default_info_extractors()
|
||||
|
||||
# PostProcessors
|
||||
if opts.extractaudio:
|
||||
fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||
if opts.recodevideo:
|
||||
fd.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
update_self(fd.to_screen, opts.verbose, sys.argv[0])
|
||||
update_self(ydl.to_screen, opts.verbose, sys.argv[0])
|
||||
|
||||
# Maybe do nothing
|
||||
if len(all_urls) < 1:
|
||||
@@ -493,9 +618,9 @@ def _real_main():
|
||||
sys.exit()
|
||||
|
||||
try:
|
||||
retcode = fd.download(all_urls)
|
||||
retcode = ydl.download(all_urls)
|
||||
except MaxDownloadsReached:
|
||||
fd.to_screen(u'--max-download limit reached, aborting.')
|
||||
ydl.to_screen(u'--max-download limit reached, aborting.')
|
||||
retcode = 101
|
||||
|
||||
# Dump cookie jar if requested
|
||||
@@ -507,9 +632,9 @@ def _real_main():
|
||||
|
||||
sys.exit(retcode)
|
||||
|
||||
def main():
|
||||
def main(argv=None):
|
||||
try:
|
||||
_real_main()
|
||||
_real_main(argv)
|
||||
except DownloadError:
|
||||
sys.exit(1)
|
||||
except SameFileError:
|
||||
|
@@ -9,7 +9,8 @@ import sys
|
||||
if __package__ is None and not hasattr(sys, "frozen"):
|
||||
# direct call of __main__.py
|
||||
import os.path
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
path = os.path.realpath(os.path.abspath(__file__))
|
||||
sys.path.append(os.path.dirname(os.path.dirname(path)))
|
||||
|
||||
import youtube_dl
|
||||
|
||||
|
117
youtube_dl/extractor/__init__.py
Normal file
117
youtube_dl/extractor/__init__.py
Normal file
@@ -0,0 +1,117 @@
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .ard import ARDIE
|
||||
from .arte import ArteTvIE
|
||||
from .auengine import AUEngineIE
|
||||
from .bandcamp import BandcampIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import BrightcoveIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .comedycentral import ComedyCentralIE
|
||||
from .condenast import CondeNastIE
|
||||
from .criterion import CriterionIE
|
||||
from .cspan import CSpanIE
|
||||
from .dailymotion import DailymotionIE
|
||||
from .depositfiles import DepositFilesIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .escapist import EscapistIE
|
||||
from .exfm import ExfmIE
|
||||
from .facebook import FacebookIE
|
||||
from .flickr import FlickrIE
|
||||
from .freesound import FreesoundIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .generic import GenericIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .hypem import HypemIE
|
||||
from .ign import IGNIE, OneUPIE
|
||||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
from .keek import KeekIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import LivestreamIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mtv import MTVIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .nba import NBAIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
from .sina import SinaIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .statigram import StatigramIE
|
||||
from .steam import SteamIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .ted import TEDIE
|
||||
from .tf1 import TF1IE
|
||||
from .thisav import ThisAVIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .ustream import UstreamIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veoh import VeohIE
|
||||
from .vevo import VevoIE
|
||||
from .vimeo import VimeoIE
|
||||
from .vine import VineIE
|
||||
from .c56 import C56IE
|
||||
from .wat import WatIE
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .yahoo import YahooIE, YahooSearchIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
from .youporn import YouPornIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeWatchLaterIE,
|
||||
YoutubeFavouritesIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
||||
# Every module-level name ending in 'IE' is treated as an extractor class.
# GenericIE is left out of the scan and added explicitly as the final entry.
# NOTE(review): the relative order of the other classes is whatever
# globals() yields -- confirm that this is the intended priority.
_ALL_CLASSES = [
    klass
    for name, klass in globals().items()
    if name != 'GenericIE' and name.endswith('IE')
] + [GenericIE]
|
||||
|
||||
def gen_extractors():
    """Instantiate every class listed in _ALL_CLASSES.

    Returns a new list holding one instance per class, in the same order as
    _ALL_CLASSES.  The order does matter; the first extractor matched is the
    one handling the URL.
    """
    instances = []
    for extractor_class in _ALL_CLASSES:
        instances.append(extractor_class())
    return instances
|
||||
|
||||
def get_info_extractor(ie_name):
    """Look up an info extractor class by its name without the 'IE' suffix.

    The class is fetched from this module's globals under ie_name + 'IE';
    a KeyError is raised when no such global exists.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]
|
67
youtube_dl/extractor/archiveorg.py
Normal file
67
youtube_dl/extractor/archiveorg.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ArchiveOrgIE(InfoExtractor):
    """Information extractor for archive.org "details" pages.

    The metadata is read from the JSON rendering of the details page, which
    is requested by adding an ``output=json`` query parameter to the URL.
    """
    IE_NAME = 'archive.org'
    IE_DESC = 'archive.org videos'
    _VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
    _TEST = {
        u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
        u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
        u'md5': u'8af1d4cf447933ed3c7f4871162602db',
        u'info_dict': {
            u"title": u"1968 Demo - FJCC Conference Presentation Reel #1",
            u"description": u"Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
            u"upload_date": u"19681210",
            u"uploader": u"SRI International"
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the archive.org item behind *url*.

        Returns a dict with '_type' set to 'video', the metadata fields read
        from the JSON document, every video format sorted by ascending file
        size, and 'url'/'ext' taken from the largest of those formats.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Append with '&' when the URL already carries a query string and
        # with '?' otherwise (the original had the two cases swapped).
        separator = u'&' if u'?' in url else u'?'
        json_url = url + separator + u'output=json'
        json_data = self._download_webpage(json_url, video_id)
        data = json.loads(json_data)

        # Each metadata field is a list; only its first entry is used.
        metadata = data['metadata']
        title = metadata['title'][0]
        description = metadata['description'][0]
        uploader = metadata['creator'][0]
        upload_date = unified_strdate(metadata['date'][0])

        # Keep only the files whose format label mentions 'Video'.
        formats = [
            {
                'format': fdata['format'],
                'url': 'http://' + data['server'] + data['dir'] + fn,
                'file_size': int(fdata['size']),
            }
            for fn, fdata in data['files'].items()
            if 'Video' in fdata['format']]
        formats.sort(key=lambda fdata: fdata['file_size'])

        info = {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
        }
        thumbnail = data.get('misc', {}).get('image')
        if thumbnail:
            info['thumbnail'] = thumbnail

        # TODO: Remove when #980 has been merged
        # Until then the largest file is exposed as the default download.
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])

        return info
|
54
youtube_dl/extractor/ard.py
Normal file
54
youtube_dl/extractor/ard.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class ARDIE(InfoExtractor):
    """Information extractor for ardmediathek.de / mediathek.daserste.de."""
    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
    _TEST = {
        u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640',
        u'file': u'14077640.mp4',
        u'md5': u'6ca8824255460c787376353f9e20bbd8',
        u'info_dict': {
            u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden"
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for *url*.

        The stream with media type 0 and the highest quality is selected.
        When it has an RTMP URL, 'url' is that RTMP URL and 'play_path' the
        stream's video URL; otherwise 'url' is the video URL itself.

        Raises ExtractorError when the page does not have the expected
        content.  Page content is validated with explicit checks rather
        than ``assert`` because assertions are removed under ``python -O``.
        """
        # determine video id from url
        m = re.match(self._VALID_URL, url)

        # A numeric documentId query parameter wins over the path component.
        numid = re.search(r'documentId=([0-9]+)', url)
        if numid:
            video_id = numid.group(1)
        else:
            video_id = m.group('video_id')

        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)
        title_mobj = re.search(self._TITLE, html)
        if title_mobj is None:
            raise ExtractorError(u'Unable to extract title')
        title = title_mobj.group('title')

        streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)]
        if not streams:
            if '"fsk"' not in html:
                raise ExtractorError(u'Unable to extract media streams')
            raise ExtractorError(u'This video is only available after 8:00 pm')

        # choose default media type and highest quality for now
        default_streams = [s for s in streams if int(s["media_type"]) == 0]
        if not default_streams:
            raise ExtractorError(u'No stream with the default media type found')
        stream = max(default_streams, key=lambda s: int(s["quality"]))

        # there's two possibilities: RTMP stream or HTTP download
        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
        if stream['rtmp_url']:
            self.to_screen(u'RTMP download detected')
            if not stream['video_url'].startswith('mp4:'):
                raise ExtractorError(u'Unexpected RTMP play path: %s' % stream['video_url'])
            info["url"] = stream["rtmp_url"]
            info["play_path"] = stream['video_url']
        else:
            if not stream["video_url"].endswith('.mp4'):
                raise ExtractorError(u'Unexpected video URL: %s' % stream["video_url"])
            info["url"] = stream["video_url"]
        return [info]
|
146
youtube_dl/extractor/arte.py
Normal file
146
youtube_dl/extractor/arte.py
Normal file
@@ -0,0 +1,146 @@
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
class ArteTvIE(InfoExtractor):
    """
    There are two sources of video in arte.tv: videos.arte.tv and
    www.arte.tv/guide, the extraction process is different for each one.
    The videos expire in 7 days, so we can't add tests.
    """
    _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
    _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
    _LIVE_URL = r'index-[0-9]+\.html$'

    IE_NAME = u'arte.tv'

    @classmethod
    def suitable(cls, url):
        """Return True when *url* matches the emission or the videos pattern."""
        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))

    # TODO implement Live Stream
    # from ..utils import compat_urllib_parse
    # def extractLiveStream(self, url):
    #     video_lang = url.split('/')[-4]
    #     info = self.grep_webpage(
    #         url,
    #         r'src="(.*?/videothek_js.*?\.js)',
    #         0,
    #         [
    #             (1, 'url', u'Invalid URL: %s' % url)
    #         ]
    #     )
    #     http_host = url.split('/')[2]
    #     next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
    #     info = self.grep_webpage(
    #         next_url,
    #         r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
    #             '(http://.*?\.swf).*?' +
    #             '(rtmp://.*?)\'',
    #         re.DOTALL,
    #         [
    #             (1, 'path',   u'could not extract video path: %s' % url),
    #             (2, 'player', u'could not extract video player: %s' % url),
    #             (3, 'url',    u'could not extract video url: %s' % url)
    #         ]
    #     )
    #     video_url = u'%s/%s' % (info.get('url'), info.get('path'))

    def _real_extract(self, url):
        """Dispatch *url* to the extraction routine for its arte.tv source.

        Returns the info dict produced by _extract_emission or
        _extract_video.  Raises ExtractorError for live stream URLs (not
        supported) and for URLs that match none of the known patterns.
        """
        mobj = re.match(self._EMISSION_URL, url)
        if mobj is not None:
            lang = mobj.group('lang')
            # This is not a real id, it can be for example AJT for the news
            # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
            video_id = mobj.group('id')
            return self._extract_emission(url, video_id, lang)

        mobj = re.match(self._VIDEOS_URL, url)
        if mobj is not None:
            video_id = mobj.group('id')
            lang = mobj.group('lang')
            return self._extract_video(url, video_id, lang)

        # No id is available on this path, so the live pattern is tested
        # against the URL itself.
        if re.search(self._LIVE_URL, url) is not None:
            raise ExtractorError(u'Arte live streams are not yet supported, sorry')
            # self.extractLiveStream(url)
            # return

        raise ExtractorError(u'Invalid URL: %s' % url)

    def _extract_emission(self, url, video_id, lang):
        """Extract from www.arte.tv/guide"""
        webpage = self._download_webpage(url, video_id)
        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')

        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
        self.report_extraction(video_id)
        info = json.loads(json_info)
        player_info = info['videoJsonPlayer']

        info_dict = {'id': player_info['VID'],
                     'title': player_info['VTI'],
                     'description': player_info['VDE'],
                     'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
                     'thumbnail': player_info['programImage'],
                     'ext': 'flv',
                     }

        formats = player_info['VSR'].values()

        def _match_lang(f):
            # Return true if that format is in the language of the url
            if lang == 'fr':
                l = 'F'
            elif lang == 'de':
                l = 'A'
            regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
            return any(re.match(r, f['versionCode']) for r in regexes)
        # Some formats may not be in the same language as the url
        formats = filter(_match_lang, formats)
        # We order the formats by quality
        formats = sorted(formats, key=lambda f: int(f['height']))
        if not formats:
            raise ExtractorError(u'No video formats found for language %s' % lang)
        # Pick the best quality
        format_info = formats[-1]
        if format_info['mediaType'] == u'rtmp':
            info_dict['url'] = format_info['streamer']
            info_dict['play_path'] = 'mp4:' + format_info['url']
        else:
            info_dict['url'] = format_info['url']

        return info_dict

    def _extract_video(self, url, video_id, lang):
        """Extract from videos.arte.tv"""
        # The player XML URL is derived from the page URL by two textual
        # substitutions.
        ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
        ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
        ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
        ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
        config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
        config_xml_url = config_node.attrib['ref']
        config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')

        video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))

        def _key(m):
            # 'hd' ranks above every other quality label.
            quality = m.group('quality')
            if quality == 'hd':
                return 2
            else:
                return 1
        # We pick the best quality
        video_urls = sorted(video_urls, key=_key)
        video_url = list(video_urls)[-1].group('url')

        title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
        thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
                                            config_xml, 'thumbnail')
        return {'id': video_id,
                'title': title,
                'thumbnail': thumbnail,
                'url': video_url,
                'ext': 'flv',
                }
|
46
youtube_dl/extractor/auengine.py
Normal file
46
youtube_dl/extractor/auengine.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
|
||||
class AUEngineIE(InfoExtractor):
    """Information extractor for auengine.com embed pages."""
    _TEST = {
        u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
        u'file': u'lfvlytY6.mp4',
        u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f',
        u'info_dict': {
            u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
        }
    }
    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the embed *url*.

        The page is scanned for ``file:`` / ``url:`` links; a link whose path
        ends in ``.png`` becomes the thumbnail and one ending in ``.mp4`` the
        video URL (the last of each kind wins).  The thumbnail is None when
        the page has no ``.png`` link.

        Raises ExtractorError when no ``.mp4`` link is found.
        """
        # Imported here because this module's import block does not bring
        # ExtractorError into scope.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
                                        webpage, u'title')
        title = title.strip()
        links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
        links = [compat_urllib_parse.unquote(l) for l in links]

        # Start from "not found" so that a page missing either kind of link
        # cannot leave these names unbound.
        thumbnail = None
        video_url = None
        ext = None
        for link in links:
            root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path)
            if pathext == '.png':
                thumbnail = link
            elif pathext == '.mp4':
                video_url = link
                ext = pathext
        if video_url is None:
            raise ExtractorError(u'Could not find video URL')

        # Drop a trailing '.mp4' from the page title, then the leading dot
        # from the extension.
        if title.endswith(ext):
            title = title[:-len(ext)]
        ext = ext[1:]
        return [{
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail,
        }]
|
63
youtube_dl/extractor/bandcamp.py
Normal file
63
youtube_dl/extractor/bandcamp.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
    """Information extractor for free tracks on *.bandcamp.com."""
    _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
    _TEST = {
        u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
        u'file': u'1812978515.mp3',
        u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
        u'info_dict': {
            u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
        },
        u'skip': u'There is a limit of 200 free downloads / month for the test song'
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict of the track at *url*.

        Raises ExtractorError when the track page has no free download link.
        """
        url_match = re.match(self._VALID_URL, url)
        url_title = url_match.group('title')
        webpage = self._download_webpage(url, url_title)

        # Locate the link to the free download page.
        free_page_match = re.search(r'freeDownloadPage: "(.*?)"', webpage)
        if free_page_match is None:
            raise ExtractorError(u'No free songs found')
        download_link = free_page_match.group(1)

        track_id_match = re.search(
            r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
            webpage, re.MULTILINE|re.DOTALL)
        track_id = track_id_match.group('id')

        download_webpage = self._download_webpage(
            download_link, track_id, 'Downloading free downloads page')

        # The track dictionary is embedded in some javascript code.
        items_match = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE)
        info = json.loads(items_match.group(1))[0]

        # We pick mp3-320 for now, until format selection can be easily implemented.
        mp3_info = info[u'downloads'][u'mp3-320']

        # Using this URL directly reports that the link has expired, so only
        # its components are reused below.
        initial_url = mp3_info[u'url']
        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
        m_url = re.match(re_url, initial_url)

        # Build the URL that is queried for the final track URL.
        # Bandcamp builds this URL in the script download_bunde_*.js
        url_parts = (
            m_url.group('server'),
            m_url.group('fsig'),
            track_id,
            m_url.group('ts'),
        )
        request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % url_parts
        final_url_webpage = self._download_webpage(
            request_url, track_id, 'Requesting download url')

        # If we could correctly generate the .rand field the URL would be
        # in the "download_url" key.
        retry_match = re.search(r'"retry_url":"(.*?)"', final_url_webpage)
        final_url = retry_match.group(1)

        return [{
            'id': track_id,
            'title': info[u'title'],
            'ext': 'mp3',
            'url': final_url,
            'thumbnail': info[u'thumb_url'],
            'uploader': info[u'artist'],
        }]
|
193
youtube_dl/extractor/bliptv.py
Normal file
193
youtube_dl/extractor/bliptv.py
Normal file
@@ -0,0 +1,193 @@
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class BlipTVIE(InfoExtractor):
|
||||
"""Information extractor for blip.tv"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
|
||||
_URL_EXT = r'^.*\.([a-z0-9]+)$'
|
||||
IE_NAME = u'blip.tv'
|
||||
_TEST = {
|
||||
u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
u'file': u'5779306.m4v',
|
||||
u'md5': u'80baf1ec5c3d2019037c1c707d676b9f',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20111205",
|
||||
u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
|
||||
u"uploader": u"Comic Book Resources - CBR TV",
|
||||
u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
|
||||
}
|
||||
}
|
||||
|
||||
def report_direct_download(self, title):
    """Tell the user that *title* was detected as a direct download."""
    message = u'%s: Direct download detected' % title
    self.to_screen(message)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
# See https://github.com/rg3/youtube-dl/issues/857
|
||||
api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
|
||||
if api_mobj is not None:
|
||||
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
|
||||
urlp = compat_urllib_parse_urlparse(url)
|
||||
if urlp.path.startswith('/play/'):
|
||||
request = compat_urllib_request.Request(url)
|
||||
response = compat_urllib_request.urlopen(request)
|
||||
redirecturl = response.geturl()
|
||||
rurlp = compat_urllib_parse_urlparse(redirecturl)
|
||||
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
|
||||
url = 'http://blip.tv/a/a-' + file_id
|
||||
return self._real_extract(url)
|
||||
|
||||
|
||||
if '?' in url:
|
||||
cchar = '&'
|
||||
else:
|
||||
cchar = '?'
|
||||
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
||||
request = compat_urllib_request.Request(json_url)
|
||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||
self.report_extraction(mobj.group(1))
|
||||
info = None
|
||||
try:
|
||||
urlh = compat_urllib_request.urlopen(request)
|
||||
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
||||
basename = url.split('/')[-1]
|
||||
title,ext = os.path.splitext(basename)
|
||||
title = title.decode('UTF-8')
|
||||
ext = ext.replace('.', '')
|
||||
self.report_direct_download(title)
|
||||
info = {
|
||||
'id': title,
|
||||
'url': url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': title,
|
||||
'ext': ext,
|
||||
'urlhandle': urlh
|
||||
}
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
|
||||
if info is None: # Regular URL
|
||||
try:
|
||||
json_code_bytes = urlh.read()
|
||||
json_code = json_code_bytes.decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
|
||||
|
||||
try:
|
||||
json_data = json.loads(json_code)
|
||||
if 'Post' in json_data:
|
||||
data = json_data['Post']
|
||||
else:
|
||||
data = json_data
|
||||
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
if 'additionalMedia' in data:
|
||||
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
|
||||
best_format = formats[-1]
|
||||
video_url = best_format['url']
|
||||
else:
|
||||
video_url = data['media']['url']
|
||||
umobj = re.match(self._URL_EXT, video_url)
|
||||
if umobj is None:
|
||||
raise ValueError('Can not determine filename extension')
|
||||
ext = umobj.group(1)
|
||||
|
||||
info = {
|
||||
'id': data['item_id'],
|
||||
'url': video_url,
|
||||
'uploader': data['display_name'],
|
||||
'upload_date': upload_date,
|
||||
'title': data['title'],
|
||||
'ext': ext,
|
||||
'format': data['media']['mimeType'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'description': data['description'],
|
||||
'player_url': data['embedUrl'],
|
||||
'user_agent': 'iTunes/10.6.1',
|
||||
}
|
||||
except (ValueError,KeyError) as err:
|
||||
raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
|
||||
|
||||
return [info]
|
||||
|
||||
|
||||
class BlipTVUserIE(InfoExtractor):
    """Information Extractor for blip.tv users."""

    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
    # Number of entries a full result page is expected to contain.
    _PAGE_SIZE = 12
    IE_NAME = u'blip.tv:user'

    def _real_extract(self, url):
        """Collect every video of a blip.tv user into a playlist result.

        Downloads the user page to find the numeric users id, then walks
        the paginated episode list until a page comes back with fewer
        than _PAGE_SIZE entries.

        Returns a one-element list holding the playlist result.
        Raises ExtractorError if the URL is invalid or the users id
        cannot be found on the user page.
        """
        # Extract username
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # The 'bliptvuser:' shorthand accepted by _VALID_URL is not a
        # downloadable URL; map it to the user's page.
        if url.startswith('bliptvuser:'):
            url = 'http://blip.tv/%s' % username

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, u'Downloading user page')
        mobj = re.search(r'data-users-id="([^"]+)"', page)
        if mobj is None:
            raise ExtractorError(u'Unable to extract user id')
        page_base = page_base % mobj.group(1)

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.

        video_ids = []
        pagenum = 1

        while True:
            page_url = page_base + "&page=" + str(pagenum)
            page = self._download_webpage(page_url, username,
                                          u'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers, comparing the unescaped form so
            # that the duplicate check matches what is actually stored.
            ids_in_page = []
            for mobj in re.finditer(r'href="/([^"]+)"', page):
                video_id = unescapeHTML(mobj.group(1))
                if video_id not in ids_in_page:
                    ids_in_page.append(video_id)

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.
            if len(ids_in_page) < self._PAGE_SIZE:
                break

            pagenum += 1

        urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
        return [self.playlist_result(url_entries, playlist_title=username)]
|
38
youtube_dl/extractor/breakcom.py
Normal file
38
youtube_dl/extractor/breakcom.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
    """Information extractor for break.com video pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?break\.com/video/([^/]+)'
    _TEST = {
        u'url': u'http://www.break.com/video/when-girls-act-like-guys-2468056',
        u'file': u'2468056.mp4',
        u'md5': u'a3513fb1547fba4fb6cfac1bffc6c46b',
        u'info_dict': {
            u"title": u"When Girls Act Like D-Bags"
        }
    }

    def _real_extract(self, url):
        """Read the embed page's ``embedVars`` JSON and build the video info.

        Returns a url result handing over to the Youtube extractor when
        the video URI is a YouTube watch URL, otherwise a one-element
        list with the information dictionary.
        """
        slug = re.match(self._VALID_URL, url).group(1)
        # The id is whatever follows the last dash of the URL slug.
        video_id = slug.rpartition("-")[2]

        webpage = self._download_webpage(
            'http://www.break.com/embed/%s' % video_id, video_id)
        embed_vars = json.loads(self._search_regex(
            r'var embedVars = ({.*?});', webpage, u'info json',
            flags=re.DOTALL))

        video_url = embed_vars['videoUri']
        youtube_match = re.search(
            r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
        if youtube_match is not None:
            return self.url_result(youtube_match.group(1), 'Youtube')

        # The auth token is appended as the query string of the media URL.
        final_url = video_url + '?' + embed_vars['AuthToken']
        return [{
            'id': video_id,
            'url': final_url,
            'ext': determine_ext(final_url),
            'title': embed_vars['contentName'],
            'thumbnail': embed_vars['thumbUri'],
        }]
|
86
youtube_dl/extractor/brightcove.py
Normal file
86
youtube_dl/extractor/brightcove.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
find_xpath_attr,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
class BrightcoveIE(InfoExtractor):
    """Information extractor for Brightcove players and playlists."""

    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'

    # There is a test for Brightcove in GenericIE, that way we test both the download
    # and the detection of videos, and we don't have to find an URL that is always valid

    @classmethod
    def _build_brighcove_url(cls, object_str):
        """
        Build a Brightcove url from a xml string containing
        <object class="BrightcoveExperience">{params}</object>

        'flashID' and 'playerID' are always read; 'playerKey' and
        '@videoPlayer' are only added when the object defines them.
        """
        object_doc = xml.etree.ElementTree.fromstring(object_str)
        assert u'BrightcoveExperience' in object_doc.attrib['class']

        def find_param(name):
            # <param> child whose 'name' attribute equals *name*, or None.
            return find_xpath_attr(object_doc, './param', 'name', name)

        params = {
            'flashID': object_doc.attrib['id'],
            'playerID': find_param('playerID').attrib['value'],
        }
        # Not all pages define these values
        for optional_name in ('playerKey', '@videoPlayer'):
            node = find_param(optional_name)
            if node is not None:
                params[optional_name] = node.attrib['value']
        return cls._FEDERATED_URL_TEMPLATE % compat_urllib_parse.urlencode(params)

    def _real_extract(self, url):
        """Dispatch to single-video or playlist extraction based on the query."""
        query_str = re.match(self._VALID_URL, url).group('query')
        query = compat_urlparse.parse_qs(query_str)

        video_player = query.get('@videoPlayer')
        if not video_player:
            # Without a video reference the player key designates a playlist.
            return self._get_playlist_info(query['playerKey'][0])
        return self._get_video_info(video_player[0], query_str)

    def _get_video_info(self, video_id, query):
        """Download the federated player page and extract its single video."""
        webpage = self._download_webpage(
            self._FEDERATED_URL_TEMPLATE % query, video_id)

        self.report_extraction(video_id)
        experience_json = self._search_regex(
            r'var experienceJSON = ({.*?});', webpage, 'json')
        experience = json.loads(experience_json)['data']
        media = experience['programmedContent']['videoPlayer']['mediaDTO']

        return self._extract_video_info(media)

    def _get_playlist_info(self, player_key):
        """Download the programming of a player and return it as a playlist."""
        raw_info = self._download_webpage(
            self._PLAYLIST_URL_TEMPLATE % player_key,
            player_key, u'Downloading playlist information')

        video_list = json.loads(raw_info)['videoList']
        collection = video_list['mediaCollectionDTO']
        entries = [self._extract_video_info(video)
                   for video in collection['videoDTOs']]

        return self.playlist_result(entries,
                                    playlist_id=video_list['id'],
                                    playlist_title=collection['displayName'])

    def _extract_video_info(self, video_info):
        """Build an information dictionary from a Brightcove media object.

        The rendition with the largest 'size' value supplies the URL.
        """
        by_size = sorted(video_info['renditions'],
                         key=lambda rendition: rendition['size'])
        largest = by_size[-1]

        return {
            'id': video_info['id'],
            'title': video_info['displayName'],
            'url': largest['defaultURL'],
            'ext': 'mp4',
            'description': video_info.get('shortDescription'),
            'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
            'uploader': video_info.get('publisherName'),
        }
|
36
youtube_dl/extractor/c56.py
Normal file
36
youtube_dl/extractor/c56.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
class C56IE(InfoExtractor):
    """Information extractor for 56.com."""

    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
    IE_NAME = u'56.com'

    _TEST = {
        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        u'file': u'93440716.mp4',
        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
        u'info_dict': {
            u'title': u'网事知多少 第32期:车怒',
        },
    }

    def _real_extract(self, url):
        """Fetch the JSON description of a video and pick its largest file.

        Returns the information dictionary for the video.
        """
        # _VALID_URL is not written in verbose syntax, so it is matched
        # without re.VERBOSE, the same way the inherited suitable() does.
        mobj = re.match(self._VALID_URL, url)
        text_id = mobj.group('textid')
        info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
                                           text_id, u'Downloading video info')
        info = json.loads(info_page)['info']
        # The entry with the largest 'filesize' is taken as the best format.
        best_format = sorted(info['rfiles'], key=lambda f: int(f['filesize']))[-1]
        video_url = best_format['url']

        return {'id': info['vid'],
                'title': info['Subject'],
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': info.get('bimg') or info.get('img'),
                }
|
46
youtube_dl/extractor/canalplus.py
Normal file
46
youtube_dl/extractor/canalplus.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
class CanalplusIE(InfoExtractor):
    """Information extractor for canalplus.fr."""

    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
    IE_NAME = u'canalplus.fr'

    _TEST = {
        u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
        u'file': u'889861.flv',
        u'md5': u'590a888158b5f0d6832f84001fbf3e99',
        u'info_dict': {
            u'title': u'Le Petit Journal 20/06/13 - La guerre des drone',
            u'upload_date': u'20130620',
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Look the video up in the XML listing and return its info dict.

        The URL of the last available quality among BAS_DEBIT,
        HAUT_DEBIT and HD (in that order) is used.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        info_page = self._download_webpage(
            self._VIDEO_INFO_TEMPLATE % video_id, video_id,
            u'Downloading video info')

        self.report_extraction(video_id)
        doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
        # Keep the entry whose <ID> equals the requested id.
        matching = [entry for entry in doc
                    if entry.find('ID').text == video_id]
        video_info = matching[0]
        infos = video_info.find('INFOS')
        media = video_info.find('MEDIA')

        quality_nodes = [media.find('VIDEOS/%s' % quality)
                         for quality in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']]
        available_urls = [node.text for node in quality_nodes
                          if node is not None]
        video_url = available_urls[-1]

        return {
            'id': video_id,
            'title': u'%s - %s' % (infos.find('TITRAGE/TITRE').text,
                                   infos.find('TITRAGE/SOUS_TITRE').text),
            'url': video_url,
            'ext': 'flv',
            'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
            'thumbnail': media.find('IMAGES/GRAND').text,
        }
|
73
youtube_dl/extractor/collegehumor.py
Normal file
73
youtube_dl/extractor/collegehumor.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CollegeHumorIE(InfoExtractor):
    """Information extractor for collegehumor.com videos."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'

    _TEST = {
        u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
        u'file': u'6902724.mp4',
        u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
        u'info_dict': {
            u'title': u'Comic-Con Cosplay Catastrophe',
            u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
        },
    }

    def _real_extract(self, url):
        """Extract video information from the moogaloop XML and f4m manifest.

        Returns a url result for the Youtube extractor when the metadata
        carries a youtubeID node, otherwise a one-element list with the
        information dictionary.
        Raises ExtractorError on an invalid URL or when either XML
        document lacks the expected nodes.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        info = {
            'id': video_id,
            'uploader': None,
            'upload_date': None,
        }

        self.report_extraction(video_id)
        xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
        metaXml = self._download_webpage(xmlUrl, video_id,
                                         u'Downloading info XML',
                                         u'Unable to download video info XML')

        mdoc = xml.etree.ElementTree.fromstring(metaXml)
        try:
            videoNode = mdoc.findall('./video')[0]
            youtubeIdNode = videoNode.find('./youtubeID')
            if youtubeIdNode is not None:
                return self.url_result(youtubeIdNode.text, 'Youtube')
            info['description'] = videoNode.findall('./description')[0].text
            info['title'] = videoNode.findall('./caption')[0].text
            info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
            manifest_url = videoNode.findall('./file')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid metadata XML file')

        manifest_url += '?hdcore=2.10.3'
        manifestXml = self._download_webpage(manifest_url, video_id,
                                             u'Downloading XML manifest',
                                             u'Unable to download video info XML')

        adoc = xml.etree.ElementTree.fromstring(manifestXml)
        # A manifest without a media node is still rejected, but the node's
        # 'url' attribute is no longer read: the value was never used and a
        # missing attribute caused an uncaught KeyError.
        if not adoc.findall('./{http://ns.adobe.com/f4m/1.0}media'):
            raise ExtractorError(u'Invalid manifest file')
        try:
            manifest_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
        except IndexError:
            raise ExtractorError(u'Invalid manifest file')

        url_pr = compat_urllib_parse_urlparse(info['thumbnail'])

        # The media path is derived from the manifest id (minus its last two
        # characters, '.csmil' and commas) and served from the thumbnail's host.
        info['url'] = url_pr.scheme + '://' + url_pr.netloc + manifest_id[:-2].replace('.csmil', '').replace(',', '')
        info['ext'] = 'mp4'
        return [info]
|
194
youtube_dl/extractor/comedycentral.py
Normal file
194
youtube_dl/extractor/comedycentral.py
Normal file
@@ -0,0 +1,194 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ComedyCentralIE(InfoExtractor):
    IE_DESC = u'The Daily Show / Colbert Report'
    # urls can be abbreviations like :thedailyshow or :colbert
    # urls for episodes like:
    # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
    #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
    # NOTE: the pattern is always matched with re.VERBOSE, so the layout
    # whitespace inside it is not significant.
    _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                      |(https?://)?(www\.)?
                          (?P<showname>thedailyshow|colbertnation)\.com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
                          (?P<interview>
                              extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
                     $"""
    _TEST = {
        u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
        u'file': u'422212.mp4',
        u'md5': u'4e2f5cb088a83cd8cdb7756132f9739d',
        u'info_dict': {
            u"upload_date": u"20121214",
            u"description": u"Kristen Stewart",
            u"uploader": u"thedailyshow",
            u"title": u"thedailyshow-kristen-stewart part 1"
        }
    }

    _available_formats = ['3500', '2200', '1700', '1200', '750', '400']

    _video_extensions = {
        '3500': 'mp4',
        '2200': 'mp4',
        '1700': 'mp4',
        '1200': 'mp4',
        '750': 'mp4',
        '400': 'mp4',
    }
    _video_dimensions = {
        '3500': '1280x720',
        '2200': '960x540',
        '1700': '768x432',
        '1200': '640x360',
        '750': '512x288',
        '400': '384x216',
    }

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        # Overridden because _VALID_URL needs the re.VERBOSE flag.
        matched = re.match(cls._VALID_URL, url, re.VERBOSE)
        return matched is not None

    def _print_formats(self, formats):
        """Print one line per format id with its extension and dimensions."""
        print('Available formats:')
        for format_id in formats:
            extension = self._video_extensions.get(format_id, 'mp4')
            dimensions = self._video_dimensions.get(format_id, '???')
            print('%s\t:\t%s\t[%s]' % (format_id, extension, dimensions))

    def _real_extract(self, url):
        """Extract every part of an episode, clip or extended interview.

        Shortname URLs and bare ``full-episodes/`` URLs are resolved by
        following the site's redirect. Returns a list with one information
        dictionary per item of the show index, or None after printing the
        formats when the 'listformats' option is set.
        """
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        shortname = mobj.group('shortname')
        if shortname:
            # Abbreviations expand to the show's full-episodes landing page.
            if shortname in ('tds', 'thedailyshow'):
                url = u'http://www.thedailyshow.com/full-episodes/'
            else:
                url = u'http://www.colbertnation.com/full-episodes/'
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            assert mobj is not None

        dlNewest = False
        if mobj.group('clip'):
            if mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
        elif mobj.group('interview'):
            epTitle = mobj.group('interview_title')
        else:
            # No episode given: fetch whatever the landing page redirects to.
            dlNewest = not mobj.group('episode')
            epTitle = mobj.group('showname') if dlNewest else mobj.group('episode')

        self.report_extraction(epTitle)
        webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
        if dlNewest:
            url = htmlHandle.geturl()
            mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            if mobj is None:
                raise ExtractorError(u'Invalid redirected URL: ' + url)
            if mobj.group('episode') == '':
                raise ExtractorError(u'Redirected URL is still not specific: ' + url)
            epTitle = mobj.group('episode')

        mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)

        if not mMovieParams:
            # The Colbert Report embeds the reference in a data-mgid
            # attribute without a URL prefix; so extract the alternate
            # reference and then add the URL prefix manually.
            altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
            if not altMovieParams:
                raise ExtractorError(u'unable to find Flash URL in webpage ' + url)
            mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]

        uri = mMovieParams[0][1]
        indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
        indexXml = self._download_webpage(indexUrl, epTitle,
                                          u'Downloading show index',
                                          u'unable to download episode index')

        results = []

        idoc = xml.etree.ElementTree.fromstring(indexXml)
        for partNumber, itemEl in enumerate(idoc.findall('.//item'), 1):
            mediaId = itemEl.findall('./guid')[0].text
            guid_fields = mediaId.split(':')
            shortMediaId = guid_fields[-1]
            showId = guid_fields[-2].replace('.com', '')
            officialTitle = itemEl.findall('./title')[0].text
            officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)

            configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                         compat_urllib_parse.urlencode({'uri': mediaId}))
            configXml = self._download_webpage(configUrl, epTitle,
                                               u'Downloading configuration for %s' % shortMediaId)

            cdoc = xml.etree.ElementTree.fromstring(configXml)
            # (bitrate, source url) pairs in document order
            turls = [(rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
                     for rendition in cdoc.findall('.//rendition')]

            if not turls:
                self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
                continue

            if self._downloader.params.get('listformats', None):
                self._print_formats([bitrate for bitrate, _ in turls])
                return

            # Get the format arg from the arg stream
            req_format = self._downloader.params.get('format', None)

            # Default to the last rendition listed; prefer the first one
            # whose bitrate equals the requested format, if any.
            selected = turls[-1]
            for candidate in turls:
                if candidate[0] == req_format:
                    selected = candidate
                    break
            format_id, rtmp_video_url = selected

            m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
            if not m:
                raise ExtractorError(u'Cannot transform RTMP url')
            base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
            video_url = base + m.group('finalid')

            effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNumber)
            results.append({
                'id': shortMediaId,
                'url': video_url,
                'uploader': showId,
                'upload_date': officialDate,
                'title': effTitle,
                'ext': 'mp4',
                'format': format_id,
                'thumbnail': None,
                'description': compat_str(officialTitle),
            })

        return results
|
332
youtube_dl/extractor/common.py
Normal file
332
youtube_dl/extractor/common.py
Normal file
@@ -0,0 +1,332 @@
|
||||
import base64
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
import netrc
|
||||
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
class InfoExtractor(object):
|
||||
"""Information Extractor class.
|
||||
|
||||
Information extractors are the classes that, given a URL, extract
|
||||
information about the video (or videos) the URL refers to. This
|
||||
information includes the real video URL, the video title, author and
|
||||
others. The information is stored in a dictionary which is then
|
||||
passed to the FileDownloader. The FileDownloader processes this
|
||||
information possibly downloading the video to the file system, among
|
||||
other possible outcomes.
|
||||
|
||||
The dictionaries must include the following fields:
|
||||
|
||||
id: Video identifier.
|
||||
url: Final video URL.
|
||||
title: Video title, unescaped.
|
||||
ext: Video filename extension.
|
||||
|
||||
The following fields are optional:
|
||||
|
||||
format: The video format, defaults to ext (used for --get-format)
|
||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||
"url") for the varying thumbnails
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: One-line video description.
|
||||
uploader: Full name of the video uploader.
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
location: Physical location of the video.
|
||||
player_url: SWF Player URL (used for rtmpdump).
|
||||
subtitles: The subtitle file contents.
|
||||
view_count: How many users have watched the video on the platform.
|
||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||
like returned by urllib.request.urlopen
|
||||
|
||||
The fields should all be Unicode strings.
|
||||
|
||||
Subclasses of this one should re-define the _real_initialize() and
|
||||
_real_extract() methods and define a _VALID_URL regexp.
|
||||
Probably, they should also be added to the list of extractors.
|
||||
|
||||
_real_extract() must return a *list* of information dictionaries as
|
||||
described above.
|
||||
|
||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||
in order to warn the users and skip the tests.
|
||||
"""
|
||||
|
||||
_ready = False
|
||||
_downloader = None
|
||||
_WORKING = True
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
"""Constructor. Receives an optional downloader."""
|
||||
self._ready = False
|
||||
self.set_downloader(downloader)
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Tell whether *url* matches this extractor's _VALID_URL pattern."""
    matched = re.match(cls._VALID_URL, url)
    return matched is not None
|
||||
|
||||
@classmethod
def working(cls):
    """Return the value of the class' _WORKING flag."""
    is_working = cls._WORKING
    return is_working
|
||||
|
||||
def initialize(self):
    """Run _real_initialize() once (authentication, etc).

    Later calls are no-ops because the instance is then marked ready.
    """
    if self._ready:
        return
    self._real_initialize()
    self._ready = True
|
||||
|
||||
def extract(self, url):
    """Initialize the extractor if needed, then return _real_extract(url)."""
    self.initialize()
    extracted = self._real_extract(url)
    return extracted
|
||||
|
||||
def set_downloader(self, downloader):
    """Attach *downloader* to this IE, replacing any previous one."""
    self._downloader = downloader
|
||||
|
||||
def _real_initialize(self):
|
||||
"""Real initialization process. Redefine in subclasses."""
|
||||
pass
|
||||
|
||||
def _real_extract(self, url):
|
||||
"""Real extraction process. Redefine in subclasses."""
|
||||
pass
|
||||
|
||||
@property
def IE_NAME(self):
    """Name of the extractor: the class name minus its last two characters."""
    class_name = type(self).__name__
    return class_name[:-2]
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
    """Open *url_or_request* and return the response handle.

    note: progress message; None prints the default "Downloading webpage"
        message and False prints nothing.
    errnote: prefix of the error message, defaulting to
        u'Unable to download webpage'.

    Raises ExtractorError (carrying the original traceback) on network
    errors.
    """
    if note is None:
        self.report_download_webpage(video_id)
    elif note is not False:
        self.to_screen(u'%s: %s' % (video_id, note))

    network_errors = (compat_urllib_error.URLError,
                      compat_http_client.HTTPException,
                      socket.error)
    try:
        return compat_urllib_request.urlopen(url_or_request)
    except network_errors as err:
        prefix = u'Unable to download webpage' if errnote is None else errnote
        raise ExtractorError(u'%s: %s' % (prefix, compat_str(err)), sys.exc_info()[2])
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
    """ Returns a tuple (page content as string, URL handle)

    The page is decoded with the charset announced in the Content-Type
    header, falling back to UTF-8 when none is announced or when the
    announced one is unknown to Python. Undecodable bytes are replaced.
    When the 'dump_intermediate_pages' option is set, the raw page is
    also printed base64-encoded.
    """

    # Strip hashes from the URL (#1038)
    if isinstance(url_or_request, (compat_str, str)):
        url_or_request = url_or_request.partition('#')[0]

    urlh = self._request_webpage(url_or_request, video_id, note, errnote)
    content_type = urlh.headers.get('Content-Type', '')
    m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
    if m:
        # Drop any trailing parameters and surrounding quotes/whitespace
        # so that e.g. charset="utf-8"; foo=bar still names a codec.
        encoding = m.group(1).split(';')[0].strip().strip('"\'') or 'utf-8'
    else:
        encoding = 'utf-8'
    webpage_bytes = urlh.read()
    if self._downloader.params.get('dump_intermediate_pages', False):
        try:
            url = url_or_request.get_full_url()
        except AttributeError:
            # Plain string URL rather than a Request object
            url = url_or_request
        self.to_screen(u'Dumping request to ' + url)
        dump = base64.b64encode(webpage_bytes).decode('ascii')
        self._downloader.to_screen(dump)
    try:
        content = webpage_bytes.decode(encoding, 'replace')
    except LookupError:
        # The server announced a charset Python has no codec for.
        content = webpage_bytes.decode('utf-8', 'replace')
    return (content, urlh)
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
||||
""" Returns the data of the page as a string """
|
||||
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
||||
|
||||
def to_screen(self, msg):
    """Print msg to screen, prefixing it with '[ie_name]'"""
    prefixed = u'[%s] %s' % (self.IE_NAME, msg)
    self._downloader.to_screen(prefixed)
|
||||
|
||||
def report_extraction(self, id_or_name):
    """Announce that information is being extracted for *id_or_name*."""
    message = u'%s: Extracting information' % id_or_name
    self.to_screen(message)
|
||||
|
||||
def report_download_webpage(self, video_id):
    """Announce that the webpage for *video_id* is being downloaded."""
    message = u'%s: Downloading webpage' % video_id
    self.to_screen(message)
|
||||
|
||||
def report_age_confirmation(self):
    """Announce an attempt to confirm the user's age."""
    message = u'Confirming age'
    self.to_screen(message)
|
||||
|
||||
def report_login(self):
    """Announce an attempt to log in."""
    message = u'Logging in'
    self.to_screen(message)
|
||||
|
||||
#Methods for following #608
|
||||
def url_result(self, url, ie=None):
    """Build a result of type 'url' pointing to a page still to be processed.

    *ie* is stored under 'ie_key' to name the extractor to use.
    """
    #TODO: ie should be the class used for getting the info
    return {
        '_type': 'url',
        'url': url,
        'ie_key': ie,
    }
|
||||
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
    """Build a '_type': 'playlist' result dict around entries.

    'id' and 'title' are only included when the corresponding argument is
    truthy.
    """
    result = {
        '_type': 'playlist',
        'entries': entries,
    }
    optional_fields = (('id', playlist_id), ('title', playlist_title))
    for field, value in optional_fields:
        if value:
            result[field] = value
    return result
|
||||
|
||||
def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
    """
    Perform a regex search on the given string, using a single pattern or
    a list of patterns, and return the first group that matched.

    pattern may be a string, a compiled regex, or an iterable of those;
    with an iterable the patterns are tried in order and the first one
    that matches is used.

    On failure: return default if it is not None; otherwise raise an
    ExtractorError when fatal is true, or report a warning through the
    downloader and return None when it is not. name is the field name used
    in those messages.
    """
    if isinstance(pattern, (str, compat_str, compiled_regex_type)):
        mobj = re.search(pattern, string, flags)
    else:
        # Start from "no match" so that an empty pattern list falls through
        # to the default/fatal handling below instead of raising NameError.
        mobj = None
        for p in pattern:
            mobj = re.search(p, string, flags)
            if mobj:
                break

    # Colour the field name only when stderr is a terminal and not on Windows.
    if sys.stderr.isatty() and os.name != 'nt':
        _name = u'\033[0;34m%s\033[0m' % name
    else:
        _name = name

    if mobj:
        # return the first matching group
        return next(g for g in mobj.groups() if g is not None)
    elif default is not None:
        return default
    elif fatal:
        raise ExtractorError(u'Unable to extract %s' % _name)
    else:
        self._downloader.report_warning(u'unable to extract %s; '
            u'please report this issue on http://yt-dl.org/bug' % _name)
        return None
|
||||
|
||||
def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
|
||||
"""
|
||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||
"""
|
||||
res = self._search_regex(pattern, string, name, default, fatal, flags)
|
||||
if res:
|
||||
return clean_html(res).strip()
|
||||
else:
|
||||
return res
|
||||
|
||||
def _get_login_info(self):
|
||||
"""
|
||||
Get the the login info as (username, password)
|
||||
It will look in the netrc file using the _NETRC_MACHINE value
|
||||
If there's no info available, return (None, None)
|
||||
"""
|
||||
if self._downloader is None:
|
||||
return (None, None)
|
||||
|
||||
username = None
|
||||
password = None
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
# Attempt to use provided username and password or .netrc data
|
||||
if downloader_params.get('username', None) is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
|
||||
|
||||
return (username, password)
|
||||
|
||||
# Helper functions for extracting OpenGraph info
|
||||
@staticmethod
|
||||
def _og_regex(prop):
|
||||
return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
|
||||
|
||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||
if name is None:
|
||||
name = 'OpenGraph %s' % prop
|
||||
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
|
||||
return unescapeHTML(escaped)
|
||||
|
||||
def _og_search_thumbnail(self, html, **kargs):
|
||||
return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
|
||||
|
||||
def _og_search_description(self, html, **kargs):
|
||||
return self._og_search_property('description', html, fatal=False, **kargs)
|
||||
|
||||
def _og_search_title(self, html, **kargs):
|
||||
return self._og_search_property('title', html, **kargs)
|
||||
|
||||
def _og_search_video_url(self, html, name='video url', **kargs):
|
||||
return self._html_search_regex([self._og_regex('video:secure_url'),
|
||||
self._og_regex('video')],
|
||||
html, name, **kargs)
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
    """
    Base class for paged search queries extractors.

    They accept urls in the format _SEARCH_KEY(|all|[1-9][0-9]*):{query}.
    An empty prefix requests one result, "all" requests _MAX_RESULTS, and
    a number requests that many results (capped at _MAX_RESULTS).
    Subclasses should define _SEARCH_KEY and _MAX_RESULTS and implement
    _get_n_results.
    """

    @classmethod
    def _make_valid_url(cls):
        """Return the regex that search queries for this class must match."""
        return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY

    @classmethod
    def suitable(cls, url):
        """Return True when url is a search query for this extractor."""
        return re.match(cls._make_valid_url(), url) is not None

    def _real_extract(self, query):
        """Parse the search query and fetch the requested number of results.

        Raises ExtractorError when the query does not match the expected
        format or asks for a non-positive number of results.
        """
        mobj = re.match(self._make_valid_url(), query)
        if mobj is None:
            raise ExtractorError(u'Invalid search query "%s"' % query)

        prefix = mobj.group('prefix')
        query = mobj.group('query')
        if prefix == '':
            return self._get_n_results(query, 1)
        elif prefix == 'all':
            return self._get_n_results(query, self._MAX_RESULTS)
        else:
            n = int(prefix)
            # The default regex only admits positive numbers; this guard
            # still applies if a subclass overrides _make_valid_url.
            if n <= 0:
                raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
            elif n > self._MAX_RESULTS:
                self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
                n = self._MAX_RESULTS
            return self._get_n_results(query, n)

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        raise NotImplementedError("This method must be implemented by subclasses")

    @property
    def SEARCH_KEY(self):
        """The search key prefix of this extractor (read-only)."""
        return self._SEARCH_KEY
|
106
youtube_dl/extractor/condenast.py
Normal file
106
youtube_dl/extractor/condenast.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
orderedSet,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class CondeNastIE(InfoExtractor):
    """
    Condé Nast is a media group, some of its sites use a custom HTML5 player
    that works the same in all of them.
    """

    # The keys are the supported sites and the values are the name to be shown
    # to the user and in the extractor description.
    _SITES = {'wired': u'WIRED',
              'gq': u'GQ',
              'vogue': u'Vogue',
              'glamour': u'Glamour',
              'wmagazine': u'W Magazine',
              'vanityfair': u'Vanity Fair',
              }

    # Dots are escaped so that only a literal '.' separates the host parts.
    _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
    IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))

    _TEST = {
        u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
        u'file': u'5171b343c2b4c00dd0c1ccb3.mp4',
        u'md5': u'1921f713ed48aabd715691f774c451f7',
        u'info_dict': {
            u'title': u'3D Printed Speakers Lit With LED',
            u'description': u'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
        }
    }

    def _extract_series(self, url, webpage):
        """Return a playlist result with one url entry per video linked from a series page."""
        title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
                                        webpage, u'series title', flags=re.DOTALL)
        url_object = compat_urllib_parse_urlparse(url)
        base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
        m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
                              webpage, flags=re.DOTALL)
        # Drop duplicate links while keeping page order.
        paths = orderedSet(m.group(1) for m in m_paths)
        build_url = lambda path: compat_urlparse.urljoin(base_url, path)
        entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
        return self.playlist_result(entries, playlist_title=title)

    def _extract_video(self, webpage):
        """Return the info dict for the single video embedded in webpage."""
        description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
                                               r'<div class="video-post-content">(.+?)</div>',
                                               ],
                                              webpage, u'description',
                                              fatal=False, flags=re.DOTALL)
        params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
                                    u'player params', flags=re.DOTALL)
        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id')
        player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id')
        target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target')
        data = compat_urllib_parse.urlencode({'videoId': video_id,
                                              'playerId': player_id,
                                              'target': target,
                                              })
        base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
                                           webpage, u'base info url',
                                           default='http://player.cnevids.com/player/loader.js?')
        info_url = base_info_url + data
        info_page = self._download_webpage(info_url, video_id,
                                           u'Downloading video info')
        video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info')
        video_info = json.loads(video_info)

        def _formats_sort_key(f):
            # Quality is compared first, then mp4 is preferred over other types.
            type_ord = 1 if f['type'] == 'video/mp4' else 0
            quality_ord = 1 if f['quality'] == 'high' else 0
            return (quality_ord, type_ord)
        # The last element after sorting is the preferred format.
        best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1]

        return {'id': video_id,
                'url': best_format['src'],
                'ext': best_format['type'].split('/')[-1],
                'title': video_info['title'],
                'thumbnail': video_info['poster_frame'],
                'description': description,
                }

    def _real_extract(self, url):
        """Dispatch to series or single-video extraction based on the url type."""
        mobj = re.match(self._VALID_URL, url)
        site = mobj.group('site')
        url_type = mobj.group('type')
        item_id = mobj.group('id')

        self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
        webpage = self._download_webpage(url, item_id)

        if url_type == 'series':
            return self._extract_series(url, webpage)
        else:
            return self._extract_video(webpage)
|
40
youtube_dl/extractor/criterion.py
Normal file
40
youtube_dl/extractor/criterion.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
class CriterionIE(InfoExtractor):
    """Extractor for film pages on criterion.com."""

    # Require at least one digit so the video id can never be empty.
    _VALID_URL = r'https?://www\.criterion\.com/films/(\d+)-.+'
    _TEST = {
        u'url': u'http://www.criterion.com/films/184-le-samourai',
        u'file': u'184.mp4',
        u'md5': u'bc51beba55685509883a9a7830919ec3',
        u'info_dict': {
            u"title": u"Le Samouraï",
            u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f',
        }
    }

    def _real_extract(self, url):
        """Download the film page and return its info dict."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)

        # The player variables are set through so.addVariable(...) calls.
        final_url = self._search_regex(r'so\.addVariable\("videoURL", "(.+?)"\)\;',
                                       webpage, 'video url')
        title = self._html_search_regex(r'<meta content="(.+?)" property="og:title" />',
                                        webpage, 'video title')
        description = self._html_search_regex(r'<meta name="description" content="(.+?)" />',
                                              webpage, 'video description')
        thumbnail = self._search_regex(r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
                                       webpage, 'thumbnail url')

        return {'id': video_id,
                'url' : final_url,
                'title': title,
                'ext': determine_ext(final_url),
                'description': description,
                'thumbnail': thumbnail,
                }
|
51
youtube_dl/extractor/cspan.py
Normal file
51
youtube_dl/extractor/cspan.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
class CSpanIE(InfoExtractor):
    """Extractor for program pages on c-spanvideo.org."""

    # Dots are escaped so that only the literal host name matches.
    _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
    _TEST = {
        u'url': u'http://www.c-spanvideo.org/program/HolderonV',
        u'file': u'315139.flv',
        u'md5': u'74a623266956f69e4df0068ab6c80fe4',
        u'info_dict': {
            u"title": u"Attorney General Eric Holder on Voting Rights Act Decision"
        },
        u'skip': u'Requires rtmpdump'
    }

    def _real_extract(self, url):
        """Resolve the program id from the page, fetch the flashXml info and return the info dict."""
        mobj = re.match(self._VALID_URL, url)
        prog_name = mobj.group(1)
        webpage = self._download_webpage(url, prog_name)
        video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
        data = compat_urllib_parse.urlencode({'programid': video_id,
                                              'dynamic':'1'})
        info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
        video_info = self._download_webpage(info_url, video_id, u'Downloading video info')

        self.report_extraction(video_id)

        title = self._html_search_regex(r'<string name="title">(.*?)</string>',
                                        video_info, 'title')
        description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
                                              webpage, 'description',
                                              flags=re.MULTILINE|re.DOTALL)

        url = self._search_regex(r'<string name="URL">(.*?)</string>',
                                 video_info, 'video url')
        # The service returns a templated url; fill in protocol and port.
        url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
        path = self._search_regex(r'<string name="path">(.*?)</string>',
                                  video_info, 'rtmp play path')

        return {'id': video_id,
                'title': title,
                'ext': 'flv',
                'url': url,
                'play_path': path,
                'description': description,
                'thumbnail': self._og_search_thumbnail(webpage),
                }
|
79
youtube_dl/extractor/dailymotion.py
Normal file
79
youtube_dl/extractor/dailymotion.py
Normal file
@@ -0,0 +1,79 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
    IE_NAME = u'dailymotion'
    _TEST = {
        u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
        u'file': u'x33vw9.mp4',
        u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
        u'info_dict': {
            u"uploader": u"Alex and Van .",
            u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for a Dailymotion video url."""
        # The id is the url path segment up to the first '_' or '?'
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group(1).split('_')[0].split('?')[0]

        # Fetch the video page with the family filter cookie switched off
        page_request = compat_urllib_request.Request(url)
        page_request.add_header('Cookie', 'family_filter=off')
        webpage = self._download_webpage(page_request, video_id)

        self.report_extraction(video_id)

        uploader_patterns = [
            r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
            # Looking for official user
            r'<(?:span|a) .*?rel="author".*?>([^<]+?)</',
        ]
        video_uploader = self._search_regex(uploader_patterns, webpage, 'video uploader')

        # The page shows the date as DD-MM-YYYY; reorder it to YYYYMMDD
        date_match = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
        if date_match is None:
            video_upload_date = None
        else:
            video_upload_date = date_match.group(3) + date_match.group(2) + date_match.group(1)

        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            u'Downloading embed page')
        info = json.loads(self._search_regex(r'var info = ({.*?}),', embed_page, 'video info'))

        # TODO: support choosing qualities

        # Keys are tried in this order; the first one with a truthy value wins
        quality_keys = (
            'stream_h264_hd1080_url', 'stream_h264_hd_url',
            'stream_h264_hq_url', 'stream_h264_url',
            'stream_h264_ld_url',
        )
        max_quality = next((key for key in quality_keys if info.get(key)), None)
        if max_quality is None:
            raise ExtractorError(u'Unable to extract video URL')
        self.to_screen(u'Using %s' % max_quality)
        video_url = info[max_quality]

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'title': self._og_search_title(webpage),
            'ext': 'mp4',
            'thumbnail': info['thumbnail_url']
        }]
|
60
youtube_dl/extractor/depositfiles.py
Normal file
60
youtube_dl/extractor/depositfiles.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import re
|
||||
import os
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Return a one-element list with the info dict for a depositfiles file url.

        Raises ExtractorError when the page cannot be retrieved or no
        download URL is found; if the page carries an 'Attention' notice,
        its text is used as the error message.
        """
        file_id = url.split('/')[-1]
        # Rebuild url in english locale
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed
        free_download_indication = { 'gateway_result' : '1' }
        # The POST body must be bytes on Python 3.
        post_data = compat_urllib_parse.urlencode(free_download_indication).encode('ascii')
        request = compat_urllib_request.Request(url, post_data)
        try:
            self.report_download_webpage(file_id)
            # Decode once here so the text regexes below work on both
            # Python 2 and Python 3.
            webpage = compat_urllib_request.urlopen(request).read().decode('utf-8', 'replace')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if (mobj is None) or (mobj.group(1) is None):
            # Try to figure out reason of the error.
            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if (mobj is not None) and (mobj.group(1) is not None):
                restriction_message = re.sub(r'\s+', ' ', mobj.group(1)).strip()
                raise ExtractorError(u'%s' % restriction_message)
            else:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)

        file_url = mobj.group(1)
        file_extension = os.path.splitext(file_url)[1][1:]

        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        # The id comes from the caller's url, which may be a byte string
        # on Python 2.
        if isinstance(file_id, bytes):
            file_id = file_id.decode('utf-8')

        return [{
            'id': file_id,
            'url': file_url,
            'uploader': None,
            'upload_date': None,
            'title': file_title,
            'ext': file_extension,
        }]
|
41
youtube_dl/extractor/dotsub.py
Normal file
41
youtube_dl/extractor/dotsub.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class DotsubIE(InfoExtractor):
    """Extractor for dotsub.com, driven by the site's media metadata API."""

    _VALID_URL = r'(?:http://)?(?:www\.)?dotsub\.com/view/([^/]+)'
    _TEST = {
        u'url': u'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
        u'file': u'aed3b8b2-1889-4df5-ae63-ad85f5572f27.flv',
        u'md5': u'0914d4d69605090f623b7ac329fea66e',
        u'info_dict': {
            u"title": u"Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary",
            u"uploader": u"4v4l0n42",
            u'description': u'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
            u'thumbnail': u'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
            u'upload_date': u'20101213',
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict built from the metadata JSON."""
        video_id = re.match(self._VALID_URL, url).group(1)
        info_url = "https://dotsub.com/api/media/%s/metadata" % (video_id)
        info = json.loads(self._download_webpage(info_url, video_id))

        # 'dateCreated' is a timestamp in milliseconds
        created = time.gmtime(info['dateCreated']/1000)
        upload_date = u'%04i%02i%02i' % (created.tm_year, created.tm_mon, created.tm_mday)

        return [{
            'id': video_id,
            'url': info['mediaURI'],
            'ext': 'flv',
            'title': info['title'],
            'thumbnail': info['screenshotURI'],
            'description': info['description'],
            'uploader': info['user'],
            'view_count': info['numberOfViews'],
            'upload_date': upload_date,
        }]
|
85
youtube_dl/extractor/dreisat.py
Normal file
85
youtube_dl/extractor/dreisat.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
    """Extractor for the 3sat mediathek, driven by its beitragsDetails XML service."""

    IE_NAME = '3sat'
    # Dots in the host and script name are escaped so they only match literally.
    _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
    _TEST = {
        u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
        u'file': u'36983.webm',
        u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
        u'info_dict': {
            u"title": u"Kaffeeland Schweiz",
            u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
            u"uploader": u"3sat",
            u"upload_date": u"20130622"
        }
    }

    def _real_extract(self, url):
        """Download the details XML for the video and return its info dict."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
        details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))

        # The 'key' attribute is split at 'x' into width and height.
        thumbnail_els = details_doc.findall('.//teaserimage')
        thumbnails = [{
            'width': te.attrib['key'].partition('x')[0],
            'height': te.attrib['key'].partition('x')[2],
            'url': te.text,
        } for te in thumbnail_els]

        information_el = details_doc.find('.//information')
        video_title = information_el.find('./title').text
        video_description = information_el.find('./detail').text

        details_el = details_doc.find('.//details')
        video_uploader = details_el.find('./channel').text
        upload_date = unified_strdate(details_el.find('./airtime').text)

        # Entries served through metafilegenerator.de are skipped.
        format_els = details_doc.findall('.//formitaet')
        formats = [{
            'format_id': fe.attrib['basetype'],
            'width': int(fe.find('./width').text),
            'height': int(fe.find('./height').text),
            'url': fe.find('./url').text,
            'filesize': int(fe.find('./filesize').text),
            'video_bitrate': int(fe.find('./videoBitrate').text),
            '3sat_qualityname': fe.find('./quality').text,
        } for fe in format_els
            if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]

        def _sortkey(fmt):
            qidx = ['low', 'med', 'high', 'veryhigh'].index(fmt['3sat_qualityname'])
            # NOTE(review): despite its name, prefer_http is 1 for URLs
            # containing 'rtmp', so those sort after (and win over) other
            # URLs of equal quality -- confirm the intent.
            prefer_http = 1 if 'rtmp' in fmt['url'] else 0
            return (qidx, prefer_http, fmt['video_bitrate'])
        # Ascending sort: the last entry is the preferred format.
        formats.sort(key=_sortkey)

        info = {
            '_type': 'video',
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'thumbnails': thumbnails,
            'thumbnail': thumbnails[-1]['url'],
            'uploader': video_uploader,
            'upload_date': upload_date,
        }

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = determine_ext(formats[-1]['url'])

        return info
|
46
youtube_dl/extractor/ehow.py
Normal file
46
youtube_dl/extractor/ehow.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import re
|
||||
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
determine_ext
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EHowIE(InfoExtractor):
    """Extractor for ehow.com video pages."""

    IE_NAME = u'eHow'
    _VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
    _TEST = {
        u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
        u'file': u'12245069.flv',
        u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
        u'info_dict': {
            u"title": u"Hardwood Flooring Basics",
            u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
            u"uploader": u"Erick Nathan"
        }
    }

    def _real_extract(self, url):
        """Download the page and return the info dict for its video."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The media url is embedded percent-encoded as a file=/source= parameter
        encoded_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
                                         webpage, u'video URL')
        final_url = compat_urllib_parse.unquote(encoded_url)
        uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
                                      webpage, u'uploader')
        # Drop the site suffix from the OpenGraph title
        title = self._og_search_title(webpage).replace(' | eHow', '')
        ext = determine_ext(final_url)

        info = {
            '_type': 'video',
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
            'uploader': uploader,
        }
        return info
|
||||
|
122
youtube_dl/extractor/eighttracks.py
Normal file
122
youtube_dl/extractor/eighttracks.py
Normal file
@@ -0,0 +1,122 @@
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class EightTracksIE(InfoExtractor):
    """Extractor for 8tracks.com mixes; returns one entry per track."""

    IE_NAME = '8tracks'
    # The dot is escaped so that only the literal host name matches.
    _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
    _TEST = {
        u"name": u"EightTracks",
        u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
        u"playlist": [
            {
                u"file": u"11885610.m4a",
                u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
                u"info_dict": {
                    u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl"
                }
            },
            {
                u"file": u"11885608.m4a",
                u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
                u"info_dict": {
                    u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl"
                }
            },
            {
                u"file": u"11885679.m4a",
                u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
                u"info_dict": {
                    u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl"
                }
            },
            {
                u"file": u"11885680.m4a",
                u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
                u"info_dict": {
                    u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl"
                }
            },
            {
                u"file": u"11885682.m4a",
                u"md5": u"1893e872e263a2705558d1d319ad19e8",
                u"info_dict": {
                    u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl"
                }
            },
            {
                u"file": u"11885683.m4a",
                u"md5": u"b673c46f47a216ab1741ae8836af5899",
                u"info_dict": {
                    u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl"
                }
            },
            {
                u"file": u"11885684.m4a",
                u"md5": u"1d74534e95df54986da7f5abf7d842b7",
                u"info_dict": {
                    u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl"
                }
            },
            {
                u"file": u"11885685.m4a",
                u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
                u"info_dict": {
                    u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
                    u"uploader_id": u"ytdl"
                }
            }
        ]
    }

    def _real_extract(self, url):
        """Walk the mix track by track and return a list of info dicts.

        Raises ExtractorError when url does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)

        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
        data = json.loads(json_like)

        # A random number is used as the play session identifier in the urls.
        session = str(random.randint(0, 1000000000))
        mix_id = data['id']
        track_count = data['tracks_count']
        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        next_url = first_url
        res = []
        # Each response describes one track; the following track is requested
        # by passing the current track id, until 'at_last_track' is set.
        for i in itertools.count():
            api_json = self._download_webpage(next_url, playlist_id,
                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
                errnote=u'Failed to download song information')
            api_data = json.loads(api_json)
            track_data = api_data[u'set']['track']
            info = {
                'id': track_data['id'],
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + u' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            }
            res.append(info)
            if api_data['set']['at_last_track']:
                break
            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
        return res
|
74
youtube_dl/extractor/escapist.py
Normal file
74
youtube_dl/extractor/escapist.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class EscapistIE(InfoExtractor):
    """Extractor for escapistmagazine.com video pages."""

    _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
    _TEST = {
        u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
        u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
        u'md5': u'c6793dbda81388f4264c1ba18684a74d',
        u'info_dict': {
            u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
            u"uploader": u"the-escapist-presents",
            u"title": u"Breaking Down Baldur's Gate"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for an Escapist video url.

        Raises ExtractorError when url does not match _VALID_URL or the
        player configuration is not valid JSON.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        showName = mobj.group('showname')
        videoId = mobj.group('episode')

        self.report_extraction(videoId)
        webpage = self._download_webpage(url, videoId)

        videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
            webpage, u'description', fatal=False)

        playerUrl = self._og_search_video_url(webpage, name='player url')

        # Report failures under the field actually being extracted; keep
        # only the part of the title after the last ' : '.
        title = self._html_search_regex('<meta name="title" content="([^"]*)"',
            webpage, u'title').split(' : ')[-1]

        # The configuration url is passed percent-encoded in the player url.
        configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
        configUrl = compat_urllib_parse.unquote(configUrl)

        configJSON = self._download_webpage(configUrl, videoId,
                                            u'Downloading configuration',
                                            u'unable to download configuration')

        # Technically, it's JavaScript, not JSON
        configJSON = configJSON.replace("'", '"')

        try:
            config = json.loads(configJSON)
        except ValueError as err:
            raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))

        playlist = config['playlist']
        # The second playlist entry is the one used as the video.
        videoUrl = playlist[1]['url']

        info = {
            'id': videoId,
            'url': videoUrl,
            'uploader': showName,
            'upload_date': None,
            'title': title,
            'ext': 'mp4',
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': videoDesc,
            'player_url': playerUrl,
        }

        return [info]
|
54
youtube_dl/extractor/exfm.py
Normal file
54
youtube_dl/extractor/exfm.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ExfmIE(InfoExtractor):
    """Information extractor for songs hosted on ex.fm.

    Song metadata is read from the ex.fm JSON API. Songs whose stream URL
    points at the Soundcloud API are delegated to the Soundcloud extractor.
    """

    IE_NAME = u'exfm'
    IE_DESC = u'ex.fm'
    _VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
    # Every dot of the host name is escaped so that only the real
    # api.soundcloud.com host is recognised.
    _SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
    _TESTS = [
        {
            u'url': u'http://ex.fm/song/1bgtzg',
            u'file': u'95223130.mp3',
            u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
            u'info_dict': {
                u"title": u"We Can't Stop - Miley Cyrus",
                u"uploader": u"Miley Cyrus",
                u'upload_date': u'20130603',
                u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
            },
            u'note': u'Soundcloud song',
        },
        {
            u'url': u'http://ex.fm/song/wddt8',
            u'file': u'wddt8.mp3',
            u'md5': u'966bd70741ac5b8570d8e45bfaed3643',
            u'info_dict': {
                u'title': u'Safe and Sound',
                u'uploader': u'Capital Cities',
            },
        },
    ]

    def _real_extract(self, url):
        """Extract the song referenced by ``url``.

        Returns either a URL result handing the song over to the
        Soundcloud extractor, or a one-element list with the info
        dictionary for a directly downloadable song.
        """
        mobj = re.match(self._VALID_URL, url)
        song_id = mobj.group(1)
        info_url = "http://ex.fm/api/v3/song/%s" %(song_id)
        webpage = self._download_webpage(info_url, song_id)
        info = json.loads(webpage)
        song_url = info['song']['url']
        if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
            self.to_screen('Soundcloud song detected')
            # Drop the '/stream' suffix to obtain the track URL that is
            # passed on to the Soundcloud extractor
            return self.url_result(song_url.replace('/stream',''), 'Soundcloud')
        return [{
            'id': song_id,
            'url': song_url,
            'ext': 'mp3',
            'title': info['song']['title'],
            'thumbnail': info['song']['image']['large'],
            'uploader': info['song']['artist'],
            # The API's "loved" counter is reported as the view count
            'view_count': info['song']['loved_count'],
        }]
|
120
youtube_dl/extractor/facebook.py
Normal file
120
youtube_dl/extractor/facebook.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import json
|
||||
import netrc
|
||||
import re
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
    _NETRC_MACHINE = 'facebook'
    IE_NAME = u'facebook'
    _TEST = {
        u'url': u'https://www.facebook.com/photo.php?v=120708114770723',
        u'file': u'120708114770723.mp4',
        u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
        u'info_dict': {
            u"duration": 279,
            u"title": u"PEOPLE ARE AWESOME 2013"
        }
    }

    def report_login(self):
        """Report attempt to log in."""
        self.to_screen(u'Logging in')

    def _real_initialize(self):
        """Log in when credentials are available.

        Credentials come from the downloader's ``username``/``password``
        parameters or, when ``usenetrc`` is set, from the ``facebook``
        machine entry in .netrc. Every failure is reported as a warning
        and the method simply returns.
        """
        if self._downloader is None:
            return

        useremail = None
        password = None
        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            useremail = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            try:
                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if info is not None:
                    useremail = info[0]
                    password = info[2]
                else:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError) as err:
                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
                return

        if useremail is None:
            return

        # Log in
        login_form = {
            'email': useremail,
            'pass': password,
            'login': 'Log+In'
            }
        # The POST body has to be bytes on Python 3; urlencode() output is
        # percent-encoded and therefore plain ASCII.
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            # read() returns bytes; decode before matching with a text
            # pattern ('replace' keeps odd bytes from aborting the check)
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8', 'replace')
            # Being served the login form again means the login failed
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                return
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return

    def _real_extract(self, url):
        """Extract the video referenced by ``url``.

        Returns a one-element list containing the info dictionary.

        Raises ExtractorError when the URL is invalid, the embedded player
        data cannot be located, or no video URL is present.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('ID')

        # Always fetch the canonical video page, whatever URL form was given
        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)

        # The player variables are a JSON array located between these two
        # JavaScript snippets in the page source
        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
        if not m:
            raise ExtractorError(u'Cannot parse data')
        data = dict(json.loads(m.group(1)))
        # 'params' is itself URL-quoted JSON
        params_raw = compat_urllib_parse.unquote(data['params'])
        params = json.loads(params_raw)
        video_data = params['video_data'][0]
        # Prefer the HD source and fall back to SD. get() is used for both
        # so that a missing key reaches the ExtractorError below instead
        # of escaping as a KeyError.
        video_url = video_data.get('hd_src')
        if not video_url:
            video_url = video_data.get('sd_src')
        if not video_url:
            raise ExtractorError(u'Cannot find video URL')
        video_duration = int(video_data['video_duration'])
        thumbnail = video_data['thumbnail_src']

        video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
            webpage, u'title')

        info = {
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'ext': 'mp4',
            'duration': video_duration,
            'thumbnail': thumbnail,
        }
        return [info]
|
58
youtube_dl/extractor/flickr.py
Normal file
58
youtube_dl/extractor/flickr.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class FlickrIE(InfoExtractor):
    """Information Extractor for Flickr videos"""
    _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
    _TEST = {
        u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
        u'file': u'5645318632.mp4',
        u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b',
        u'info_dict': {
            u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
            u"uploader_id": u"forestwander-nature-pictures",
            u"title": u"Dark Hollow Waterfalls"
        }
    }

    def _real_extract(self, url):
        """Resolve a Flickr photo page to its video stream.

        Three requests are made: the photo page (for the photo secret),
        a first XML document (for the node id) and a second XML playlist
        (for the stream location). Returns a one-element list with the
        info dictionary; raises ExtractorError if the playlist contains
        no stream entry.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        video_uploader_id = url_match.group('uploader_id')

        # Step 1: the photo page carries the secret needed by the data URLs
        page_url = ''.join(('http://www.flickr.com/photos/', video_uploader_id, '/', video_id))
        webpage = self._download_webpage(page_url, video_id)
        secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')

        # Step 2: the first XML document yields the node id
        first_url = ''.join((
            'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=',
            video_id,
            '&secret=',
            secret,
            '&bitrate=700&target=_self',
        ))
        first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
        node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
            first_xml, u'node_id')

        # Step 3: the playlist XML names the stream application and path
        second_url = ''.join((
            'https://secure.flickr.com/video_playlist.gne?node_id=',
            node_id,
            '&tech=flash&mode=playlist&bitrate=700&secret=',
            secret,
            '&rd=video.yahoo.com&noad=1',
        ))
        second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        stream = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
        if stream is None:
            raise ExtractorError(u'Unable to extract video url')
        app, full_path = stream.group(1), stream.group(2)
        # Only the path part is HTML-unescaped
        video_url = app + unescapeHTML(full_path)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': video_uploader_id,
        }
        return [info]
|
36
youtube_dl/extractor/freesound.py
Normal file
36
youtube_dl/extractor/freesound.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
class FreesoundIE(InfoExtractor):
    """Information extractor for sounds published on freesound.org."""
    _VALID_URL = r'(?:https?://)?(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P<id>[^/]+)'
    _TEST = {
        u'url': u'http://www.freesound.org/people/miklovan/sounds/194503/',
        u'file': u'194503.mp3',
        u'md5': u'12280ceb42c81f19a515c745eae07650',
        u'info_dict': {
            u"title": u"gulls in the city.wav",
            u"uploader" : u"miklovan",
            u'description': u'the sounds of seagulls in the city',
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the sound at ``url``.

        Title and description are scraped from the page markup; the media
        URL and the uploader come from the page's Open Graph ``audio``
        properties. The description is optional.
        """
        music_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, music_id)

        info = {'id': music_id}
        info['title'] = self._html_search_regex(
            r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
            webpage, 'music title', flags=re.DOTALL)
        music_url = self._og_search_property('audio', webpage, 'music url')
        info['url'] = music_url
        info['description'] = self._html_search_regex(
            r'<div id="sound_description">(.*?)</div>',
            webpage, 'description', fatal=False, flags=re.DOTALL)
        info['uploader'] = self._og_search_property('audio:artist', webpage, 'music uploader')
        # The file extension is derived from the media URL
        info['ext'] = determine_ext(music_url)

        return [info]
|
37
youtube_dl/extractor/funnyordie.py
Normal file
37
youtube_dl/extractor/funnyordie.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FunnyOrDieIE(InfoExtractor):
    """Information extractor for funnyordie.com video pages."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
    _TEST = {
        u'url': u'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
        u'file': u'0732f586d7.mp4',
        u'md5': u'f647e9e90064b53b6e046e75d0241fbd',
        u'info_dict': {
            u"description": u"Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.",
            u"title": u"Heart-Shaped Box: Literal Video Version"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the video at ``url``."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The media URL is the ``src`` of the second <source> element
        # inside the <video> tag
        source_pattern = r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"'
        video_url = self._html_search_regex(
            source_pattern, webpage, u'video URL', flags=re.DOTALL)

        # Prefer the player heading; fall back to the document <title>
        title_patterns = (
            r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
            r'<title>(?P<title>[^<]+?)</title>',
        )
        title = self._html_search_regex(
            title_patterns, webpage, 'title', flags=re.DOTALL)

        description = self._og_search_description(webpage)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }]
|
55
youtube_dl/extractor/gamespot.py
Normal file
55
youtube_dl/extractor/gamespot.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
class GameSpotIE(InfoExtractor):
    """Information extractor for gamespot.com video pages."""
    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
    _TEST = {
        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
        u"file": u"6410818.mp4",
        u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
        u"info_dict": {
            u"title": u"Arma III - Community Guide: SITREP I",
            u"upload_date": u"20130627",
        }
    }

    def _real_extract(self, url):
        """Return a one-element list describing the video at ``url``.

        The page is searched for the numeric video id, which is then used
        to request the player XML; the HTTP variant with the highest
        ``rate`` value is selected.
        """
        page_id = re.match(self._VALID_URL, url).group('page_id')
        webpage = self._download_webpage(url, page_id)

        id_patterns = [r'"og:video" content=".*?\?id=(\d+)"',
                       r'http://www\.gamespot\.com/videoembed/(\d+)']
        video_id = self._html_search_regex(id_patterns, webpage, 'video id')

        query = compat_urllib_parse.urlencode({'id': video_id, 'newplayer': '1'})
        info_xml = self._download_webpage(
            'http://www.gamespot.com/pages/video_player/xml.php?' + query, video_id)
        clip = xml.etree.ElementTree.fromstring(info_xml).find('./playList/clip')

        # Collect every HTTP variant together with its rate
        variants = []
        for node in clip.find('./httpURI'):
            variants.append({
                'url': node.find('filePath').text,
                'rate': int(node.find('rate').text),
            })
        # Ascending (stable) sort by rate: the last entry is the best one
        variants.sort(key=lambda variant: variant['rate'])
        video_url = variants[-1]['url']

        title = clip.find('./title').text
        # Extension is whatever follows the last dot of the media URL
        ext = video_url.rpartition('.')[-1]
        thumbnail_url = clip.find('./screenGrabURI').text
        view_count = int(clip.find('./views').text)
        upload_date = unified_strdate(clip.find('./postDate').text)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'upload_date': upload_date,
            'view_count': view_count,
        }
        return [info]
|
36
youtube_dl/extractor/gametrailers.py
Normal file
36
youtube_dl/extractor/gametrailers.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import re
|
||||
|
||||
from .mtv import MTVIE, _media_xml_tag
|
||||
|
||||
class GametrailersIE(MTVIE):
    """
    Gametrailers use the same videos system as MTVIE, it just changes the feed
    url, where the uri is and the method to get the thumbnails.
    """
    # The dots of the host name are escaped so they only match a literal '.'
    _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
    _TEST = {
        u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
        u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
        u'md5': u'4c8e67681a0ea7ec241e8c09b3ea8cf7',
        u'info_dict': {
            u'title': u'E3 2013: Debut Trailer',
            u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
        },
    }
    # Overwrite MTVIE properties we don't want
    _TESTS = []

    _FEED_URL = 'http://www.gametrailers.com/feeds/mrss'

    def _get_thumbnail_url(self, uri, itemdoc):
        """Return the ``url`` attribute of the media group's thumbnail in ``itemdoc``.

        ``uri`` is accepted as part of the method signature but not used here.
        """
        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        return itemdoc.find(search_path).attrib['url']

    def _real_extract(self, url):
        """Find the mgid on the video page and return ``_get_videos_info(mgid)``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The mgid is stored in one of two data attributes
        mgid = self._search_regex([r'data-video="(?P<mgid>mgid:.*?)"',
                                   r'data-contentId=\'(?P<mgid>mgid:.*?)\''],
                                  webpage, u'mgid')
        return self._get_videos_info(mgid)
|
182
youtube_dl/extractor/generic.py
Normal file
182
youtube_dl/extractor/generic.py
Normal file
@@ -0,0 +1,182 @@
|
||||
# encoding: utf-8
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
|
||||
class GenericIE(InfoExtractor):
    """Last-resort extractor that looks for common embed patterns on any page."""

    IE_DESC = u'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
    IE_NAME = u'generic'
    _TESTS = [
        {
            u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
            u'file': u'13601338388002.mp4',
            u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
            u'info_dict': {
                u"uploader": u"www.hodiho.fr",
                u"title": u"R\u00e9gis plante sa Jeep"
            }
        },
        {
            u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/',
            u'file': u'2371591881001.mp4',
            u'md5': u'9e80619e0a94663f0bdc849b4566af19',
            u'note': u'Test Brightcove downloads and detection in GenericIE',
            u'info_dict': {
                u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
                u'uploader': u'8TV',
                u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
            }
        },
    ]

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        # Outside test runs, warn that no dedicated extractor is in use
        if not self._downloader.params.get('test', False):
            self._downloader.report_warning(u'Falling back on generic information extractor.')
        super(GenericIE, self).report_download_webpage(video_id)

    def report_following_redirect(self, new_url):
        """Report that a redirect to new_url is being followed."""
        self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)

    def _test_redirect(self, url):
        """Check if it is a redirect, like url shorteners, in case return the new url.

        Returns the final URL when it differs from ``url`` and False
        otherwise. Raises ExtractorError when no handler could open the URL.
        """
        class HeadRequest(compat_urllib_request.Request):
            def get_method(self):
                return "HEAD"

        class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
            """
            Subclass the HTTPRedirectHandler to make it use our
            HeadRequest also on the redirected URL
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                if code in (301, 302, 303, 307):
                    newurl = newurl.replace(' ', '%20')
                    # Body-related headers are dropped for the new request
                    newheaders = dict((k,v) for k,v in req.headers.items()
                                      if k.lower() not in ("content-length", "content-type"))
                    return HeadRequest(newurl,
                                       headers=newheaders,
                                       origin_req_host=req.get_origin_req_host(),
                                       unverifiable=True)
                else:
                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)

        class HTTPMethodFallback(compat_urllib_request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                newheaders = dict((k,v) for k,v in req.headers.items()
                                  if k.lower() not in ("content-length", "content-type"))
                return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
                                                 headers=newheaders,
                                                 origin_req_host=req.get_origin_req_host(),
                                                 unverifiable=True))

        # Build our opener
        opener = compat_urllib_request.OpenerDirector()
        for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                        HTTPMethodFallback, HEADRedirectHandler,
                        compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
            opener.add_handler(handler())

        response = opener.open(HeadRequest(url))
        if response is None:
            raise ExtractorError(u'Invalid URL protocol')
        new_url = response.geturl()
        # Only the final URL is needed; release the connection
        response.close()

        if url == new_url:
            return False

        self.report_following_redirect(new_url)
        return new_url

    def _real_extract(self, url):
        """Extract a video from an arbitrary web page.

        Redirects are followed first (the target is handed back as a URL
        result). Otherwise the page is searched, in order, for a
        Brightcove object, JW Player style ``file=``/``source=`` settings,
        Twitter card stream info and Open Graph video info.

        Raises ExtractorError when nothing usable is found.
        """
        new_url = self._test_redirect(url)
        if new_url: return [self.url_result(new_url)]

        video_id = url.split('/')[-1]
        try:
            webpage = self._download_webpage(url, video_id)
        except ValueError:
            # since this is the last-resort InfoExtractor, if
            # this error is thrown, it'll be thrown here
            raise ExtractorError(u'Invalid URL: %s' % url)

        self.report_extraction(video_id)
        # Look for Brightcove:
        m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
        if m_brightcove is not None:
            self.to_screen(u'Brightcove video detected.')
            bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
            return self.url_result(bc_url, 'Brightcove')

        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
            # Broaden the search a little bit
            mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
        if mobj is None:
            # Broaden the search a little bit: JWPlayer JS loader
            mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
        if mobj is None:
            # Try to find twitter cards info
            mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
        if mobj is None:
            # We look for Open Graph info:
            # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
            m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
            if m_video_type is not None:
                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # It's possible that one of the regexes
        # matched, but returned an empty group:
        if mobj.group(1) is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_url = compat_urllib_parse.unquote(mobj.group(1))
        video_id = os.path.basename(video_url)

        # Split the media file name into id (stem) and extension
        # (without the leading dot)
        video_extension = os.path.splitext(video_id)[1][1:]
        video_id = os.path.splitext(video_id)[0]

        # it's tempting to parse this further, but you would
        # have to take into account all the variations like
        #   Video Title - Site Name
        #   Site Name | Video Title
        #   Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        video_title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'video title', default=u'video', flags=re.DOTALL)

        # video uploader is domain name. The pattern is anchored and does
        # not require a path, so 'http://host' yields 'host' (the previous
        # unanchored pattern fell back to capturing 'http:' in that case).
        video_uploader = self._search_regex(r'^(?:https?://)?([^/]+)',
            url, u'video uploader')

        return [{
            'id':       video_id,
            'url':      video_url,
            'uploader': video_uploader,
            'upload_date':  None,
            'title':    video_title,
            'ext':      video_extension,
        }]
|
96
youtube_dl/extractor/googleplus.py
Normal file
96
youtube_dl/extractor/googleplus.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# coding: utf-8
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class GooglePlusIE(InfoExtractor):
    """Information extractor for videos attached to Google Plus posts."""

    IE_DESC = u'Google Plus'
    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
    IE_NAME = u'plus.google'
    _TEST = {
        u"url": u"https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
        u"file": u"ZButuJc6CtH.flv",
        u"info_dict": {
            u"upload_date": u"20120613",
            u"uploader": u"井上ヨシマサ",
            u"title": u"嘆きの天使 降臨"
        }
    }

    def _real_extract(self, url):
        """Extract the video attached to the post at ``url``.

        Returns a one-element list containing the info dictionary.

        Raises ExtractorError when the URL is invalid or when no video
        links are found on the video page.
        """
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        post_url = mobj.group(0)
        video_id = mobj.group(1)

        video_extension = 'flv'

        # Step 1, Retrieve post webpage to extract further information
        webpage = self._download_webpage(post_url, video_id, u'Downloading entry webpage')

        self.report_extraction(video_id)

        # Extract update date
        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
            webpage, u'upload date', fatal=False)
        if upload_date:
            # Convert timestring to a format suitable for filename
            upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
            upload_date = upload_date.strftime('%Y%m%d')

        # Extract uploader
        uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>',
            webpage, u'uploader', fatal=False)

        # Extract title
        # Get the first line for title
        video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
            webpage, 'title', default=u'NA')

        # Step 2, Simulate clicking the image box to launch video
        DOMAIN = 'https://plus.google.com'
        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
            webpage, u'video page URL')
        # The link may be relative to the site root
        if not video_page.startswith(DOMAIN):
            video_page = DOMAIN + video_page

        webpage = self._download_webpage(video_page, video_id, u'Downloading video page')

        # Extract video links of all sizes from the video page; each hit
        # is a (resolution, url) pair of strings
        pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
        links = re.findall(pattern, webpage)
        if len(links) == 0:
            raise ExtractorError(u'Unable to extract video links')

        # Choose the highest resolution. The resolution is captured as a
        # string, so it must be compared numerically: a plain string sort
        # would rank '720' above '1080'.
        best_link = max(links, key=lambda link: int(link[0]))
        # Only get the url. The resolution part in the tuple has no use anymore
        video_url = best_link[1]
        # Treat escaped \u0026 style hex
        try:
            video_url = video_url.decode("unicode_escape")
        except AttributeError: # Python 3
            video_url = bytes(video_url, 'ascii').decode('unicode-escape')

        return [{
            'id':       video_id,
            'url':      video_url,
            'uploader': uploader,
            'upload_date':  upload_date,
            'title':    video_title,
            'ext':      video_extension,
        }]
|
39
youtube_dl/extractor/googlesearch.py
Normal file
39
youtube_dl/extractor/googlesearch.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import SearchInfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class GoogleSearchIE(SearchInfoExtractor):
    """Search extractor returning Google Video search hits as a playlist."""

    IE_DESC = u'Google Video search'
    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
    _MAX_RESULTS = 1000
    IE_NAME = u'video.google:search'
    _SEARCH_KEY = 'gvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Returns a playlist dictionary whose 'entries' list holds at most
        ``n`` URL entries, in the order the result pages list them.
        """
        entries = []
        res = {
            '_type': 'playlist',
            'id': query,
            'entries': entries
        }

        # The 'start' parameter is a zero-based result offset, so the first
        # page is requested with start=0 (counting pages from 1 skipped the
        # first page of results).
        for pagenum in itertools.count():
            result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10)
            webpage = self._download_webpage(result_url, u'gvsearch:' + query,
                                             note='Downloading result page ' + str(pagenum + 1))

            found_on_page = 0
            for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage):
                entries.append({
                    '_type': 'url',
                    'url': mobj.group(1)
                })
                found_on_page += 1

            # Stop once enough hits were collected, when a page yields
            # nothing new (guards against looping forever), or when there
            # is no link to a further page.
            if (len(entries) >= n or found_on_page == 0
                    or not re.search(self._MORE_PAGES_INDICATOR, webpage)):
                # Never hand back more than the requested number of results
                res['entries'] = entries[:n]
                return res
|
44
youtube_dl/extractor/hotnewhiphop.py
Normal file
44
youtube_dl/extractor/hotnewhiphop.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HotNewHipHopIE(InfoExtractor):
    """Information extractor for songs on hotnewhiphop.com.

    The media URL is stored base64-encoded in a ``data-path`` attribute.
    Pages without it are handed over to the Youtube extractor using the
    page's ``contentUrl`` value.
    """

    # The host name's dot is escaped so it only matches a literal '.'
    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
    _TEST = {
        # A stray trailing quote character was removed from this URL
        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
        u'file': u'1435540.mp3',
        u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
        u'info_dict': {
            u"title": u"Freddie Gibbs Songs - Lay It Down"
        }
    }

    def _real_extract(self, url):
        """Extract the song referenced by ``url``.

        Returns a one-element list with the info dictionary, or a URL
        result for the Youtube extractor when the page has no
        ``data-path`` attribute.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage_src = self._download_webpage(url, video_id)

        # Not fatal: a missing attribute selects the Youtube fallback below
        video_url_base64 = self._search_regex(r'data-path="(.*?)"',
            webpage_src, u'video URL', fatal=False)

        if video_url_base64 is None:
            video_url = self._search_regex(r'"contentUrl" content="(.*?)"', webpage_src,
                u'video URL')
            return self.url_result(video_url, ie='Youtube')

        video_url = base64.b64decode(video_url_base64).decode('utf-8')

        video_title = self._html_search_regex(r"<title>(.*)</title>",
            webpage_src, u'title')

        results = [{
            'id': video_id,
            'url' : video_url,
            'title' : video_title,
            'thumbnail' : self._og_search_thumbnail(webpage_src),
            'ext' : 'mp3',
        }]
        return results
|
46
youtube_dl/extractor/howcast.py
Normal file
46
youtube_dl/extractor/howcast.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HowcastIE(InfoExtractor):
    """Information extractor for howcast.com video pages."""

    _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
    _TEST = {
        u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
        u'file': u'390161.mp4',
        u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
        u'info_dict': {
            u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
            u"title": u"How to Tie a Square Knot Properly"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for ``url``.

        The page is always requested through its canonical
        ``/videos/<id>`` address; description and thumbnail are optional.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(
            'http://www.howcast.com/videos/' + video_id, video_id)

        self.report_extraction(video_id)

        info = {'id': video_id, 'ext': 'mp4'}
        info['url'] = self._search_regex(
            r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
            page, u'video URL')
        info['title'] = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
            page, u'title')
        # The two remaining fields are looked up non fatally.
        info['description'] = self._html_search_regex(
            r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
            page, u'description', fatal=False)
        info['thumbnail'] = self._html_search_regex(
            r'<meta content=\'(.+?)\' property=\'og:image\'',
            page, u'thumbnail', fatal=False)
        return [info]
|
71
youtube_dl/extractor/hypem.py
Normal file
71
youtube_dl/extractor/hypem.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class HypemIE(InfoExtractor):
    """Information Extractor for hypem"""

    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
    _TEST = {
        u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
        u'file': u'1v6ga.mp3',
        u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
        u'info_dict': {
            u"title": u"Tame"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary of the track.

        Two requests are made: the track page (its ``Set-Cookie`` header
        is kept) and the ``serve/source`` endpoint, which receives that
        cookie and answers with JSON containing the final media URL.

        Raises ExtractorError for an invalid URL or when either JSON
        document cannot be decoded.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        track_id = mobj.group(1)

        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        complete_url = url + "?" + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
        cookie = urlh.headers.get('Set-Cookie', '')

        self.report_extraction(track_id)

        html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
            response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
        try:
            track_list = json.loads(html_tracks)
            track = track_list[u'tracks'][0]
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')

        key = track[u"key"]
        # From here on the id reported by the site replaces the one from the URL.
        track_id = track[u"id"]
        artist = track[u"artist"]
        title = track[u"song"]

        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
        # The (empty) request body must be bytes: Python 3 rejects text as
        # POST data, while on Python 2 b"" is the same object type as "".
        request = compat_urllib_request.Request(serve_url, b"", {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
        try:
            song_data = json.loads(song_data_json)
        except ValueError:
            raise ExtractorError(u'Hypemachine contained invalid JSON.')
        final_url = song_data[u"url"]

        return [{
            'id': track_id,
            'url': final_url,
            'ext': "mp3",
            'title': title,
            'artist': artist,
        }]
|
91
youtube_dl/extractor/ign.py
Normal file
91
youtube_dl/extractor/ign.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class IGNIE(InfoExtractor):
    """
    Extractor for several IGN sites (www.ign.com, es.ign.com, de.ign.com);
    some it.ign.com videos work as well.
    """

    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
    IE_NAME = u'ign.com'

    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
    _DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
                       r'id="my_show_video">.*?<p>(.*?)</p>',
                       ]

    _TEST = {
        u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
        u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
        u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
        u'info_dict': {
            u'title': u'The Last of Us Review',
            u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
        }
    }

    def _find_video_id(self, webpage):
        """Search ``webpage`` for the video id with a list of patterns."""
        id_patterns = [
            r'data-video-id="(.+?)"',
            r'<object id="vid_(.+?)"',
            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
        ]
        return self._search_regex(id_patterns, webpage, 'video id')

    def _real_extract(self, url):
        """Return the info dictionary for ``url``, description included."""
        name_or_id = re.match(self._VALID_URL, url).group('name_or_id')
        page = self._download_webpage(url, name_or_id)
        info = self._get_video_info(self._find_video_id(page))
        # The description comes from the page itself, not from the config.
        info['description'] = self._html_search_regex(
            self._DESCRIPTION_RE, page, 'video description',
            flags=re.DOTALL)
        return info

    def _get_video_info(self, video_id):
        """Download the JSON config of ``video_id`` and map it to an info dict."""
        config_json = self._download_webpage(
            self._CONFIG_URL_TEMPLATE % video_id, video_id,
            u'Downloading video info')
        media = json.loads(config_json)['playlist']['media']
        video_url = media['url']
        metadata = media['metadata']

        info = {'id': metadata['videoId'], 'url': video_url}
        info['ext'] = determine_ext(video_url)
        info['title'] = metadata['title']
        info['thumbnail'] = media['poster'][0]['url'].replace('{size}', 'grande')
        return info
|
||||
|
||||
|
||||
class OneUPIE(IGNIE):
    """Extractor for 1up.com, which is served by the ign videos system."""

    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

    _TEST = {
        u'url': u'http://gamevideos.1up.com/video/id/34976',
        u'file': u'34976.mp4',
        u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
        u'info_dict': {
            u'title': u'Sniper Elite V2 - Trailer',
            u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
        }
    }

    def _real_extract(self, url):
        """Run the inherited extraction, then use the id from the URL as 'id'."""
        url_id = re.match(self._VALID_URL, url).group('name_or_id')
        info = super(OneUPIE, self)._real_extract(url)
        info['id'] = url_id
        return info
|
39
youtube_dl/extractor/ina.py
Normal file
39
youtube_dl/extractor/ina.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr"""

    # The id may start with an 'I' (see the id of the test URL below), which
    # the character class [A-F0-9] alone does not accept.
    _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
    _TEST = {
        u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
        u'file': u'I12055569.mp4',
        u'md5': u'a667021bf2b41f8dc6049479d9bb38a3',
        u'info_dict': {
            u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\""
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for ``url``.

        Title and media URL are read from the MRSS notice of the video,
        not from the page at ``url`` itself.
        """
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        video_extension = 'mp4'
        webpage = self._download_webpage(mrss_url, video_id)

        self.report_extraction(video_id)

        video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
            webpage, u'video URL')

        video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
            webpage, u'title')

        return [{
            'id': video_id,
            'url': video_url,
            'ext': video_extension,
            'title': video_title,
        }]
|
62
youtube_dl/extractor/infoq.py
Normal file
62
youtube_dl/extractor/infoq.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class InfoQIE(InfoExtractor):
    """Information extractor for infoq.com pages."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
    _TEST = {
        u"name": u"InfoQ",
        u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
        u"file": u"12-jan-pythonthings.mp4",
        u"info_dict": {
            u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
            u"title": u"A Few of My Favorite [Python] Things"
        },
        u"params": {
            u"skip_download": True
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for ``url``.

        The stream path is stored on the page base64-encoded and
        URL-quoted in ``jsclassref``; id and extension are taken from the
        last component of that path.

        Raises ExtractorError when ``jsclassref`` is not on the page.
        """
        webpage = self._download_webpage(url, video_id=url)
        self.report_extraction(url)

        # Extract video URL
        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8'))
        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id

        # Extract title
        video_title = self._search_regex(r'contentTitle = "(.*?)";',
            webpage, u'title')

        # Extract description
        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
            webpage, u'description', fatal=False)

        video_filename = video_url.split('/')[-1]
        # Split on the last dot only, so that a name with several dots still
        # unpacks into exactly (id, extension).
        video_id, extension = video_filename.rsplit('.', 1)

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': extension, # Extension is always(?) mp4, but seems to be flv
            'thumbnail': None,
            'description': video_description,
        }

        return [info]
|
35
youtube_dl/extractor/instagram.py
Normal file
35
youtube_dl/extractor/instagram.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class InstagramIE(InfoExtractor):
    """Information extractor for instagram.com video posts."""

    _VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
    _TEST = {
        u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
        u'file': u'aye83DjauH.mp4',
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq",
            u"title": u"Video by naomipq",
            u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for ``url``.

        Uploader id and caption are optional; the title is derived from
        the uploader id.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        page = self._download_webpage(url, video_id)

        owner = self._search_regex(
            r'"owner":{"username":"(.+?)"', page, u'uploader id',
            fatal=False)
        caption = self._search_regex(
            r'"caption":"(.*?)"', page, u'description', fatal=False)

        info = {'id': video_id, 'ext': 'mp4'}
        info['url'] = self._og_search_video_url(page)
        info['title'] = u'Video by %s' % owner
        info['thumbnail'] = self._og_search_thumbnail(page)
        info['uploader_id'] = owner
        info['description'] = caption
        return [info]
|
56
youtube_dl/extractor/jukebox.py
Normal file
56
youtube_dl/extractor/jukebox.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# coding: utf-8
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
class JukeboxIE(InfoExtractor):
    """Information extractor for jukebox video pages.

    The player lives in an iframe of the page; its configuration holds
    either a direct media URL or a YouTube watch URL.
    """

    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html'
    _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
    _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
    _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
    _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'

    def _real_extract(self, url):
        """Return the video info for ``url``.

        Returns a one-element list with the info dictionary, or a url
        result for the Youtube extractor when the player configuration
        points to YouTube.

        Raises ExtractorError when the iframe, the video URL or the title
        cannot be found, or when the iframe shows the "waiting" marker.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        html = self._download_webpage(url, video_id)

        mobj = re.search(self._IFRAME, html)
        if mobj is None:
            raise ExtractorError(u'Cannot extract iframe url')
        iframe_url = unescapeHTML(mobj.group('iframe'))

        iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
        mobj = re.search(r'class="jkb_waiting"', iframe_html)
        if mobj is not None:
            raise ExtractorError(u'Video is not available(in your country?)!')

        self.report_extraction(video_id)

        mobj = re.search(self._VIDEO_URL, iframe_html)
        if mobj is None:
            mobj = re.search(self._IS_YOUTUBE, iframe_html)
            if mobj is None:
                raise ExtractorError(u'Cannot extract video url')
            # '\\/' is an explicit backslash followed by a slash; the slashes
            # of the URL are escaped that way in the player configuration.
            youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\\/', '/')
            self.to_screen(u'Youtube video detected')
            return self.url_result(youtube_url, ie='Youtube')
        video_url = unescapeHTML(mobj.group('video_url')).replace('\\/', '/')
        video_ext = unescapeHTML(mobj.group('video_ext'))

        mobj = re.search(self._TITLE, html)
        if mobj is None:
            raise ExtractorError(u'Cannot extract title')
        title = unescapeHTML(mobj.group('title'))
        artist = unescapeHTML(mobj.group('artist'))

        return [{'id': video_id,
                 'url': video_url,
                 'title': artist + '-' + title,
                 'ext': video_ext
                 }]
|
155
youtube_dl/extractor/justintv.py
Normal file
155
youtube_dl/extractor/justintv.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
formatSeconds,
|
||||
)
|
||||
|
||||
|
||||
class JustinTVIE(InfoExtractor):
    """Information extractor for justin.tv and twitch.tv"""
    # TODO: One broadcast may be split into multiple videos. The key
    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
    # starts at 1 and increases. Can we treat all parts as one video?

    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
        (?:
            (?P<channelid>[^/]+)|
            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
        )
        /?(?:\#.*)?$
        """
    _JUSTIN_PAGE_LIMIT = 100
    IE_NAME = u'justin.tv'
    _TEST = {
        u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
        u'file': u'296128360.flv',
        u'md5': u'ecaa8a790c22a40770901460af191c9a',
        u'info_dict': {
            u"upload_date": u"20110927",
            u"uploader_id": 25114803,
            u"uploader": u"thegamedevhub",
            u"title": u"Beginner Series - Scripting With Python Pt.1"
        }
    }

    def report_download_page(self, channel, offset):
        """Report attempt to download a single page of videos."""
        self.to_screen(u'%s: Downloading video information from %d to %d' %
                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))

    def _parse_page(self, url, video_id):
        """Download one page of the JSON API and convert its clips.

        Returns a tuple ``(count, info)``: the number of items in the
        response and the list of info dictionaries of the *valid* items
        (those with a non-empty 'video_file_url').

        Raises ExtractorError when the response is not a JSON list.
        """
        info_json = self._download_webpage(url, video_id,
                                           u'Downloading video info JSON',
                                           u'unable to download video info JSON')

        response = json.loads(info_json)
        if not isinstance(response, list):
            error_text = response.get('error', 'unknown error')
            raise ExtractorError(u'Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
            if video_url:
                video_extension = os.path.splitext(video_url)[1][1:]
                # 'start_time' begins with the date; drop its dashes
                video_date = re.sub('-', '', clip['start_time'][:10])
                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
                # A separate name keeps the video_id argument untouched.
                clip_id = clip['id']
                video_title = clip.get('title', clip_id)
                info.append({
                    'id': clip_id,
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
                    'uploader_id': video_uploader_id,
                    'upload_date': video_date,
                    'ext': video_extension,
                })
        return (len(response), info)

    def _real_extract(self, url):
        """Return the list of info dictionaries for ``url``.

        Three kinds of URLs are handled: a channel (all archive pages are
        fetched, _JUSTIN_PAGE_LIMIT items at a time), a chapter
        (``/c/<id>``, answered with a single entry) and a single archived
        broadcast (``/b/<id>``).

        Raises ExtractorError for an invalid URL or when the chapter
        cannot be located.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'
        paged = False
        if mobj.group('channelid'):
            paged = True
            video_id = mobj.group('channelid')
            api = api_base + '/channel/archives/%s.json' % video_id
        elif mobj.group('chapterid'):
            chapter_id = mobj.group('chapterid')

            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if not m:
                raise ExtractorError(u'Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            chapter_info_xml = self._download_webpage(api, chapter_id,
                                             note=u'Downloading chapter information',
                                             errnote=u'Chapter information download failed')
            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
            # for/else: the else branch only runs when no archive matched,
            # otherwise `a` is the matching archive element.
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
                raise ExtractorError(u'Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or u'flv'

            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
                                   note='Downloading chapter metadata',
                                   errnote='Download of chapter metadata failed')
            chapter_info = json.loads(chapter_info_json)

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += u'?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
                'id': u'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
                'thumbnail': chapter_info['preview'],
                'description': chapter_info['description'],
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
            return [info]
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

        self.report_extraction(video_id)

        info = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
            if paged:
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
            info.extend(page_info)
            # A page shorter than the limit is the last one.
            if not paged or page_count != limit:
                break
            offset += limit
        return info
|
40
youtube_dl/extractor/keek.py
Normal file
40
youtube_dl/extractor/keek.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class KeekIE(InfoExtractor):
    """Information extractor for keek.com videos."""

    _VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
    IE_NAME = u'keek'
    _TEST = {
        u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
        u'file': u'NODfbab.mp4',
        u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
        u'info_dict': {
            u"uploader": u"ytdl",
            u"title": u"test chars:  \"'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for ``url``.

        Media and thumbnail URLs are built from the video id alone; the
        page is only downloaded for the title and the (optional) uploader.
        """
        video_id = re.match(self._VALID_URL, url).group('videoID')
        page = self._download_webpage(url, video_id)

        title = self._og_search_title(page)
        uploader = self._html_search_regex(
            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
            page, u'uploader', fatal=False)

        return [{
            'id': video_id,
            'url': u'http://cdn.keek.com/keek/video/%s' % video_id,
            'ext': 'mp4',
            'title': title,
            'thumbnail': u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id,
            'uploader': uploader,
        }]
|
52
youtube_dl/extractor/liveleak.py
Normal file
52
youtube_dl/extractor/liveleak.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class LiveLeakIE(InfoExtractor):
    """Information extractor for liveleak.com ``view?i=<id>`` pages."""

    # 'https?' makes the trailing 's' optional, so both schemes are accepted.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'liveleak'
    _TEST = {
        u'url': u'http://www.liveleak.com/view?i=757_1364311680',
        u'file': u'757_1364311680.mp4',
        u'md5': u'0813c2430bea7a46bf13acf3406992f4',
        u'info_dict': {
            u"description": u"extremely bad day for this guy..!",
            u"uploader": u"ljfriel2",
            u"title": u"Most unlucky car accident"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for ``url``.

        Raises ExtractorError when ``url`` does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r'file: "(.*?)",',
            webpage, u'video URL')

        # The site name prefix is removed from the og:title value.
        video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()

        video_description = self._og_search_description(webpage)

        video_uploader = self._html_search_regex(r'By:.*?(\w+)</a>',
            webpage, u'uploader', fatal=False)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader
        }

        return [info]
|
52
youtube_dl/extractor/livestream.py
Normal file
52
youtube_dl/extractor/livestream.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_parse_urlparse, compat_urlparse
|
||||
|
||||
|
||||
class LivestreamIE(InfoExtractor):
    """Information extractor for new.livestream.com events and videos."""

    _VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
    _TEST = {
        u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        u'file': u'4719370.mp4',
        u'md5': u'0d2186e3187d185a04b3cdd02b828836',
        u'info_dict': {
            u'title': u'Live from Webster Hall NYC',
            u'upload_date': u'20121012',
        }
    }

    def _extract_video_info(self, video_data):
        """Map one video entry of the API data to an info dictionary."""
        # The HD URL wins when it is present and non-empty.
        media_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
        info = {'id': video_data['id'], 'url': media_url, 'ext': 'mp4'}
        info['title'] = video_data['caption']
        info['thumbnail'] = video_data['thumbnail_url']
        compact_date = video_data['updated_at'].replace('-','')
        info['upload_date'] = compact_date[:8]
        return info

    def _real_extract(self, url):
        """Return a single video info, or a playlist for an event page."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        event_name = match.group('event_name')
        page = self._download_webpage(url, video_id or event_name)

        if video_id is not None:
            # Single video: the API address is the 'play_url' parameter of
            # the player URL, without its '.smil' part.
            player_url = self._og_search_video_url(page, name=u'player url')
            params = compat_urlparse.parse_qs(
                compat_urllib_parse_urlparse(player_url).query)
            api_url = params['play_url'][0].replace('.smil', '')
            video_json = self._download_webpage(api_url, video_id,
                                                u'Downloading video info')
            return self._extract_video_info(json.loads(video_json))

        # This is an event page:
        api_url = self._search_regex(r'event_design_eventId: \'(.+?)\'',
                                     page, 'api url')
        event_json = self._download_webpage(api_url, event_name,
                                            u'Downloading event info')
        info = json.loads(event_json)
        videos = []
        for item in info['feed']['data']:
            if item['type'] == u'video':
                videos.append(self._extract_video_info(item['data']))
        return self.playlist_result(videos, info['id'], info['full_name'])
|
137
youtube_dl/extractor/metacafe.py
Normal file
137
youtube_dl/extractor/metacafe.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import re
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_parse_qs,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    IE_NAME = u'metacafe'
    _TESTS = [{
        u"add_ie": ["Youtube"],
        u"url":  u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
        u"file":  u"_aUehQsCQtM.flv",
        u"info_dict": {
            u"upload_date": u"20090102",
            u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
            u"description": u"md5:2439a8ef6d5a70e380c22f5ad323e5a8",
            u"uploader": u"PBS",
            u"uploader_id": u"PBS"
        }
    },
    {
        u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
        u"file": u"an-dVVXnuY7Jh77J.mp4",
        u"info_dict": {
            u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
            u"uploader": u"anyclip",
            u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
        }
    }]

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self.to_screen(u'Retrieving disclaimer')

    def _real_initialize(self):
        """Fetch the family filter disclaimer, then post the age confirmation.

        Raises ExtractorError when either request fails.
        """
        # Retrieve disclaimer
        request = compat_urllib_request.Request(self._DISCLAIMER)
        try:
            self.report_disclaimer()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))

        # Confirm age
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
        }
        # The POST body has to be bytes on Python 3; the urlencoded form is
        # plain ASCII, so encoding it is a no-op on Python 2.
        post_data = compat_urllib_parse.urlencode(disclaimer_form).encode('utf-8')
        request = compat_urllib_request.Request(self._FILTER_POST, post_data)
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))

    def _real_extract(self, url):
        """Return the video info for ``url``.

        Ids starting with ``yt-`` are delegated to the Youtube extractor
        (as a one-element list); otherwise a single info dictionary is
        returned. The media URL is looked up in three places, in order:
        the ``mediaURL`` page parameter, a ``<video>`` tag, and the
        ``flashvars`` of the player.

        Raises ExtractorError for an invalid URL or when no media URL can
        be found.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]

        # Retrieve video webpage to extract further information
        req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
        req.headers['Cookie'] = 'flashVersion=0;'
        webpage = self._download_webpage(req, video_id)

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            mediaURL = compat_urllib_parse.unquote(mobj.group(1))
            # The extension is taken to be the last three characters.
            video_ext = mediaURL[-3:]

            # Extract gdaKey if available
            mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
            if mobj is None:
                video_url = mediaURL
            else:
                gdaKey = mobj.group(1)
                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
        else:
            mobj = re.search(r'<video src="([^"]+)"', webpage)
            if mobj:
                video_url = mobj.group(1)
                video_ext = 'mp4'
            else:
                mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
                if mobj is None:
                    raise ExtractorError(u'Unable to extract media URL')
                vardict = compat_parse_qs(mobj.group(1))
                if 'mediaData' not in vardict:
                    raise ExtractorError(u'Unable to extract media URL')
                mobj = re.search(r'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict['mediaData'][0])
                if mobj is None:
                    raise ExtractorError(u'Unable to extract media URL')
                mediaURL = mobj.group('mediaURL').replace('\\/', '/')
                video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
                video_ext = determine_ext(video_url)

        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
        description = self._og_search_description(webpage)
        video_uploader = self._html_search_regex(
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
                webpage, u'uploader nickname', fatal=False)

        return {
            '_type':    'video',
            'id':       video_id,
            'url':      video_url,
            'description': description,
            'uploader': video_uploader,
            'upload_date':  None,
            'title':    video_title,
            'ext':      video_ext,
        }
|
115
youtube_dl/extractor/mixcloud.py
Normal file
115
youtube_dl/extractor/mixcloud.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class MixcloudIE(InfoExtractor):
    """Information extractor for mixcloud.com cloudcasts (JSON API based)."""

    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
    IE_NAME = u'mixcloud'

    # Filled with the uploader and cloudcast slugs captured by _VALID_URL.
    _API_URL_TEMPLATE = 'http://www.mixcloud.com/api/1/cloudcast/%s/%s.json'

    @staticmethod
    def _as_text(value):
        """Return value as text, decoding it as UTF-8 only if it is a byte string.

        Calling .decode() unconditionally fails on Python 3 text and on
        Python 2 unicode objects holding non-ASCII characters.
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def report_download_json(self, file_id):
        """Report JSON download."""
        self.to_screen(u'Downloading json')

    def get_urls(self, jsonData, fmt, bitrate='best'):
        """Get urls from 'audio_formats' section in json.

        jsonData[fmt] is either a mapping of bitrate -> url list, or a plain
        url list when no bitrate information is available. In the latter
        case indexing by bitrate raises TypeError and the list itself is
        returned.
        """
        try:
            bitrate_list = jsonData[fmt]
            if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
                bitrate = max(bitrate_list) # select highest

            url_list = jsonData[fmt][bitrate]
        except TypeError: # we have no bitrate info.
            url_list = jsonData[fmt]
        return url_list

    def check_urls(self, url_list):
        """Return the first url of url_list that can be opened, or None."""
        for url in url_list:
            try:
                # Only reachability matters; close the handle right away
                # instead of leaking the connection.
                compat_urllib_request.urlopen(url).close()
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
                continue
            return url

        return None

    def _print_formats(self, formats):
        """Print one line per format/bitrate pair with the file extension."""
        print('Available formats:')
        for fmt in formats.keys():
            for b in formats[fmt]:
                try:
                    ext = formats[fmt][b][0]
                    print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
                except TypeError: # we have no bitrate info
                    ext = formats[fmt][0]
                    print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
                    break

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the cloudcast.

        Returns None after printing the format table when 'listformats' is
        set. Raises ExtractorError if the URL is invalid, the API request
        fails, the requested format is missing, or no media URL is reachable.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        # extract uploader & filename from url
        uploader = self._as_text(mobj.group(1))
        file_id = uploader + u'-' + self._as_text(mobj.group(2))

        # construct API request from the matched path components; slicing
        # the raw URL only worked when it ended with a trailing slash
        api_url = self._API_URL_TEMPLATE % (mobj.group(1), mobj.group(2))
        # retrieve .json file with links to files
        request = compat_urllib_request.Request(api_url)
        try:
            self.report_download_json(api_url)
            handle = compat_urllib_request.urlopen(request)
            try:
                raw_json = handle.read()
            finally:
                handle.close()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))

        # parse JSON (the response body is bytes, so decode it first)
        json_data = json.loads(self._as_text(raw_json))
        player_url = json_data['player_swf_url']
        formats = dict(json_data['audio_formats'])

        req_format = self._downloader.params.get('format', None)

        if self._downloader.params.get('listformats', None):
            self._print_formats(formats)
            return

        file_url = None
        format_param = None
        if req_format is None or req_format == 'best':
            # take the first format that has a reachable url
            for candidate in formats:
                format_param = candidate
                file_url = self.check_urls(self.get_urls(formats, candidate))
                if file_url is not None:
                    break # got it!
        else:
            if req_format not in formats:
                raise ExtractorError(u'Format is not available')

            file_url = self.check_urls(self.get_urls(formats, req_format))
            format_param = req_format

        if file_url is None:
            raise ExtractorError(u'Unable to find a working media URL')

        file_url = self._as_text(file_url)
        return [{
            'id': file_id,
            'url': file_url,
            'uploader': uploader,
            'upload_date': None,
            'title': json_data['name'],
            'ext': file_url.split('.')[-1],
            'format': u'NA' if format_param is None else self._as_text(format_param),
            'thumbnail': json_data['thumbnail_url'],
            'description': json_data['description'],
            'player_url': self._as_text(player_url),
        }]
|
121
youtube_dl/extractor/mtv.py
Normal file
121
youtube_dl/extractor/mtv.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
def _media_xml_tag(tag):
|
||||
return '{http://search.yahoo.com/mrss/}%s' % tag
|
||||
|
||||
class MTVIE(InfoExtractor):
    """Information extractor for mtv.com video pages (RSS feed + mediagen XML)."""

    _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'

    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'

    _TESTS = [
        {
            u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
            u'file': u'853555.mp4',
            u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
            u'info_dict': {
                u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
                u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
            },
        },
        {
            u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
            u'file': u'USCJY1331283.mp4',
            u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
            u'info_dict': {
                u'title': u'Everything Has Changed',
                u'upload_date': u'20130606',
                u'uploader': u'Taylor Swift',
            },
            u'skip': u'VEVO is only available in some countries',
        },
    ]

    @staticmethod
    def _id_from_uri(uri):
        """Return the last colon-separated component of uri."""
        return uri.split(':')[-1]

    # This was originally implemented for ComedyCentral, but it also works here
    @staticmethod
    def _transform_rtmp_url(rtmp_video_url):
        """Map an rtmp(e) url onto the HTTP mirror, keeping its 'gsp.*' path.

        Raises ExtractorError if the url does not have the expected shape.
        """
        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
        if not m:
            raise ExtractorError(u'Cannot transform RTMP url')
        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
        return base + m.group('finalid')

    def _get_thumbnail_url(self, uri, itemdoc):
        """Return the thumbnail url for uri (itemdoc is not used here)."""
        return 'http://mtv.mtvnimages.com/uri/' + uri

    def _extract_video_url(self, metadataXml):
        """Parse the mediagen XML and return a dict with 'ext', 'url', 'format'.

        Raises ExtractorError if the video is country-blocked, if no
        rendition is listed, or if the chosen rendition lacks a field.
        """
        if '/error_country_block.swf' in metadataXml:
            raise ExtractorError(u'This video is not available from your country.', expected=True)
        mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
        renditions = mdoc.findall('.//rendition')
        if not renditions:
            # Indexing an empty list would surface as a bare IndexError.
            raise ExtractorError(u'Unable to find any video rendition')

        # For now, always pick the highest quality.
        rendition = renditions[-1]

        try:
            _, _, ext = rendition.attrib['type'].partition('/')
            format_name = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
        except KeyError:
            raise ExtractorError('Invalid rendition field.')
        # find() returns None for a missing <src>; report it like any other
        # malformed rendition instead of failing with AttributeError.
        src_node = rendition.find('./src')
        if src_node is None or not src_node.text:
            raise ExtractorError('Invalid rendition field.')
        video_url = self._transform_rtmp_url(src_node.text)
        return {'ext': ext, 'url': video_url, 'format': format_name}

    def _get_video_info(self, itemdoc):
        """Build the info dict for one feed <item> element."""
        uri = itemdoc.find('guid').text
        video_id = self._id_from_uri(uri)
        self.report_extraction(video_id)
        mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
        if 'acceptMethods' not in mediagen_url:
            mediagen_url += '&acceptMethods=fms'
        mediagen_page = self._download_webpage(mediagen_url, video_id,
                                               u'Downloading video urls')
        video_info = self._extract_video_url(mediagen_page)

        # The description element is optional in the feed.
        description_node = itemdoc.find('description')
        if description_node is not None:
            description = description_node.text
        else:
            description = None
        video_info.update({'title': itemdoc.find('title').text,
                           'id': video_id,
                           'thumbnail': self._get_thumbnail_url(uri, itemdoc),
                           'description': description,
                           })
        return video_info

    def _get_videos_info(self, uri):
        """Download the feed for uri and return one info dict per <item>."""
        video_id = self._id_from_uri(uri)
        data = compat_urllib_parse.urlencode({'uri': uri})
        infoXml = self._download_webpage(self._FEED_URL + '?' + data, video_id,
                                         u'Downloading info')
        idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
        return [self._get_video_info(item) for item in idoc.findall('.//item')]

    def _real_extract(self, url):
        """Return the video infos, or a url result for Vevo-hosted videos."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        # Some videos come from Vevo.com
        m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
                           webpage, re.DOTALL)
        if m_vevo:
            vevo_id = m_vevo.group(1)
            self.to_screen(u'Vevo video detected: %s' % vevo_id)
            return self.url_result('vevo:%s' % vevo_id, ie='Vevo')

        uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, u'uri')
        return self._get_videos_info(uri)
|
73
youtube_dl/extractor/myspass.py
Normal file
73
youtube_dl/extractor/myspass.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import os.path
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class MySpassIE(InfoExtractor):
    """Information extractor for myspass.de, driven by its metadata XML."""

    # Dots are escaped so that only the literal host name matches.
    _VALID_URL = r'http://www\.myspass\.de/.*'
    _TEST = {
        u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
        u'file': u'11741.mp4',
        u'md5': u'0b49f4844a068f8b33f4b7c88405862b',
        u'info_dict': {
            u"description": u"Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
            u"title": u"Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
        }
    }

    @staticmethod
    def _optional_text(metadata, tag, default=None):
        """Return the text of child tag, or default if the child is absent."""
        element = metadata.find(tag)
        if element is None:
            return default
        return element.text

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video.

        Raises ExtractorError if the metadata lacks a download url or title.
        """
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        if not video_id:
            _, video_id = os.path.split(url_parent_path)

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))

        # extract values from metadata
        video_url = self._optional_text(metadata, 'url_flv')
        if not video_url:
            # Covers a missing element as well as an empty <url_flv/>, whose
            # text is None and would break os.path.splitext below.
            raise ExtractorError(u'Unable to extract download url')
        extension = os.path.splitext(video_url)[1][1:]

        title_el = metadata.find('title')
        if title_el is None:
            raise ExtractorError(u'Unable to extract title')
        title = title_el.text

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': extension,
            'format': self._optional_text(metadata, 'format_id', 'mp4'),
            'thumbnail': self._optional_text(metadata, 'imagePreview'),
            'description': self._optional_text(metadata, 'description'),
        }
        return [info]
|
172
youtube_dl/extractor/myvideo.py
Normal file
172
youtube_dl/extractor/myvideo.py
Normal file
@@ -0,0 +1,172 @@
|
||||
import binascii
|
||||
import base64
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_ord,
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
|
||||
class MyVideoIE(InfoExtractor):
    """Information Extractor for myvideo.de."""

    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
    IE_NAME = u'myvideo'
    _TEST = {
        u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
        u'file': u'8229274.flv',
        u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
        u'info_dict': {
            u"title": u"bowling-fail-or-win"
        }
    }

    # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
    # Released into the Public Domain by Tristan Fischer on 2013-05-19
    # https://github.com/rg3/youtube-dl/pull/842
    def __rc4crypt(self, data, key):
        """Apply the RC4 stream cipher to data with key.

        Returns a string with one character per input byte.
        """
        # key scheduling
        x = 0
        box = list(range(256))
        for i in range(256):
            x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
            box[i], box[x] = box[x], box[i]
        # keystream generation, XORed with the input
        x = 0
        y = 0
        out = []
        for char in data:
            x = (x + 1) % 256
            y = (y + box[x]) % 256
            box[x], box[y] = box[y], box[x]
            out.append(chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]))
        # join once instead of growing a string character by character
        return ''.join(out)

    def __md5(self, s):
        """Return the hex MD5 digest of the byte string s, as bytes."""
        return hashlib.md5(s).hexdigest().encode()

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video.

        Uses the plain <source> tag when the page has one, otherwise
        decrypts the player XML referenced by the page's flashvars.
        Raises ExtractorError if the needed data cannot be found.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'invalid URL: %s' % url)

        video_id = mobj.group(1)

        GK = (
            b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
            b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
            b'TnpsbA0KTVRkbU1tSTRNdz09'
        )

        # Get video webpage
        webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
        webpage = self._download_webpage(webpage_url, video_id)

        mobj = re.search(r"source src='(.+?)[.]([^.]+)'", webpage)
        if mobj is not None:
            self.report_extraction(video_id)
            video_url = mobj.group(1) + '.flv'

            video_title = self._html_search_regex('<title>([^<]+)</title>',
                                                  webpage, u'title')

            # The url is built with a '.flv' suffix just above. The former
            # regex '[.](.+?)$' matched from the FIRST dot of the url and
            # returned host and path instead of the extension.
            video_ext = u'flv'

            return [{
                'id': video_id,
                'url': video_url,
                'uploader': None,
                'upload_date': None,
                'title': video_title,
                'ext': video_ext,
            }]

        # try encxml
        mobj = re.search('var flashvars={(.+?)}', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video')

        params = {}
        encxml = ''
        sec = mobj.group(1)
        for (a, b) in re.findall(r"(.+?):'(.+?)',?", sec):
            if not a == '_encxml':
                params[a] = b
            else:
                encxml = compat_urllib_parse.unquote(b)
        if not params.get('domain'):
            params['domain'] = 'www.myvideo.de'
        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
        if 'flash_playertype=MTV' in xmldata_url:
            self._downloader.report_warning(u'avoiding MTV player')
            xmldata_url = (
                'http://www.myvideo.de/dynamic/get_player_video_xml.php'
                '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
            ) % video_id

        # get enc data: the hex payload is the second '='-separated field
        enc_fields = self._download_webpage(xmldata_url, video_id).split('=')
        if len(enc_fields) < 2:
            raise ExtractorError(u'Unable to extract encrypted video data')
        enc_data_b = binascii.unhexlify(enc_fields[1])
        sk = self.__md5(
            base64.b64decode(base64.b64decode(GK)) +
            self.__md5(
                str(video_id).encode('utf-8')
            )
        )
        dec_data = self.__rc4crypt(enc_data_b, sk)

        # extracting infos
        self.report_extraction(video_id)

        video_url = None
        mobj = re.search(r"connectionurl='(.*?)'", dec_data)
        if mobj:
            video_url = compat_urllib_parse.unquote(mobj.group(1))
            if 'myvideo2flash' in video_url:
                self._downloader.report_warning(u'forcing RTMPT ...')
                video_url = video_url.replace('rtmpe://', 'rtmpt://')

        if not video_url:
            # extract non rtmp videos
            mobj = re.search(r"path='(http.*?)' source='(.*?)'", dec_data)
            if mobj is None:
                raise ExtractorError(u'unable to extract url')
            video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))

        video_file = self._search_regex(r"source='(.*?)'", dec_data, u'video file')
        video_file = compat_urllib_parse.unquote(video_file)

        if not video_file.endswith('f4m'):
            # Split on the LAST dot only: unpacking split('.') raised
            # ValueError for names containing more than one dot.
            ppath, dot, prefix = video_file.rpartition('.')
            if not dot:
                raise ExtractorError(u'Unexpected video file name: %s' % video_file)
            video_playpath = '%s:%s' % (prefix, ppath)
            video_hls_playlist = ''
        else:
            video_playpath = ''
            video_hls_playlist = video_file.replace('.f4m', '.m3u8')

        video_swfobj = self._search_regex(r"swfobject.embedSWF\('(.+?)'", webpage, u'swfobj')
        video_swfobj = compat_urllib_parse.unquote(video_swfobj)

        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
                                              webpage, u'title')

        return [{
            'id': video_id,
            'url': video_url,
            'tc_url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': u'flv',
            'play_path': video_playpath,
            'video_file': video_file,
            'video_hls_playlist': video_hls_playlist,
            'player_url': video_swfobj,
        }]
|
||||
|
48
youtube_dl/extractor/nba.py
Normal file
48
youtube_dl/extractor/nba.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
    """Information extractor for nba.com video pages."""

    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
    _TEST = {
        u'url': u'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
        u'file': u'0021200253-okc-bkn-recap.nba.mp4',
        u'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
        u'info_dict': {
            u"description": u"Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.",
            u"title": u"Thunder vs. Nets"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video."""
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # The captured path (with its leading slash) doubles as the video id.
        path_id = match.group(1)
        page = self._download_webpage(url, path_id)

        # The last path component is used as the public id and fallback title.
        short_id = path_id.rpartition('/')[2]
        og_title = self._og_search_title(page, default=short_id)

        # The upload date is not present in the HTML that is returned to us,
        # so it is not extracted.
        description = self._html_search_regex(
            r'<meta name="description" (?:content|value)="(.*?)" />',
            page, 'description', fatal=False)

        return [{
            'id': short_id,
            'url': u'http://ht-mobile.cdn.turner.com/nba/big' + path_id + '_nba_1280x720.mp4',
            'ext': 'mp4',
            'title': og_title.replace('NBA.com: ', ''),
            'description': description,
        }]
|
76
youtube_dl/extractor/photobucket.py
Normal file
76
youtube_dl/extractor/photobucket.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class PhotobucketIE(InfoExtractor):
    """Information extractor for photobucket.com."""

    # TODO: the original _VALID_URL was:
    # r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
    # Check if it's necessary to keep the old extraction process
    _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
    IE_NAME = u'photobucket'
    _TEST = {
        u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
        u'file': u'zpsc0c3b9fa.mp4',
        u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
        u'info_dict': {
            u"upload_date": u"20130504",
            u"uploader": u"rachaneronas",
            u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
        }
    }

    @staticmethod
    def _as_text(value):
        """Return value as text, decoding it as UTF-8 only if it is a byte string."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video.

        Prefers the JSON blob embedded in the page's javascript and falls
        back to scraping the <link> and <title> tags.
        Raises ExtractorError if the URL is invalid or no title is found.
        """
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        video_id = mobj.group('id')

        video_extension = mobj.group('ext')

        # Retrieve video webpage to extract further information
        webpage = self._download_webpage(url, video_id)

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        # We try first by looking the javascript code:
        mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
        if mobj is not None:
            info = json.loads(mobj.group('json'))
            return [{
                'id': video_id,
                'url': info[u'downloadUrl'],
                'uploader': info[u'username'],
                # NOTE(review): fromtimestamp() converts using the local
                # timezone, so the date may vary by machine - confirm
                # whether UTC is intended.
                'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),
                'title': info[u'title'],
                'ext': video_extension,
                'thumbnail': info[u'thumbUrl'],
            }]

        # We try looking in other parts of the webpage
        video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
                                       webpage, u'video URL')

        mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        # Values taken from the page are used as they are. Calling
        # .decode('utf-8') on them failed on Python 3 and on non-ASCII
        # titles under Python 2.
        video_title = mobj.group(1)
        video_uploader = mobj.group(2)

        return [{
            # id and ext come from the caller's url, which may be a byte
            # string on Python 2
            'id': self._as_text(video_id),
            'url': video_url,
            'uploader': video_uploader,
            'upload_date': None,
            'title': video_title,
            'ext': self._as_text(video_extension),
        }]
|
50
youtube_dl/extractor/pornotube.py
Normal file
50
youtube_dl/extractor/pornotube.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class PornotubeIE(InfoExtractor):
    """Information extractor for pornotube.com video pages."""

    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
    _TEST = {
        u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
        u'file': u'1689755.flv',
        u'md5': u'374dd6dcedd24234453b295209aa69b6',
        u'info_dict': {
            u"upload_date": u"20090708",
            u"title": u"Marilyn-Monroe-Bathing"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('videoid')
        # The title is taken from the URL itself, not from the page.
        video_title = url_match.group('title')

        page = self._download_webpage(url, video_id)

        # Locate the (percent-encoded) media url in the player setup.
        quoted_url = self._search_regex(
            r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",',
            page, u'video url')
        video_url = compat_urllib_parse.unquote(quoted_url)

        # The upload date is optional; normalise it only when it was found.
        upload_date = self._html_search_regex(
            r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by',
            page, u'upload date', fatal=False)
        if upload_date:
            upload_date = unified_strdate(upload_date)

        return [{
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': upload_date,
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
        }]
|
56
youtube_dl/extractor/rbmaradio.py
Normal file
56
youtube_dl/extractor/rbmaradio.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class RBMARadioIE(InfoExtractor):
    """Information extractor for rbmaradio.com show pages."""

    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
    _TEST = {
        u'url': u'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
        u'file': u'ford-lopatin-live-at-primavera-sound-2011.mp3',
        u'md5': u'6bc6f9bcb18994b4c983bc3bf4384d95',
        u'info_dict': {
            u"uploader_id": u"ford-lopatin",
            u"location": u"Spain",
            u"description": u"Joel Ford and Daniel \u2019Oneohtrix Point Never\u2019 Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
            u"uploader": u"Ford & Lopatin",
            u"title": u"Live at Primavera Sound 2011"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the show.

        Raises ExtractorError if the embedded show data is not valid JSON.
        """
        video_id = re.match(self._VALID_URL, url).group('videoID')
        page = self._download_webpage(url, video_id)

        # The show metadata is assigned to `gon.show` as a JSON literal.
        raw_show = self._search_regex(
            r'window\.gon.*?gon\.show=(.+?);$',
            page, u'json data', flags=re.MULTILINE)
        try:
            data = json.loads(raw_show)
        except ValueError as e:
            raise ExtractorError(u'Invalid JSON: ' + str(e))

        video_url = data['akamai_url'] + '&cbr=256'
        # The extension is whatever follows the last dot of the url path.
        media_path = compat_urllib_parse_urlparse(video_url).path
        host = data.get('host', {})

        return [{
            'id': video_id,
            'url': video_url,
            'ext': media_path.rpartition('.')[2],
            'title': data['title'],
            'description': data.get('teaser_text'),
            'location': data.get('country_of_origin'),
            'uploader': host.get('name'),
            'uploader_id': host.get('slug'),
            'thumbnail': data.get('image', {}).get('large_url_2x'),
            'duration': data.get('duration'),
        }]
|
37
youtube_dl/extractor/redtube.py
Normal file
37
youtube_dl/extractor/redtube.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RedTubeIE(InfoExtractor):
    """Information extractor for redtube.com video pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
    _TEST = {
        u'url': u'http://www.redtube.com/66418',
        u'file': u'66418.mp4',
        u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
        u'info_dict': {
            u"title": u"Sucked on a toilet"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        self.report_extraction(video_id)

        # Both values are scraped from the HTML5 player markup.
        media_url = self._html_search_regex(
            r'<source src="(.+?)" type="video/mp4">',
            page, u'video URL')
        page_title = self._html_search_regex(
            '<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
            page, u'title')

        info = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': page_title,
        }
        return [info]
|
37
youtube_dl/extractor/ringtv.py
Normal file
37
youtube_dl/extractor/ringtv.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RingTVIE(InfoExtractor):
    """Information extractor for ringtv.craveonline.com video pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/videos/video/([^/]+)'
    _TEST = {
        u"url": u"http://ringtv.craveonline.com/videos/video/746619-canelo-alvarez-talks-about-mayweather-showdown",
        u"file": u"746619.mp4",
        u"md5": u"7c46b4057d22de32e0a539f017e64ad3",
        u"info_dict": {
            u"title": u"Canelo Alvarez talks about Mayweather showdown",
            u"description": u"Saul \\\"Canelo\\\" Alvarez spoke to the media about his Sept. 14 showdown with Floyd Mayweather after their kick-off presser in NYC. Canelo is motivated and confident that he will have the speed and gameplan to beat the pound-for-pound king."
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video."""
        slug = re.match(self._VALID_URL, url).group(1)
        # The id is the part of the slug before its first dash.
        video_id = slug.split('-')[0]
        page = self._download_webpage(url, video_id)

        raw_title = self._search_regex(r'<title>(.+?)</title>',
                                       page, 'video title')
        description = self._search_regex(r'<div class="blurb">(.+?)</div>',
                                         page, 'Description')

        # Media and snapshot urls are derived from the id alone.
        id_text = str(video_id)
        final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" % id_text
        thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" % id_text

        info = {
            'id': video_id,
            'url': final_url,
            'ext': final_url.rpartition('.')[2],
            'title': raw_title.replace(' | RingTV', ''),
            'thumbnail': thumbnail_url,
            'description': description,
        }
        return [info]
|
||||
|
67
youtube_dl/extractor/sina.py
Normal file
67
youtube_dl/extractor/sina.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class SinaIE(InfoExtractor):
    """Information extractor for video.sina.com.cn pages and embed urls."""

    # Matched with re.VERBOSE: whitespace and the '#' comment inside the
    # pattern are ignored by the regex engine.
    _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
                    (
                        (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=))(?P<id>\d+?)($|&))))
                        |
                        # This is used by external sites like Weibo
                        (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
                    )
                  '''

    _TEST = {
        u'url': u'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
        u'file': u'110028898.flv',
        u'md5': u'd65dd22ddcf44e38ce2bf58a10c3e71f',
        u'info_dict': {
            u'title': u'《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
        }
    }

    @classmethod
    def suitable(cls, url):
        """Return True if url matches _VALID_URL (compiled in verbose mode)."""
        matched = re.match(cls._VALID_URL, url, flags=re.VERBOSE)
        return matched is not None

    def _extract_video(self, video_id):
        """Return the info dict for the video identified by video_id."""
        query = compat_urllib_parse.urlencode({'vid': video_id})
        play_xml = self._download_webpage(
            'http://v.iask.com/v_play.php?%s' % query,
            video_id, u'Downloading video url')
        thumb_reply = self._download_webpage(
            'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % query,
            video_id, u'Downloading thumbnail info')
        play_doc = xml.etree.ElementTree.fromstring(play_xml.encode('utf-8'))

        info = {
            'id': video_id,
            'url': play_doc.find('./durl/url').text,
            'ext': 'flv',
            'title': play_doc.find('./vname').text,
            # the thumbnail url is the second '='-separated field of the reply
            'thumbnail': thumb_reply.split('=')[1],
        }
        return info

    def _real_extract(self, url):
        """Resolve the video id for url and return its info dict."""
        match = re.match(self._VALID_URL, url, flags=re.VERBOSE)

        if match.group('token') is not None:
            # The video id is in the redirected url
            self.to_screen(u'Getting video id')
            head_request = compat_urllib_request.Request(url)
            # Only the final location is needed, so ask for headers only.
            head_request.get_method = lambda: 'HEAD'
            (_, urlh) = self._download_webpage_handle(head_request, 'NA', False)
            return self._real_extract(urlh.geturl())

        video_id = match.group('id')
        if video_id is None:
            # Page urls carry only a pseudo id; the real id is in the page.
            pseudo_id = match.group('pseudo_id')
            page = self._download_webpage(url, pseudo_id)
            video_id = self._search_regex(r'vid:\'(\d+?)\'', page, u'video id')

        return self._extract_video(video_id)
|
195
youtube_dl/extractor/soundcloud.py
Normal file
195
youtube_dl/extractor/soundcloud.py
Normal file
@@ -0,0 +1,195 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com tracks.

    Regular track pages (soundcloud.com/<uploader>/<slug>) are looked up
    through the API's resolve.json endpoint, while
    api.soundcloud.com/tracks/<id> URLs are requested as JSON directly.
    The media URL is the track's stream_url with the client id appended.
    """

    _VALID_URL = r'''^(?:https?://)?
                    (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
                    )
                    '''
    IE_NAME = u'soundcloud'
    _TEST = {
        u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
        u'file': u'62986583.mp3',
        u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
        u'info_dict': {
            u"upload_date": u"20121011",
            u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
            u"uploader": u"E.T. ExTerrestrial Music",
            u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
        }
    }

    # Client id sent with every API request and appended to stream URLs.
    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'

    @classmethod
    def suitable(cls, url):
        """Return True if *url* matches _VALID_URL (matched with re.VERBOSE)."""
        matched = re.match(cls._VALID_URL, url, flags=re.VERBOSE)
        return matched is not None

    def report_resolve(self, video_id):
        """Report that the id is being resolved."""
        message = u'%s: Resolving id' % video_id
        self.to_screen(message)

    @classmethod
    def _resolv_url(cls, url):
        """Return the resolve.json API URL that looks up the page *url*."""
        return 'http://api.soundcloud.com/resolve.json?url=%s&client_id=%s' % (
            url, cls._CLIENT_ID)

    def _extract_info_dict(self, info, full_title=None):
        """Turn the track JSON object *info* into an info dictionary.

        *full_title* is only used for the progress message; the track id
        is shown when it is not given.
        """
        track_id = info['id']
        self.report_extraction(full_title or track_id)

        artwork = info['artwork_url']
        # Swap the "-large" artwork rendition for the "-t500x500" one.
        thumbnail = None if artwork is None else artwork.replace('-large', '-t500x500')

        return {
            'id': track_id,
            'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
            'uploader': info['user']['username'],
            'upload_date': unified_strdate(info['created_at']),
            'title': info['title'],
            'ext': u'mp3',
            'description': info['description'],
            'thumbnail': thumbnail,
        }

    def _real_extract(self, url):
        """Download the track JSON for *url* and return its info dictionary."""
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        track_id = mobj.group('track_id')
        if track_id is None:
            # The uploader and the slug of the song title are both in the url.
            uploader, slug_title = mobj.group(1), mobj.group(2)
            full_title = '%s/%s' % (uploader, slug_title)

            self.report_resolve(full_title)

            # Rebuild a canonical page URL and let the API resolve it.
            page_url = 'http://soundcloud.com/%s/%s' % (uploader, slug_title)
            info_json_url = self._resolv_url(page_url)
        else:
            # API URL: the track JSON can be requested directly by id.
            full_title = track_id
            info_json_url = (
                'http://api.soundcloud.com/tracks/' + track_id
                + '.json?client_id=' + self._CLIENT_ID)

        info_json = self._download_webpage(
            info_json_url, full_title, u'Downloading info JSON')
        return self._extract_info_dict(json.loads(info_json), full_title)
|
||||
|
||||
class SoundcloudSetIE(SoundcloudIE):
    """Information extractor for soundcloud.com sets (playlists).

    The set page is resolved through the API and every track of the
    returned JSON is converted with SoundcloudIE._extract_info_dict.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
    IE_NAME = u'soundcloud:set'
    _TEST = {
        u"url": "https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
        u"playlist": [
            {
                u"file": "30510138.mp3",
                u"md5": "f9136bf103901728f29e419d2c70f55d",
                u"info_dict": {
                    u"upload_date": u"20111213",
                    u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"D-D-Dance"
                }
            },
            {
                u"file": "47127625.mp3",
                u"md5": "09b6758a018470570f8fd423c9453dd8",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"The Royal Concept - Gimme Twice"
                }
            },
            {
                u"file": "47127627.mp3",
                u"md5": "154abd4e418cea19c3b901f1e1306d9c",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"uploader": u"The Royal Concept",
                    u"title": u"Goldrushed"
                }
            },
            {
                u"file": "47127629.mp3",
                u"md5": "2f5471edc79ad3f33a683153e96a79c1",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"In the End"
                }
            },
            {
                u"file": "47127631.mp3",
                u"md5": "f9ba87aa940af7213f98949254f1c6e2",
                u"info_dict": {
                    u"upload_date": u"20120521",
                    u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
                    u"uploader": u"The Royal Concept",
                    u"title": u"Knocked Up"
                }
            },
            {
                u"file": "75206121.mp3",
                u"md5": "f9d1fe9406717e302980c30de4af9353",
                u"info_dict": {
                    u"upload_date": u"20130116",
                    u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ",
                    u"uploader": u"The Royal Concept",
                    u"title": u"World On Fire"
                }
            }
        ]
    }

    def _real_extract(self, url):
        """Resolve the set at *url* and return a playlist result.

        Returns a dict with '_type' set to 'playlist' whose entries are the
        info dictionaries of the set's tracks.  When the API answer
        contains an 'errors' list, every error is reported through the
        downloader and None is returned instead.

        Raises ExtractorError if *url* does not match _VALID_URL.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
        # extract simple title (uploader + slug of song title)
        slug_title = mobj.group(2)
        full_title = '%s/sets/%s' % (uploader, slug_title)

        self.report_resolve(full_title)

        # Rebuild a canonical page URL and let the API resolve it.
        url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
        resolv_url = self._resolv_url(url)
        info_json = self._download_webpage(resolv_url, full_title)

        info = json.loads(info_json)
        if 'errors' in info:
            for err in info['errors']:
                self._downloader.report_error(u'unable to download video webpage: %s' % compat_str(err['error_message']))
            return

        self.report_extraction(full_title)
        return {
            '_type': 'playlist',
            'entries': [self._extract_info_dict(track) for track in info['tracks']],
            'id': info['id'],
            'title': info['title'],
        }
|
45
youtube_dl/extractor/spiegel.py
Normal file
45
youtube_dl/extractor/spiegel.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SpiegelIE(InfoExtractor):
    """Information extractor for spiegel.de video pages.

    The title is read from the HTML page; the media file name and the
    duration come from the per-video XML on video2.spiegel.de.
    """

    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
    _TEST = {
        u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
        u'file': u'1259285.mp4',
        u'md5': u'2c2754212136f35fb4b19767d242f66e',
        u'info_dict': {
            u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for *url*."""
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<div class="module-title">(.*?)</div>', webpage, u'title')

        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
        xml_code = self._download_webpage(
            xml_url, video_id,
            note=u'Downloading XML', errnote=u'Failed to download XML')

        # Hand the parser UTF-8 bytes instead of the decoded text: on
        # Python 2, ElementTree cannot parse a unicode string that contains
        # non-ASCII characters.
        # NOTE(review): assumes the XML does not declare a non-UTF-8
        # encoding - confirm against a live response.
        idoc = xml.etree.ElementTree.fromstring(xml_code.encode('utf-8'))
        # The last child element of the document is the variant that is used.
        last_type = idoc[-1]
        filename = last_type.findall('./filename')[0].text
        duration = float(last_type.findall('./duration')[0].text)

        video_url = 'http://video2.spiegel.de/flash/' + filename
        # The extension is whatever follows the last dot of the file name.
        video_ext = filename.rpartition('.')[2]
        info = {
            'id': video_id,
            'url': video_url,
            'ext': video_ext,
            'title': video_title,
            'duration': duration,
        }
        return [info]
|
119
youtube_dl/extractor/stanfordoc.py
Normal file
119
youtube_dl/extractor/stanfordoc.py
Normal file
@@ -0,0 +1,119 @@
|
||||
import re
|
||||
import socket
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
orderedSet,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class StanfordOpenClassroomIE(InfoExtractor):
    """Information extractor for Stanford OpenClassroom.

    Three kinds of URL are accepted: a single video page, a course page
    (all the videos it links to are extracted) and the site root (all the
    courses it links to are extracted).
    """

    IE_NAME = u'stanfordoc'
    IE_DESC = u'Stanford Open ClassRoom'
    _VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
    _TEST = {
        u'url': u'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
        u'file': u'PracticalUnix_intro-environment.mp4',
        u'md5': u'544a9468546059d4e80d76265b0443b8',
        u'info_dict': {
            u"title": u"Intro Environment"
        }
    }

    def _real_extract(self, url):
        """Return the list of info dictionaries found at *url*.

        Raises ExtractorError when *url* does not match _VALID_URL, when
        the video metadata XML cannot be downloaded or when it lacks the
        expected elements.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        if mobj.group('course') and mobj.group('video'):  # A specific video
            course = mobj.group('course')
            video = mobj.group('video')
            info = {
                'id': course + '_' + video,
                'uploader': None,
                'upload_date': None,
            }

            self.report_extraction(info['id'])
            baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
            xmlUrl = baseUrl + video + '.xml'
            try:
                metaXml = compat_urllib_request.urlopen(xmlUrl).read()
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
            mdoc = xml.etree.ElementTree.fromstring(metaXml)
            try:
                info['title'] = mdoc.findall('./title')[0].text
                # The video file name is relative to the videos directory.
                info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
            except IndexError:
                raise ExtractorError(u'Invalid metadata XML file')
            info['ext'] = info['url'].rpartition('.')[2]
            return [info]
        elif mobj.group('course'):  # A course page
            course = mobj.group('course')
            info = {
                'id': course,
                'type': 'playlist',
                'uploader': None,
                'upload_date': None,
            }

            coursepage = self._download_webpage(
                url, info['id'],
                note='Downloading course info page',
                errnote='Unable to download course info page')

            info['title'] = self._html_search_regex('<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])

            info['description'] = self._html_search_regex(
                '<description>([^<]+)</description>',
                coursepage, u'description', fatal=False)

            links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
            info['list'] = [
                {
                    'type': 'reference',
                    'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
                }
                for vpage in links]
            results = []
            for entry in info['list']:
                assert entry['type'] == 'reference'
                # Every linked video page is extracted by this same extractor.
                results += self.extract(entry['url'])
            return results
        else:  # Root page
            info = {
                'id': 'Stanford OpenClassroom',
                'type': 'playlist',
                'uploader': None,
                'upload_date': None,
            }

            rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
            # Fetch through _download_webpage so that the page is text:
            # urlopen().read() yields bytes on Python 3, which the str
            # pattern below cannot be applied to (TypeError).
            rootpage = self._download_webpage(
                rootURL, info['id'],
                errnote=u'Unable to download course info page')

            info['title'] = info['id']

            links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
            info['list'] = [
                {
                    'type': 'reference',
                    'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
                }
                for cpage in links]

            results = []
            for entry in info['list']:
                assert entry['type'] == 'reference'
                # Every linked course page is extracted by this same extractor.
                results += self.extract(entry['url'])
            return results
|
36
youtube_dl/extractor/statigram.py
Normal file
36
youtube_dl/extractor/statigram.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class StatigramIE(InfoExtractor):
    """Information extractor for statigr.am post pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
    _TEST = {
        u'url': u'http://statigr.am/p/484091715184808010_284179915',
        u'file': u'484091715184808010_284179915.mp4',
        u'md5': u'deda4ff333abe2e118740321e992605b',
        u'info_dict': {
            u"uploader_id": u"videoseconds",
            u"title": u"Instagram photo by @videoseconds"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for *url*."""
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)

        page_title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, u'title')
        # Drop the optional "(Video)"/"(Videos)" marker and the site suffix.
        title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', page_title)
        # The uploader is the "@name" mention inside the cleaned-up title.
        uploader_id = self._html_search_regex(
            r'@([^ ]+)', title, u'uploader name', fatal=False)

        return [{
            'id': video_id,
            'url': self._og_search_video_url(webpage),
            'ext': 'mp4',
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'uploader_id': uploader_id,
        }]
|
85
youtube_dl/extractor/steam.py
Normal file
85
youtube_dl/extractor/steam.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class SteamIE(InfoExtractor):
    """Information extractor for the videos of a store.steampowered.com game."""

    _VALID_URL = r"""http://store\.steampowered\.com/
                (agecheck/)?
                (?P<urltype>video|app)/ #If the page is only for videos or for a game
                (?P<gameID>\d+)/?
                (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                """
    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
    _TEST = {
        u"url": u"http://store.steampowered.com/video/105600/",
        u"playlist": [
            {
                u"file": u"81300.flv",
                u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
                u"info_dict": {
                    u"title": u"Terraria 1.1 Trailer",
                    u'playlist_index': 1,
                }
            },
            {
                u"file": u"80859.flv",
                u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
                u"info_dict": {
                    u"title": u"Terraria Trailer",
                    u'playlist_index': 2,
                }
            }
        ]
    }

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        matched = re.match(cls._VALID_URL, url, re.VERBOSE)
        return matched is not None

    def _real_extract(self, url):
        """Return a one-element list holding the playlist of the game's videos."""
        game_id = re.match(self._VALID_URL, url, re.VERBOSE).group('gameID')

        page_url = self._VIDEO_PAGE_TEMPLATE % game_id
        webpage = self._download_webpage(page_url, game_id)

        age_gate = re.search('<h2>Please enter your birth date to continue:</h2>', webpage)
        if age_gate is not None:
            # Request the page again through the age-check URL, which
            # carries a fixed birth date.
            page_url = self._AGECHECK_TEMPLATE % game_id
            self.report_age_confirmation()
            webpage = self._download_webpage(page_url, game_id)

        self.report_extraction(game_id)
        game_title = self._html_search_regex(
            r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')

        movie_re = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
        name_re = r'<span class="title">(?P<videoName>.+?)</span>'
        thumb_re = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'

        movies = re.finditer(movie_re, webpage)
        names = re.finditer(name_re, webpage)
        thumbs = re.finditer(thumb_re, webpage)

        entries = []
        # Movies, titles and thumbnails are paired by their order in the page.
        for movie, name, thumb in zip(movies, names, thumbs):
            video_id = movie.group('videoID')
            title = name.group('videoName')
            video_url = movie.group('videoURL')
            video_thumb = thumb.group('thumbnail')
            if not video_url:
                raise ExtractorError(u'Cannot find video url for %s' % video_id)
            entries.append({
                'id': video_id,
                'url': video_url,
                'ext': 'flv',
                'title': unescapeHTML(title),
                'thumbnail': video_thumb,
            })
        return [self.playlist_result(entries, game_id, game_title)]
|
46
youtube_dl/extractor/teamcoco.py
Normal file
46
youtube_dl/extractor/teamcoco.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class TeamcocoIE(InfoExtractor):
    """Information extractor for teamcoco.com video pages."""

    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
    _TEST = {
        u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
        u'file': u'19705.mp4',
        u'md5': u'27b6f7527da5acf534b15f21b032656e',
        u'info_dict': {
            u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.",
            u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for *url*.

        Raises ExtractorError when *url* does not match _VALID_URL.
        """
        match = re.match(self._VALID_URL, url)
        if match is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Until the numeric id is known, the URL title identifies the video.
        url_title = match.group('url_title')
        webpage = self._download_webpage(url, url_title)

        video_id = self._html_search_regex(
            r'<article class="video" data-id="(\d+?)"', webpage, u'video id')

        self.report_extraction(video_id)

        # The media URL is listed in a per-video XML document.
        data = self._download_webpage(
            'http://teamcoco.com/cvp/2.0/%s.xml' % video_id,
            video_id, 'Downloading data webpage')

        video_url = self._html_search_regex(
            r'<file [^>]*type="high".*?>(.*?)</file>', data, u'video URL')

        return [{
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
        }]
|
88
youtube_dl/extractor/ted.py
Normal file
88
youtube_dl/extractor/ted.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TEDIE(InfoExtractor):
    """Information extractor for ted.com talks and playlists."""

    _VALID_URL=r'''http://www\.ted\.com/
                   (
                        ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
                        |
                        ((?P<type_talk>talks)) # We have a simple talk
                   )
                   (/lang/(.*?))? # The url may contain the language
                   /(?P<name>\w+) # Here goes the name and then ".html"
                   '''
    _TEST = {
        u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
        u'file': u'102.mp4',
        u'md5': u'2d76ee1576672e0bd8f187513267adf6',
        u'info_dict': {
            u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922",
            u"title": u"Dan Dennett: The illusion of consciousness"
        }
    }

    @classmethod
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        matched = re.match(cls._VALID_URL, url, re.VERBOSE)
        return matched is not None

    def _real_extract(self, url):
        """Dispatch *url* to the talk or the playlist extraction."""
        match = re.match(self._VALID_URL, url, re.VERBOSE)
        if match.group('type_talk'):
            return [self._talk_info(url)]

        playlist_id = match.group('playlist_id')
        name = match.group('name')
        self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
        return [self._playlist_videos_info(url, name, playlist_id)]

    def _playlist_videos_info(self,url,name,playlist_id=0):
        '''Returns the videos of the playlist'''
        item_re = r'''
                     <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
                     ([.\s]*?)data-playlist_item_id="(\d+)"
                     ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
                     '''
        link_re = r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
        webpage = self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
        items = re.finditer(item_re, webpage, re.VERBOSE)
        links = re.finditer(link_re, webpage)

        playlist_title = self._html_search_regex(
            r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
            webpage, 'playlist title')

        # Only the talk links are used; pairing them with the list items
        # limits the entries to the shorter of the two sequences.
        playlist_entries = [
            self.url_result('http://www.ted.com%s' % link.group('talk_url'), 'TED')
            for _item, link in zip(items, links)]
        return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title)

    def _talk_info(self, url, video_id=0):
        """Return the video for the talk in the url"""
        talk_name = re.match(self._VALID_URL, url,re.VERBOSE).group('name')
        webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % talk_name)
        self.report_extraction(talk_name)
        # If the url includes the language we get the title translated
        title = self._html_search_regex(
            r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
            webpage, 'title')
        # The talk details are embedded in the page as a JSON object.
        json_data = self._search_regex(
            r'<script.*?>var talkDetails = ({.*?})</script>',
            webpage, 'json data')
        details = json.loads(json_data)
        desc = self._html_search_regex(
            r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
            webpage, 'description', flags = re.DOTALL)

        thumbnail = self._search_regex(
            r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
            webpage, 'thumbnail')
        return {
            'id': details['id'],
            # The last entry of htmlStreams is the one that gets downloaded.
            'url': details['htmlStreams'][-1]['file'],
            'ext': 'mp4',
            'title': title,
            'thumbnail': thumbnail,
            'description': desc,
        }
|
36
youtube_dl/extractor/tf1.py
Normal file
36
youtube_dl/extractor/tf1.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# coding: utf-8
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
class TF1IE(InfoExtractor):
    """
    TF1 uses the wat.tv player, currently it can only download videos with the
    html5 player enabled, it cannot download HD videos.
    """
    _WORKING = False
    # Dots are escaped so that only the literal host name and the literal
    # ".html" suffix are accepted.
    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
    _TEST = {
        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
        u'file': u'10635995.mp4',
        u'md5': u'66789d3e91278d332f75e1feb7aea327',
        u'info_dict': {
            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
        }
    }

    def _real_extract(self, url):
        """Find the wat.tv media behind *url* and delegate to the Wat extractor.

        The TF1 page embeds a wat.tv player frame; the frame gives the wat
        id, and the wat.tv content interface gives the URL that is handed
        over as a url_result for the 'Wat' extractor.
        """
        mobj = re.match(self._VALID_URL, url)
        # Named video_id rather than id to avoid shadowing the builtin.
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
        embed_url = self._html_search_regex(
            r'"(https://www\.wat\.tv/embedframe/.*?)"',
            webpage, 'embed url')
        embed_page = self._download_webpage(
            embed_url, video_id, u'Downloading embed player page')
        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
        wat_info = self._download_webpage(
            'http://www.wat.tv/interface/contentv3/%s' % wat_id,
            video_id, u'Downloading Wat info')
        wat_info = json.loads(wat_info)['media']
        wat_url = wat_info['url']
        return self.url_result(wat_url, 'Wat')
|
47
youtube_dl/extractor/thisav.py
Normal file
47
youtube_dl/extractor/thisav.py
Normal file
@@ -0,0 +1,47 @@
|
||||
#coding: utf-8
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
class ThisAVIE(InfoExtractor):
    """Information extractor for thisav.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
    _TEST = {
        u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
        u"file": u"47734.flv",
        u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
        u"info_dict": {
            u"title": u"高樹マリア - Just fit",
            u"uploader": u"dj7970",
            u"uploader_id": u"dj7970"
        }
    }

    def _real_extract(self, url):
        """Return the info dictionary for the video at *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
        video_url = self._html_search_regex(
            r"addVariable\('file','([^']+)'\);", webpage, u'video url')
        # Both uploader fields come from the same user link: the link text
        # is the display name, the last path component is the id.
        uploader = self._html_search_regex(
            r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
            webpage, u'uploader name', fatal=False)
        uploader_id = self._html_search_regex(
            r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
            webpage, u'uploader id', fatal=False)

        return {
            '_type': 'video',
            'id': video_id,
            'url': video_url,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'title': title,
            'ext': determine_ext(video_url),
        }
|
52
youtube_dl/extractor/traileraddict.py
Normal file
52
youtube_dl/extractor/traileraddict.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TrailerAddictIE(InfoExtractor):
    """Information extractor for traileraddict.com trailers and clips."""

    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
    _TEST = {
        u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
        u'file': u'76184.mp4',
        u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
        u'info_dict': {
            u"title": u"Prince Avalanche Trailer",
            u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dictionary for *url*."""
        match = re.match(self._VALID_URL, url)
        name = match.group('movie') + '/' + match.group('trailer_name')
        webpage = self._download_webpage(url, name)

        page_title = self._search_regex(
            r'<title>(.+?)</title>', webpage, 'video title')
        title = page_title.replace(' - Trailer Addict','')
        view_count = self._search_regex(
            r'Views: (.+?)<br />', webpage, 'Views Count')
        # The id is the value after the first '=' of the og:video property.
        og_video = self._og_search_property('video', webpage, 'Video id')
        video_id = og_video.split('=')[1]

        # Presence of (no)watchplus function indicates HD quality is available
        # NOTE(review): the trailing "()" in this pattern is an empty group,
        # not a literal pair of parentheses - confirm whether that is intended.
        has_hd = re.search(r'function (no)?watchplus()', webpage)
        fvar = "fvarhd" if has_hd else "fvar"

        info_url = "http://www.traileraddict.com/%s.php?tid=%s" % (fvar, str(video_id))
        info_webpage = self._download_webpage(info_url, video_id , "Downloading the info webpage")

        raw_url = self._search_regex(
            r'&fileurl=(.+)', info_webpage, 'Download url')
        final_url = raw_url.replace('%3F','?')
        thumbnail_url = self._search_regex(
            r'&image=(.+?)&', info_webpage, 'thumbnail url')
        # Extension: text after the last dot, without any query string.
        ext = final_url.split('.')[-1].split('?')[0]

        return [{
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'description': self._og_search_description(webpage),
            'view_count': view_count,
        }]
|
63
youtube_dl/extractor/tudou.py
Normal file
63
youtube_dl/extractor/tudou.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TudouIE(InfoExtractor):
    """Information extractor for tudou.com listplay and programs pages."""

    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
    _TEST = {
        u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
        u'file': u'159448201.f4v',
        u'md5': u'140a49ed444bd22f93330985d8475fcb',
        u'info_dict': {
            u"title": u"卡马乔国足开大脚长传冲吊集锦"
        }
    }

    def _url_for_id(self, id, quality = None):
        """Return the media URL of the part *id*, optionally for *quality*."""
        info_url = "http://v2.tudou.com/f?id="+str(id)
        if quality:
            info_url = info_url + '&hd' + quality
        webpage = self._download_webpage(info_url, id, "Opening the info webpage")
        return self._html_search_regex('>(.+?)</f>',webpage, 'video url')

    def _real_extract(self, url):
        """Return one info dictionary per part of the video at *url*."""
        video_id = re.match(self._VALID_URL, url).group(2)
        webpage = self._download_webpage(url, video_id)

        # Title and thumbnail appear with either quoting style in the page.
        title_match = (
            re.search(",kw:\"(.+)\"",webpage)
            or re.search(",kw: \'(.+)\'",webpage))
        title = title_match.group(1)
        thumb_match = (
            re.search(",pic: \'(.+?)\'",webpage)
            or re.search(",pic:\"(.+?)\"",webpage))
        thumbnail_url = thumb_match.group(1)

        segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
        segments = json.loads(segs_json)
        # It looks like the keys are the arguments that have to be passed as
        # the hd field in the request url, we pick the higher
        quality = sorted(segments.keys())[-1]
        parts = segments[quality]

        part_count = len(parts)
        if part_count > 1:
            self.to_screen(u'%s: found %s parts' % (video_id, part_count))

        result = []
        for part in parts:
            part_id = part['k']
            final_url = self._url_for_id(part_id, quality)
            # Extension: text after the last dot, query string excluded.
            without_query = final_url.split('?')[0]
            result.append({
                'id': part_id,
                'url': final_url,
                'ext': without_query.split('.')[-1],
                'title': title,
                'thumbnail': thumbnail_url,
            })

        return result
|
49
youtube_dl/extractor/tumblr.py
Normal file
49
youtube_dl/extractor/tumblr.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class TumblrIE(InfoExtractor):
    """Information extractor for videos embedded in Tumblr posts."""

    _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)/(.*?)'
    _TEST = {
        u'url': u'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
        u'file': u'54196191430.mp4',
        u'md5': u'479bb068e5b16462f5176a6828829767',
        u'info_dict': {
            u"title": u"tatiana maslany news"
        }
    }

    def _real_extract(self, url):
        """Extract the video referenced by a Tumblr post or video URL.

        The blog name and post id are read from ``url``, the canonical
        ``/post/<id>/`` page is downloaded, and the media URL, extension,
        first poster image and page title are scraped from it.

        Returns a one-element list holding a dict with the keys 'id',
        'url', 'title', 'thumbnail' and 'ext'.

        Raises ExtractorError when no video source is found in the page.
        """
        m_url = re.match(self._VALID_URL, url)
        video_id = m_url.group('id')
        blog = m_url.group('blog_name')

        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
        webpage = self._download_webpage(url, video_id)

        # The blog name comes straight from the URL, so it is escaped before
        # being embedded in the pattern; otherwise characters such as '.'
        # would act as regex metacharacters instead of matching literally.
        re_video = (
            r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22'
            r' type=\\x22video/(?P<ext>.*?)\\x22'
        ) % (re.escape(blog), video_id)
        video = re.search(re_video, webpage)
        if video is None:
            raise ExtractorError(u'Unable to extract video')
        video_url = video.group('video_url')
        ext = video.group('ext')

        # We pick the first poster. The text between 'posters' and the
        # opening bracket is deliberately not captured, so the poster URL is
        # the only group the search helper can hand back.
        video_thumbnail = self._search_regex(
            r'posters.*?\[\\x22(?P<thumb>.*?)\\x22',
            webpage, u'thumbnail', fatal=False)
        if video_thumbnail:
            # Drop the backslashes left in the scraped URL.
            video_thumbnail = video_thumbnail.replace('\\', '')

        # The only place where you can get a title, it's not complete,
        # but searching in other places doesn't work for all videos
        video_title = self._html_search_regex(
            r'<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
            webpage, u'title', flags=re.DOTALL)

        return [{'id': video_id,
                 'url': video_url,
                 'title': video_title,
                 'thumbnail': video_thumbnail,
                 'ext': ext
                 }]
|
39
youtube_dl/extractor/tutv.py
Normal file
39
youtube_dl/extractor/tutv.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_parse_qs,
|
||||
)
|
||||
|
||||
class TutvIE(InfoExtractor):
    """Information extractor for tu.tv video pages."""

    _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
    _TEST = {
        u'url': u'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
        u'file': u'2742556.flv',
        u'md5': u'5eb766671f69b82e528dc1e7769c5cb2',
        u'info_dict': {
            u"title": u"Noah en pabellon cuahutemoc"
        }
    }

    def _real_extract(self, url):
        """Extract the video behind a tu.tv page URL.

        The page is downloaded to find the numeric ``codVideo`` id, which is
        then used to request a query-string encoded document whose ``kpt``
        field holds the base64-encoded media URL.

        Returns a one-element list holding a dict with the keys 'id' (the
        numeric id), 'url', 'ext' and 'title'.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)
        internal_id = self._search_regex(
            r'codVideo=([0-9]+)', webpage, u'internal video ID')

        data_content = self._download_webpage(
            u'http://tu.tv/flvurl.php?codVideo=' + str(internal_id),
            video_id, note=u'Downloading video info')
        encoded_url = compat_parse_qs(data_content)['kpt'][0]
        video_url = base64.b64decode(encoded_url).decode('utf-8')

        # Extension: text after the last dot of the URL with any query
        # string removed.
        url_without_query = video_url.split(u'?', 1)[0]
        ext = url_without_query.rsplit(u'.', 1)[-1]

        return [{
            'id': internal_id,
            'url': video_url,
            'ext': ext,
            'title': self._og_search_title(webpage),
        }]
|
45
youtube_dl/extractor/ustream.py
Normal file
45
youtube_dl/extractor/ustream.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class UstreamIE(InfoExtractor):
    """Information extractor for recorded ustream.tv videos."""

    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
    IE_NAME = u'ustream'
    _TEST = {
        u'url': u'http://www.ustream.tv/recorded/20274954',
        u'file': u'20274954.flv',
        u'md5': u'088f151799e8f572f84eb62f17d73e5c',
        u'info_dict': {
            u"uploader": u"Young Americans for Liberty",
            u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
        }
    }

    def _real_extract(self, url):
        """Extract a recorded video from its ustream.tv page URL.

        The media URL is built directly from the numeric id in ``url``;
        title, uploader and thumbnail are scraped from the page (the latter
        two are looked up with ``fatal=False``).

        Returns a single dict with the keys 'id', 'url', 'ext', 'title',
        'uploader' and 'thumbnail'.
        """
        video_id = re.match(self._VALID_URL, url).group('videoID')

        webpage = self._download_webpage(url, video_id)
        self.report_extraction(video_id)

        title = self._html_search_regex(
            r'data-title="(?P<title>.+)"', webpage, u'title')
        channel_name = self._html_search_regex(
            r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
            webpage, u'uploader', fatal=False, flags=re.DOTALL)
        image_url = self._html_search_regex(
            r'<link rel="image_src" href="(?P<thumb>.*?)"',
            webpage, u'thumbnail', fatal=False)

        return {
            'id': video_id,
            'url': u'http://tcdn.ustream.tv/video/%s' % video_id,
            'ext': 'flv',
            'title': title,
            'uploader': channel_name,
            'thumbnail': image_url,
        }
|
54
youtube_dl/extractor/vbox7.py
Normal file
54
youtube_dl/extractor/vbox7.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class Vbox7IE(InfoExtractor):
    """Information Extractor for Vbox7"""

    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
    _TEST = {
        u'url': u'http://vbox7.com/play:249bb972c2',
        u'file': u'249bb972c2.flv',
        u'md5': u'9c70d6d956f888bdc08c124acc120cfe',
        u'info_dict': {
            u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
        }
    }

    def _real_extract(self, url):
        """Extract the video behind a vbox7.com ``play:`` URL.

        The first page only contains a JavaScript redirect; its target is
        appended to the URL that was actually fetched and downloaded to read
        the title. The media and thumbnail URLs come from a POST request to
        the ``magare.do`` endpoint, whose response is expected to consist of
        exactly two '&'-separated ``name=value`` fields (media URL first,
        thumbnail second).

        Returns a one-element list holding a dict with the keys 'id', 'url',
        'ext', 'title' and 'thumbnail'.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL`` or
        when no info response is obtained.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        redirect_page, urlh = self._download_webpage_handle(url, video_id)
        new_location = self._search_regex(
            r'window\.location = \'(.*)\';', redirect_page,
            u'redirect location')
        redirect_url = urlh.geturl() + new_location
        webpage = self._download_webpage(
            redirect_url, video_id, u'Downloading redirect page')

        # Only the part of the page title in front of the first '/' is kept.
        title = self._html_search_regex(
            r'<title>(.*)</title>', webpage, u'title').split('/')[0].strip()

        ext = "flv"
        info_url = "http://vbox7.com/play/magare.do"
        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
        info_request = compat_urllib_request.Request(info_url, data)
        info_request.add_header(
            'Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(
            info_request, video_id, u'Downloading info webpage')
        if info_response is None:
            raise ExtractorError(u'Unable to extract the media url')

        # Split each field on its first '=' only, so that a value which
        # itself contains '=' (e.g. a URL carrying a query string) is kept
        # whole instead of being cut at the second '='.
        final_url, thumbnail_url = [
            field.split('=', 1)[1] for field in info_response.split('&')]

        return [{
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
        }]
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user