Compare commits
499 Commits
2014.12.06
...
2015.01.10
Author | SHA1 | Date | |
---|---|---|---|
|
149f05c7b6 | ||
|
8a1b9b068e | ||
|
c5a59d9391 | ||
|
500b8b41c1 | ||
|
be4a824d74 | ||
|
ed3958d714 | ||
|
6ce08764a1 | ||
|
c80ede5f13 | ||
|
bc694039e4 | ||
|
3462af03e6 | ||
|
ea1d5bdcdd | ||
|
121c09c7be | ||
|
76bfaf6daf | ||
|
d89c6e336a | ||
|
776dc3992a | ||
|
27ca82ebc6 | ||
|
385f8ae468 | ||
|
b9f030cc26 | ||
|
52afb2ac1b | ||
|
43bc88903d | ||
|
6ef9f88299 | ||
|
f71fdb0acc | ||
|
c24dfef63c | ||
|
6271f1cad9 | ||
|
fb4b030aaf | ||
|
ff21a8e0ee | ||
|
904fffffeb | ||
|
51897bb77c | ||
|
bd1a281ede | ||
|
45598f1578 | ||
|
d02115f837 | ||
|
34c781a24d | ||
|
1302394603 | ||
|
dd622d7c4e | ||
|
d120e9013f | ||
|
b8da6b9fc6 | ||
|
4baea47c42 | ||
|
176cf9e0c3 | ||
|
7b6faddfc8 | ||
|
f90ad27375 | ||
|
230b2287dd | ||
|
754c838903 | ||
|
aa2fd59857 | ||
|
9932a65370 | ||
|
5e4166478d | ||
|
b0e87c3110 | ||
|
ff0813313a | ||
|
c0bdf32a3c | ||
|
92b065dc53 | ||
|
9298d4e3df | ||
|
740a7fcbc8 | ||
|
5fbf25a681 | ||
|
db6e625005 | ||
|
811cacdc2c | ||
|
ce08a86462 | ||
|
11497d5bba | ||
|
0217c78377 | ||
|
bd6b25ce0e | ||
|
d51a853d5c | ||
|
9ed99402f5 | ||
|
ec3a6a3137 | ||
|
796858a53f | ||
|
5b78caca94 | ||
|
bec2248141 | ||
|
211503c39f | ||
|
adb1307b9a | ||
|
99673f04bc | ||
|
e9a537774d | ||
|
367f539769 | ||
|
398133cf55 | ||
|
52fc3ba405 | ||
|
fdd6e18b75 | ||
|
58a84b8cb6 | ||
|
c5d666d374 | ||
|
5d8993b06a | ||
|
c758bf9fd7 | ||
|
900813a328 | ||
|
2bad0e5d20 | ||
|
ccc5842bc9 | ||
|
fd86c2026d | ||
|
e4a8eae701 | ||
|
75e51819d0 | ||
|
8ee341500d | ||
|
0590062925 | ||
|
799d88d3d8 | ||
|
760aea9a96 | ||
|
d6a31b1766 | ||
|
0b54a5b10a | ||
|
6309cb9b41 | ||
|
27a82a1b93 | ||
|
ecd1936695 | ||
|
76b3c61012 | ||
|
0df2dea73b | ||
|
f8bb576c4f | ||
|
ee61f6f3e2 | ||
|
f14f2a6d79 | ||
|
2c322cc5d6 | ||
|
3b8f3a1504 | ||
|
8f9529cd05 | ||
|
f4bca0b348 | ||
|
6291438073 | ||
|
18c3c15391 | ||
|
dda620e88c | ||
|
d7cc31b63e | ||
|
5e3e1c82d8 | ||
|
aa80652f47 | ||
|
9d247bbd2d | ||
|
93e40a7b2f | ||
|
03ff2cc1c4 | ||
|
a285b6377b | ||
|
cd791a5ea0 | ||
|
87830900a9 | ||
|
dfc9d9f50a | ||
|
75311a7e16 | ||
|
628bc4d1e7 | ||
|
a4c3f48639 | ||
|
bdf80aa542 | ||
|
adf3c58ad3 | ||
|
caf90bfaa5 | ||
|
2f985f4bb4 | ||
|
67c2bcdf4c | ||
|
1d2d0e3ff2 | ||
|
9fda6ee39f | ||
|
bc3e582fe4 | ||
|
bc1fc5ddbc | ||
|
63948fc62c | ||
|
f4858a7103 | ||
|
26886e6140 | ||
|
7a1818c99b | ||
|
2ccd1b10e5 | ||
|
788fa208c8 | ||
|
8848314c08 | ||
|
c11125f9ed | ||
|
95ceeec722 | ||
|
b68ff25917 | ||
|
3e3327ea17 | ||
|
b158bb8693 | ||
|
2bf098eda4 | ||
|
382e05fa56 | ||
|
19b05d886e | ||
|
e65566a9cc | ||
|
baa3c3f0f6 | ||
|
f4f339529c | ||
|
7d02fae85b | ||
|
6e46c3f1fd | ||
|
c7e675940c | ||
|
d26b1317ed | ||
|
a221f22969 | ||
|
817f786fbb | ||
|
62420c73cb | ||
|
2522a0b7da | ||
|
46d32a12c9 | ||
|
c491418526 | ||
|
c067545c17 | ||
|
823a155293 | ||
|
324b2c78fa | ||
|
d34f98289b | ||
|
644096b15c | ||
|
15cebcc363 | ||
|
faa4ea68c0 | ||
|
29a9385ff0 | ||
|
476eae0c2a | ||
|
8399267671 | ||
|
db546cf87f | ||
|
317639758a | ||
|
fdbabca85f | ||
|
6f790e5821 | ||
|
6f5cdeb611 | ||
|
9eb4f404cb | ||
|
f58487b392 | ||
|
5b9aefef77 | ||
|
defaf19f5d | ||
|
772fd5cc44 | ||
|
50a0f6df7e | ||
|
9f435c5f1c | ||
|
931e2d1d26 | ||
|
a42419da42 | ||
|
9a237b776c | ||
|
02ec32a1ef | ||
|
a1e9e6440f | ||
|
5878e6398c | ||
|
6c6f1408f2 | ||
|
b7a7319c38 | ||
|
68f705cac5 | ||
|
079d1dcd80 | ||
|
7b24bbdf49 | ||
|
f86d543ebb | ||
|
60e47a2699 | ||
|
b8bc7a696b | ||
|
7d900ef1bf | ||
|
1931a73f39 | ||
|
966ea3aebd | ||
|
b3013681ff | ||
|
416c7fcbce | ||
|
e83eebb12f | ||
|
a349873226 | ||
|
fccae2b911 | ||
|
3ee08848db | ||
|
0129b4dd45 | ||
|
1c57e7f1f4 | ||
|
d0caf3a11e | ||
|
a87bb090d9 | ||
|
beb95e7781 | ||
|
5435d7af91 | ||
|
0c0a70f4c6 | ||
|
e3947e2b7f | ||
|
da3f7fb7f8 | ||
|
429ddfd38d | ||
|
479514d015 | ||
|
355e41466d | ||
|
03d9aad87c | ||
|
3e2bcf530b | ||
|
6343a5f68e | ||
|
00de9a9828 | ||
|
7fc2cd819e | ||
|
974739aab5 | ||
|
0cc4f8e385 | ||
|
513fd2a872 | ||
|
ae6986fb89 | ||
|
e8e28989eb | ||
|
0fa629d05b | ||
|
ff7a07d5c4 | ||
|
5a18403057 | ||
|
1b7b1d6eac | ||
|
23cfa4ae45 | ||
|
e82def52a9 | ||
|
bcfe9db299 | ||
|
cf00ae7640 | ||
|
f9b9e88646 | ||
|
c2500434c3 | ||
|
f74b341dde | ||
|
461b00f34a | ||
|
4cda41ac7b | ||
|
6a1c4fbfcb | ||
|
31424c126f | ||
|
53096539dc | ||
|
2c0b475235 | ||
|
a542405200 | ||
|
3e2b085ef9 | ||
|
885e4384a1 | ||
|
2b8f151094 | ||
|
5ac71f0b27 | ||
|
39ac7c9435 | ||
|
ed7bdc8a90 | ||
|
55f0cab3a3 | ||
|
544dec6298 | ||
|
e0ae1814b1 | ||
|
9532d72371 | ||
|
1362bbbb4b | ||
|
f00fd51dae | ||
|
a8896c5ac2 | ||
|
5d3808524d | ||
|
c8f167823f | ||
|
70f6796e7d | ||
|
85d253af6b | ||
|
a86cbf5876 | ||
|
3f1399de8a | ||
|
1f809a8560 | ||
|
653d14e2f9 | ||
|
85fab7e47b | ||
|
3aa9176f08 | ||
|
33b53b6021 | ||
|
3f7421b71b | ||
|
ee45625290 | ||
|
2c2a42587b | ||
|
e2f65efcf9 | ||
|
081d6e4784 | ||
|
1d4247f64e | ||
|
1ff30d7b79 | ||
|
16ea817968 | ||
|
a2a4bae929 | ||
|
c58843b3a1 | ||
|
a22524b004 | ||
|
87c4c21e75 | ||
|
b9465395cb | ||
|
edf41477f0 | ||
|
5f627b4448 | ||
|
60e5428925 | ||
|
748ec66725 | ||
|
e54a3a2f01 | ||
|
0e4cb4f406 | ||
|
f7ffe72ac7 | ||
|
cd58dc3e56 | ||
|
c33bcf2051 | ||
|
7642c08763 | ||
|
fdc8000810 | ||
|
a91c9b15e3 | ||
|
27d67ea2ba | ||
|
d6a8160902 | ||
|
6e1b9395c6 | ||
|
b1ccbed3d4 | ||
|
37381350f8 | ||
|
7af808a5ef | ||
|
876bef5937 | ||
|
a16af51873 | ||
|
dc9a441bfa | ||
|
ee6dfe8308 | ||
|
2cb5b03e53 | ||
|
964b190350 | ||
|
13d27a42cc | ||
|
ec05fee43a | ||
|
b50e3bc67f | ||
|
ac78b5e97b | ||
|
17e0d63957 | ||
|
9209fe3878 | ||
|
84d84211ac | ||
|
b4116dcdd5 | ||
|
bb18d787b5 | ||
|
0647084f39 | ||
|
734ea11e3c | ||
|
3940450878 | ||
|
ccbfaa83b0 | ||
|
d86007873e | ||
|
4b7df0d30c | ||
|
caff59499c | ||
|
99a0f9824a | ||
|
3013bbb27d | ||
|
6f9b54933f | ||
|
1bbe317508 | ||
|
e97a534f13 | ||
|
8acb83d993 | ||
|
71b640cc5b | ||
|
4f026fafbc | ||
|
39f594d660 | ||
|
cae97f6521 | ||
|
6cbf345f28 | ||
|
a0ab29f8a1 | ||
|
4a4fbfc967 | ||
|
408b5839b1 | ||
|
60620368d7 | ||
|
4927de4f86 | ||
|
bad5c1a303 | ||
|
6f18cc9abc | ||
|
4d144be8b0 | ||
|
2128b696b8 | ||
|
a23669220a | ||
|
051c46256b | ||
|
d5524947b5 | ||
|
74f91c4af7 | ||
|
da4d4191a9 | ||
|
2564300e55 | ||
|
cb0713d2c9 | ||
|
ac265bef1e | ||
|
4a0132c570 | ||
|
1fa174692a | ||
|
04c9544187 | ||
|
8085fc15cc | ||
|
2f15832f56 | ||
|
1557ed153c | ||
|
a6620ac28d | ||
|
89e36657cc | ||
|
7129bed51b | ||
|
1cc79574fc | ||
|
20e35880bf | ||
|
5e1912cfc1 | ||
|
293f0f39ce | ||
|
0db261ba56 | ||
|
7668a2c5cb | ||
|
26c06f0c51 | ||
|
23d3608c6b | ||
|
baa7081d68 | ||
|
19bf2b4e88 | ||
|
6a1b20de2a | ||
|
3c864e930d | ||
|
dc5596ff54 | ||
|
46d9760f5e | ||
|
90d71d3f08 | ||
|
e9404524cc | ||
|
dc65a213fd | ||
|
4237ba10dc | ||
|
c3f3b29b92 | ||
|
1c985da0ca | ||
|
7a60322abf | ||
|
07bc9a3530 | ||
|
a099965bad | ||
|
146323a7f8 | ||
|
57e086dcea | ||
|
2101f5d4cc | ||
|
cc8c9281e6 | ||
|
cf372f0778 | ||
|
34bc0ae667 | ||
|
2865cf0419 | ||
|
58c1f6f0a7 | ||
|
7c7a0d395c | ||
|
8bdcb436f9 | ||
|
ff815fe65a | ||
|
da3a2d8137 | ||
|
13dcfd41bd | ||
|
e56190b378 | ||
|
a79553f39f | ||
|
b3efb3ebae | ||
|
68d301ffd4 | ||
|
3b0bec8d11 | ||
|
412c617d0f | ||
|
751536f5c8 | ||
|
025f30ba38 | ||
|
0d2fb1d193 | ||
|
82b34105d3 | ||
|
73aeb2dc56 | ||
|
c6973bd412 | ||
|
f8780e6d11 | ||
|
e2f89ec7aa | ||
|
62651c556a | ||
|
bf94e38d3d | ||
|
4f97852316 | ||
|
16040f46d6 | ||
|
d068ba24f3 | ||
|
f5e43bc695 | ||
|
6a5308ab49 | ||
|
63e0f29564 | ||
|
42bdd9d051 | ||
|
4e40de6e2a | ||
|
0fa2b899d1 | ||
|
f17e4c9c28 | ||
|
807962f4a1 | ||
|
9c1aa1d668 | ||
|
69f491f14e | ||
|
cb007f47c1 | ||
|
9abd500a74 | ||
|
cf68bcaeff | ||
|
cbe2bd914d | ||
|
75111274ed | ||
|
624dcebff6 | ||
|
9684f17cde | ||
|
e52a40abf7 | ||
|
0daa05961b | ||
|
158731f83e | ||
|
24270b0301 | ||
|
3c1b81b957 | ||
|
45c24df512 | ||
|
bf671b605e | ||
|
09c82fbc9a | ||
|
3bca0409fe | ||
|
d6f78a354d | ||
|
e0b9d47387 | ||
|
f8795e102b | ||
|
4bb4a18876 | ||
|
8560c61842 | ||
|
a81bbebf44 | ||
|
72e3ffeb74 | ||
|
2fc9f2b41d | ||
|
5f3544baa3 | ||
|
da27660014 | ||
|
b8a6114309 | ||
|
774e208f94 | ||
|
f20b52778b | ||
|
83e865a370 | ||
|
b89a938687 | ||
|
e89a2aabed | ||
|
f58766ce5c | ||
|
15644a40df | ||
|
d4800f3c3f | ||
|
09a5dd2d3b | ||
|
819039ee63 | ||
|
ce36339575 | ||
|
684712076f | ||
|
603c92080f | ||
|
16ae61f655 | ||
|
0ef4d4ab7e | ||
|
4542535f94 | ||
|
6a52eed80e | ||
|
acf5cbfe93 | ||
|
8d1c8cae9c | ||
|
c84890f708 | ||
|
6d0886204a | ||
|
04d02a9d57 | ||
|
6ac4e8065a | ||
|
b82f815f37 | ||
|
158f8cadc0 | ||
|
7d70cf4157 | ||
|
6591fdf51f | ||
|
47d7c64274 | ||
|
db175341c7 | ||
|
9ff6772790 | ||
|
5f9b83944d | ||
|
f6735be4da | ||
|
6a3e0103bb | ||
|
0b5cc1983e | ||
|
1a9f8b1ad4 | ||
|
754f0008ec | ||
|
7115599121 | ||
|
e638e83662 | ||
|
d958fa9ff9 | ||
|
ebb6419960 | ||
|
2415951ead | ||
|
995ad69c54 | ||
|
225e4b9633 | ||
|
6ce2c6783b | ||
|
29f400b97d | ||
|
0cd64bd077 | ||
|
0551a02b82 | ||
|
25fadd06d0 | ||
|
7a47d07c6d | ||
|
34e48bed3b | ||
|
7b61ac3ddf | ||
|
b1c3a49fff | ||
|
c816336cbd | ||
|
00cf122d7a | ||
|
c7667c2d7f |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -31,3 +31,5 @@ updates_key.pem
|
||||
test/testdata
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
.idea
|
||||
.idea/*
|
@@ -9,7 +9,6 @@ notifications:
|
||||
email:
|
||||
- filippo.valsorda@gmail.com
|
||||
- phihag@phihag.de
|
||||
- jaime.marquinez.ferrandiz+travis@gmail.com
|
||||
- yasoob.khld@gmail.com
|
||||
# irc:
|
||||
# channels:
|
||||
|
12
AUTHORS
12
AUTHORS
@@ -90,3 +90,15 @@ Matthew Rayfield
|
||||
t0mm0
|
||||
Tithen-Firion
|
||||
Zack Fernandes
|
||||
cryptonaut
|
||||
Adrian Kretz
|
||||
Mathias Rav
|
||||
Petr Kutalek
|
||||
Will Glynn
|
||||
Max Reimann
|
||||
Cédric Luthi
|
||||
Thijs Vermeir
|
||||
Joel Leclerc
|
||||
Christopher Krooss
|
||||
Ondřej Caletka
|
||||
Dinesh S
|
||||
|
136
CONTRIBUTING.md
Normal file
136
CONTRIBUTING.md
Normal file
@@ -0,0 +1,136 @@
|
||||
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||
|
||||
### Is the description of the issue itself sufficient?
|
||||
|
||||
We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
|
||||
|
||||
So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
|
||||
|
||||
- What the problem is
|
||||
- How it could be fixed
|
||||
- How your proposed solution would look like
|
||||
|
||||
If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
|
||||
|
||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||
|
||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
|
||||
### Are you using the latest version?
|
||||
|
||||
Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
|
||||
|
||||
### Is the issue already documented?
|
||||
|
||||
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or at https://github.com/rg3/youtube-dl/search?type=Issues . If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
|
||||
|
||||
### Why are existing options not enough?
|
||||
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
|
||||
### Is there enough context in your bug report?
|
||||
|
||||
People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one).
|
||||
|
||||
We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
|
||||
|
||||
### Does the issue involve one problem, and one problem only?
|
||||
|
||||
Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
|
||||
|
||||
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
|
||||
|
||||
### Is anyone going to need the feature?
|
||||
|
||||
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||
|
||||
### Is your question about youtube-dl?
|
||||
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
|
||||
# DEVELOPER INSTRUCTIONS
|
||||
|
||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||
|
||||
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
|
||||
|
||||
python -m youtube_dl
|
||||
|
||||
To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
|
||||
|
||||
python -m unittest discover
|
||||
python test/test_download.py
|
||||
nosetests
|
||||
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* make
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||
```python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class YourExtractorIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://yourextractor.com/watch/42',
|
||||
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
||||
'info_dict': {
|
||||
'id': '42',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video title goes here',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
# TODO more properties, either as:
|
||||
# * A value
|
||||
# * MD5 checksum; start the string with md5:
|
||||
# * A regular expression; start the string with re:
|
||||
# * Any Python type (for example int or float)
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
23
Makefile
23
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
|
||||
|
||||
cleanall: clean
|
||||
rm -f youtube-dl youtube-dl.exe
|
||||
@@ -35,13 +35,22 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtu
|
||||
install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
|
||||
install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
|
||||
|
||||
codetest:
|
||||
flake8 .
|
||||
|
||||
test:
|
||||
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
|
||||
nosetests --verbose test
|
||||
$(MAKE) codetest
|
||||
|
||||
ot: offlinetest
|
||||
|
||||
offlinetest: codetest
|
||||
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
|
||||
|
||||
tar: youtube-dl.tar.gz
|
||||
|
||||
.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion
|
||||
.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest supportedsites
|
||||
|
||||
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
|
||||
|
||||
@@ -54,7 +63,13 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||
chmod a+x youtube-dl
|
||||
|
||||
README.md: youtube_dl/*.py youtube_dl/*/*.py
|
||||
COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
|
||||
COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py
|
||||
|
||||
CONTRIBUTING.md: README.md
|
||||
python devscripts/make_contributing.py README.md CONTRIBUTING.md
|
||||
|
||||
supportedsites:
|
||||
python devscripts/make_supportedsites.py docs/supportedsites.md
|
||||
|
||||
README.txt: README.md
|
||||
pandoc -f markdown -t plain README.md -o README.txt
|
||||
|
133
README.md
133
README.md
@@ -1,7 +1,15 @@
|
||||
youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
**youtube-dl** [OPTIONS] URL [URL...]
|
||||
- [INSTALLATION](#installation)
|
||||
- [DESCRIPTION](#description)
|
||||
- [OPTIONS](#options)
|
||||
- [CONFIGURATION](#configuration)
|
||||
- [OUTPUT TEMPLATE](#output-template)
|
||||
- [VIDEO SELECTION](#video-selection)
|
||||
- [FAQ](#faq)
|
||||
- [DEVELOPER INSTRUCTIONS](#developer-instructions)
|
||||
- [BUGS](#bugs)
|
||||
- [COPYRIGHT](#copyright)
|
||||
|
||||
# INSTALLATION
|
||||
|
||||
@@ -34,6 +42,8 @@ YouTube.com and a few more sites. It requires the Python interpreter, version
|
||||
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
|
||||
which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
youtube-dl [OPTIONS] URL [URL...]
|
||||
|
||||
# OPTIONS
|
||||
-h, --help print this help text and exit
|
||||
--version print program version and exit
|
||||
@@ -50,10 +60,6 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
they would handle
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
extractors
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
||||
an empty string (--proxy "") for direct
|
||||
connection
|
||||
--socket-timeout None Time to wait before giving up, in seconds
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||
example "gvsearch2:" downloads two videos
|
||||
from google videos for youtube-dl "large
|
||||
@@ -72,6 +78,18 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--flat-playlist Do not extract the videos of a playlist,
|
||||
only list them.
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
||||
an empty string (--proxy "") for direct
|
||||
connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||
--source-address IP Client-side IP address to bind to
|
||||
(experimental)
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
(experimental)
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
(experimental)
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER playlist video to end at (default is last)
|
||||
@@ -113,12 +131,12 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
size. By default, the buffer size is
|
||||
automatically resized from an initial value
|
||||
of SIZE.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
|
||||
## Filesystem Options:
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||
stdin)
|
||||
--id use only video ID in file name
|
||||
-A, --auto-number number downloaded files starting from 00000
|
||||
-o, --output TEMPLATE output filename template. Use %(title)s to
|
||||
get the title, %(uploader)s for the
|
||||
uploader name, %(uploader_id)s for the
|
||||
@@ -152,6 +170,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
-A, --auto-number [deprecated; use -o
|
||||
"%(autonumber)s-%(title)s.%(ext)s" ] number
|
||||
downloaded files starting from 00000
|
||||
-t, --title [deprecated] use title in file name
|
||||
(default)
|
||||
-l, --literal [deprecated] alias of --title
|
||||
@@ -206,6 +227,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
for each command-line argument. If the URL
|
||||
refers to a playlist, dump the whole
|
||||
playlist information in a single line.
|
||||
--print-json Be quiet and print the video information as
|
||||
JSON (video is still being downloaded).
|
||||
--newline output progress bar as new lines
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
@@ -235,14 +258,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: -f 22/17/18 . -f
|
||||
mp4 , -f m4a and -f flv are also
|
||||
supported. You can also use the special
|
||||
names "best", "bestvideo", "bestaudio",
|
||||
"worst", "worstvideo" and "worstaudio". By
|
||||
default, youtube-dl will pick the best
|
||||
quality. Use commas to download multiple
|
||||
audio formats, such as -f
|
||||
preference using slashes, as in -f 22/17/18
|
||||
. Instead of format codes, you can select
|
||||
by extension for the extensions aac, m4a,
|
||||
mp3, mp4, ogg, wav, webm. You can also use
|
||||
the special names "best", "bestvideo",
|
||||
"bestaudio", "worst". By default, youtube-
|
||||
dl will pick the best quality. Use commas
|
||||
to download multiple audio formats, such as
|
||||
-f
|
||||
136/137/mp4/bestvideo,140/m4a/bestaudio.
|
||||
You can merge the video and audio of two
|
||||
formats into a single file using -f <video-
|
||||
@@ -256,6 +280,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-F, --list-formats list all available formats
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifest on
|
||||
YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g.
|
||||
bestvideo+bestaudio), output to given
|
||||
container format. One of mkv, mp4, ogg,
|
||||
webm, flv.Ignored if no merge is required
|
||||
|
||||
## Subtitle Options:
|
||||
--write-sub write subtitle file
|
||||
@@ -302,6 +330,11 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--add-metadata write metadata to the video file
|
||||
--xattrs write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY (experimental) Automatically correct known
|
||||
faults of the file. One of never (do
|
||||
nothing), warn (only emit a warning),
|
||||
detect_or_warn(check whether we can do
|
||||
anything about it, warn otherwise
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||
postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
@@ -313,7 +346,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
|
||||
|
||||
# OUTPUT TEMPLATE
|
||||
|
||||
@@ -436,6 +469,22 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
|
||||
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||
|
||||
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
||||
|
||||
If you put youtube-dl and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome.
|
||||
|
||||
To make a different directory work - either for ffmpeg, or for youtube-dl, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory.
|
||||
|
||||
From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
|
||||
|
||||
### How can I detect whether a given URL is supported by youtube-dl?
|
||||
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
|
||||
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
||||
|
||||
If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
|
||||
|
||||
# DEVELOPER INSTRUCTIONS
|
||||
|
||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||
@@ -526,23 +575,59 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
|
||||
|
||||
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
|
||||
|
||||
import youtube_dl
|
||||
```python
|
||||
import youtube_dl
|
||||
|
||||
ydl_opts = {}
|
||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
ydl_opts = {}
|
||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
```
|
||||
|
||||
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
|
||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||
|
||||
```python
|
||||
import youtube_dl
|
||||
|
||||
|
||||
class MyLogger(object):
|
||||
def debug(self, msg):
|
||||
pass
|
||||
|
||||
def warning(self, msg):
|
||||
pass
|
||||
|
||||
def error(self, msg):
|
||||
print(msg)
|
||||
|
||||
|
||||
def my_hook(d):
|
||||
if d['status'] == 'finished':
|
||||
print('Done downloading, now converting ...')
|
||||
|
||||
|
||||
ydl_opts = {
|
||||
'format': 'bestaudio/best',
|
||||
'postprocessors': [{
|
||||
'key': 'FFmpegExtractAudio',
|
||||
'preferredcodec': 'mp3',
|
||||
'preferredquality': '192',
|
||||
}],
|
||||
'logger': MyLogger(),
|
||||
'progress_hooks': [my_hook],
|
||||
}
|
||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
```
|
||||
|
||||
# BUGS
|
||||
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the irc channel #youtube-dl on freenode.
|
||||
|
||||
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
For discussions, join us in the irc channel #youtube-dl on freenode.
|
||||
|
||||
When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist):
|
||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||
|
||||
### Is the description of the issue itself sufficient?
|
||||
|
||||
|
@@ -16,7 +16,7 @@ def main():
|
||||
template = tmplf.read()
|
||||
|
||||
ie_htmls = []
|
||||
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
|
||||
for ie in youtube_dl.list_extractors(age_limit=None):
|
||||
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
|
||||
ie_desc = getattr(ie, 'IE_DESC', None)
|
||||
if ie_desc is False:
|
||||
|
32
devscripts/make_contributing.py
Executable file
32
devscripts/make_contributing.py
Executable file
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import optparse
|
||||
import re
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 2:
|
||||
parser.error('Expected an input and an output filename')
|
||||
|
||||
infile, outfile = args
|
||||
|
||||
with io.open(infile, encoding='utf-8') as inf:
|
||||
readme = inf.read()
|
||||
|
||||
bug_text = re.search(
|
||||
r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
|
||||
dev_text = re.search(
|
||||
r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING YOUTUBE-DL',
|
||||
readme).group(1)
|
||||
|
||||
out = bug_text + dev_text
|
||||
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(out)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
45
devscripts/make_supportedsites.py
Normal file
45
devscripts/make_supportedsites.py
Normal file
@@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import optparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
# Import youtube_dl
|
||||
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
|
||||
sys.path.append(ROOT_DIR)
|
||||
import youtube_dl
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 1:
|
||||
parser.error('Expected an output filename')
|
||||
|
||||
outfile, = args
|
||||
|
||||
def gen_ies_md(ies):
|
||||
for ie in ies:
|
||||
ie_md = '**{0}**'.format(ie.IE_NAME)
|
||||
ie_desc = getattr(ie, 'IE_DESC', None)
|
||||
if ie_desc is False:
|
||||
continue
|
||||
if ie_desc is not None:
|
||||
ie_md += ': {0}'.format(ie.IE_DESC)
|
||||
if not ie.working():
|
||||
ie_md += ' (Currently broken)'
|
||||
yield ie_md
|
||||
|
||||
ies = sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower())
|
||||
out = '# Supported sites\n' + ''.join(
|
||||
' - ' + md + '\n'
|
||||
for md in gen_ies_md(ies))
|
||||
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(out)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@@ -11,8 +11,19 @@ README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
PREFIX = '%YOUTUBE-DL(1)\n\n# NAME\n'
|
||||
readme = re.sub(r'(?s)# INSTALLATION.*?(?=# DESCRIPTION)', '', readme)
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||
|
||||
'''
|
||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
|
500
docs/supportedsites.md
Normal file
500
docs/supportedsites.md
Normal file
@@ -0,0 +1,500 @@
|
||||
# Supported sites
|
||||
- **1up.com**
|
||||
- **220.ro**
|
||||
- **24video**
|
||||
- **3sat**
|
||||
- **4tube**
|
||||
- **56.com**
|
||||
- **5min**
|
||||
- **8tracks**
|
||||
- **9gag**
|
||||
- **abc.net.au**
|
||||
- **AcademicEarth:Course**
|
||||
- **AddAnime**
|
||||
- **AdobeTV**
|
||||
- **AdultSwim**
|
||||
- **Aftonbladet**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
- **Aparat**
|
||||
- **AppleTrailers**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:concert**
|
||||
- **arte.tv:creative**
|
||||
- **arte.tv:ddc**
|
||||
- **arte.tv:embed**
|
||||
- **arte.tv:future**
|
||||
- **audiomack**
|
||||
- **AUEngine**
|
||||
- **Azubu**
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
- **Bet**
|
||||
- **Bild**: Bild.de
|
||||
- **BiliBili**
|
||||
- **blinkx**
|
||||
- **blip.tv:user**
|
||||
- **BlipTV**
|
||||
- **Bloomberg**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
- **BR**: Bayerischer Rundfunk Mediathek
|
||||
- **Break**
|
||||
- **Brightcove**
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**
|
||||
- **Canal13cl**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **CBS**
|
||||
- **CBSNews**: CBS News
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
- **Chilloutzone**
|
||||
- **Cinchcast**
|
||||
- **Cinemassacre**
|
||||
- **clipfish**
|
||||
- **cliphunter**
|
||||
- **Clipsyndicate**
|
||||
- **Cloudy**
|
||||
- **Clubic**
|
||||
- **cmt.com**
|
||||
- **CNET**
|
||||
- **CNN**
|
||||
- **CNNBlogs**
|
||||
- **CollegeHumor**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Cracked**
|
||||
- **Criterion**
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSpan**: C-SPAN
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **dailymotion**
|
||||
- **dailymotion:playlist**
|
||||
- **dailymotion:user**
|
||||
- **daum.net**
|
||||
- **DBTV**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
- **Discovery**
|
||||
- **divxstage**: DivxStage
|
||||
- **Dotsub**
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
- **DRTV**
|
||||
- **Dump**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **EbaumsWorld**
|
||||
- **eHow**
|
||||
- **Einthusan**
|
||||
- **eitb.tv**
|
||||
- **EllenTV**
|
||||
- **EllenTV:clips**
|
||||
- **ElPais**: El País
|
||||
- **EMPFlix**
|
||||
- **Engadget**
|
||||
- **Eporner**
|
||||
- **Escapist**
|
||||
- **EveryonesMixtape**
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **facebook**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **fernsehkritik.tv**
|
||||
- **fernsehkritik.tv:postecke**
|
||||
- **Firedrive**
|
||||
- **Firstpost**
|
||||
- **firsttv**: Видеоархив - Первый канал
|
||||
- **Flickr**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **Foxgay**
|
||||
- **FoxNews**
|
||||
- **france2.fr:generation-quoi**
|
||||
- **FranceCulture**
|
||||
- **FranceInter**
|
||||
- **francetv**: France 2, 3, 4, 5 and Ô
|
||||
- **francetvinfo.fr**
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreeVideo**
|
||||
- **FunnyOrDie**
|
||||
- **Gamekings**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gametrailers**
|
||||
- **GDCVault**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
- **GiantBomb**
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GodTube**
|
||||
- **GoldenMoustache**
|
||||
- **Golem**
|
||||
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in
|
||||
- **Goshgay**
|
||||
- **Grooveshark**
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
- **Heise**
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HornBunny**
|
||||
- **HostingBulk**
|
||||
- **HotNewHipHop**
|
||||
- **Howcast**
|
||||
- **HowStuffWorks**
|
||||
- **HuffPost**: Huffington Post
|
||||
- **Hypem**
|
||||
- **Iconosquare**
|
||||
- **ign.com**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Ina**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **Izlesene**
|
||||
- **JadoreCettePub**
|
||||
- **JeuxVideo**
|
||||
- **Jove**
|
||||
- **jpopsuki.tv**
|
||||
- **Jukebox**
|
||||
- **Kankan**
|
||||
- **keek**
|
||||
- **KeezMovies**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
- **Ku6**
|
||||
- **la7.tv**
|
||||
- **Laola1Tv**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
- **LiveLeak**
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **lrt.lt**
|
||||
- **lynda**: lynda.com videos
|
||||
- **lynda:course**: lynda.com online courses
|
||||
- **m6**
|
||||
- **macgamestore**: MacGameStore trailers
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **Malemotion**
|
||||
- **MDR**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
- **Mgoon**
|
||||
- **Minhateca**
|
||||
- **MinistryGrid**
|
||||
- **mitele.es**
|
||||
- **mixcloud**
|
||||
- **MLB**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
- **Mofosex**
|
||||
- **Mojvideo**
|
||||
- **Moniker**: allmyvideos.net and vidspot.net
|
||||
- **mooshare**: Mooshare.biz
|
||||
- **Morningstar**: morningstar.com
|
||||
- **Motherless**
|
||||
- **Motorsport**: motorsport.com
|
||||
- **MovieClips**
|
||||
- **Moviezine**
|
||||
- **movshare**: MovShare
|
||||
- **MPORA**
|
||||
- **MTV**
|
||||
- **mtviggy.com**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
- **MusicVault**
|
||||
- **muzu.tv**
|
||||
- **MySpace**
|
||||
- **MySpace:album**
|
||||
- **MySpass**
|
||||
- **myvideo**
|
||||
- **MyVidster**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **ndr**: NDR.de - Mediathek
|
||||
- **NDTV**
|
||||
- **NerdCubedFeed**
|
||||
- **Newgrounds**
|
||||
- **Newstube**
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **nhl.com**
|
||||
- **nhl.com:videocenter**: NHL videocenter category
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **Noco**
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
- **novamov**: NovaMov
|
||||
- **Nowness**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo.nl**
|
||||
- **NRK**
|
||||
- **NRKTV**
|
||||
- **NTV**
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
- **ocw.mit.edu**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **Ooyala**
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **ORFFM4**: radio FM4
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
- **PBS**
|
||||
- **Phoenix**
|
||||
- **Photobucket**
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **Playvid**
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
- **PornHd**
|
||||
- **PornHub**
|
||||
- **Pornotube**
|
||||
- **PornoXO**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **Pyvideo**
|
||||
- **QuickVid**
|
||||
- **radio.de**
|
||||
- **radiofrance**
|
||||
- **Rai**
|
||||
- **RBMARadio**
|
||||
- **RedTube**
|
||||
- **Restudy**
|
||||
- **ReverbNation**
|
||||
- **RingTV**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **RTBF**
|
||||
- **RTLnow**
|
||||
- **rtlxl.nl**
|
||||
- **RTP**
|
||||
- **RTS**: RTS.ch
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **RUHD**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channels
|
||||
- **rutube:movie**: Rutube movies
|
||||
- **rutube:person**: Rutube person videos
|
||||
- **RUTV**: RUTV.RU
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
- **SciVee**
|
||||
- **screen.yahoo:search**: Yahoo screen search
|
||||
- **Screencast**
|
||||
- **ScreencastOMatic**
|
||||
- **ScreenwaveMedia**
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
||||
- **Shared**
|
||||
- **ShareSix**
|
||||
- **Sina**
|
||||
- **Slideshare**
|
||||
- **Slutload**
|
||||
- **smotri**: Smotri.com
|
||||
- **smotri:broadcast**: Smotri.com broadcasts
|
||||
- **smotri:community**: Smotri.com community videos
|
||||
- **smotri:user**: Smotri.com user videos
|
||||
- **Snotr**
|
||||
- **Sockshare**
|
||||
- **Sohu**
|
||||
- **soundcloud**
|
||||
- **soundcloud:playlist**
|
||||
- **soundcloud:set**
|
||||
- **soundcloud:user**
|
||||
- **Soundgasm**
|
||||
- **southpark.cc.com**
|
||||
- **southpark.de**
|
||||
- **Space**
|
||||
- **Spankwire**
|
||||
- **Spiegel**
|
||||
- **Spiegel:Article**: Articles on spiegel.de
|
||||
- **Spiegeltv**
|
||||
- **Spike**
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **SRMediathek**: Süddeutscher Rundfunk
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **SunPorno**
|
||||
- **SWRMediathek**
|
||||
- **Syfy**
|
||||
- **SztvHu**
|
||||
- **Tagesschau**
|
||||
- **Tapely**
|
||||
- **Tass**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
- **TeachingChannel**
|
||||
- **Teamcoco**
|
||||
- **TeamFour**
|
||||
- **TechTalks**
|
||||
- **techtv.mit.edu**
|
||||
- **TED**
|
||||
- **tegenlicht.vpro.nl**
|
||||
- **TeleBruxelles**
|
||||
- **telecinco.es**
|
||||
- **TeleMB**
|
||||
- **TenPlay**
|
||||
- **TF1**
|
||||
- **TheOnion**
|
||||
- **ThePlatform**
|
||||
- **TheSixtyOne**
|
||||
- **ThisAV**
|
||||
- **THVideo**
|
||||
- **THVideoPlaylist**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **tlc.com**
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
- **TNAFlix**
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **TruTube**
|
||||
- **Tube8**
|
||||
- **Tudou**
|
||||
- **Tumblr**
|
||||
- **TuneIn**
|
||||
- **Turbo**
|
||||
- **Tutv**
|
||||
- **tv.dfb.de**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvp.pl**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Twitch**
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
- **Vbox7**
|
||||
- **VeeHD**
|
||||
- **Veoh**
|
||||
- **Vesti**: Вести.Ru
|
||||
- **Vevo**
|
||||
- **VGTV**
|
||||
- **vh1.com**
|
||||
- **Vice**
|
||||
- **Viddler**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.mit.edu**
|
||||
- **VideoBam**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **videolectures.net**
|
||||
- **VideoMega**
|
||||
- **VideoPremium**
|
||||
- **VideoTt**: video.tt - Your True Tube
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidme**
|
||||
- **Vidzi**
|
||||
- **viki**
|
||||
- **vimeo**
|
||||
- **vimeo:album**
|
||||
- **vimeo:channel**
|
||||
- **vimeo:group**
|
||||
- **vimeo:likes**: Vimeo user likes
|
||||
- **vimeo:review**: Review pages on vimeo
|
||||
- **vimeo:user**
|
||||
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
|
||||
- **Vimple**: Vimple.ru
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **vk.com**
|
||||
- **vk.com:user-videos**: vk.com:All of a user's videos
|
||||
- **Vodlocker**
|
||||
- **Vporn**
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **vulture.com**
|
||||
- **Walla**
|
||||
- **WashingtonPost**
|
||||
- **wat.tv**
|
||||
- **WayOfTheMaster**
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **WDRMaus**: Sendung mit der Maus
|
||||
- **Weibo**
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **WorldStarHipHop**
|
||||
- **wrzuta.pl**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XHamster**
|
||||
- **XMinus**
|
||||
- **XNXX**
|
||||
- **XTube**
|
||||
- **XTubeUser**: XTube user profile
|
||||
- **XVideos**
|
||||
- **Yahoo**: Yahoo screen and movies
|
||||
- **YesJapan**
|
||||
- **Ynet**
|
||||
- **YouJizz**
|
||||
- **Youku**
|
||||
- **YouPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
- **youtube:channel**: YouTube.com channels
|
||||
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
|
||||
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
||||
- **youtube:playlist**: YouTube.com playlists
|
||||
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
||||
- **youtube:search**: YouTube.com searches
|
||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||
- **youtube:search_url**: YouTube.com search URLs
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3:album**: mp3.zing.vn albums
|
||||
- **zingmp3:song**: mp3.zing.vn songs
|
@@ -1,2 +1,6 @@
|
||||
[wheel]
|
||||
universal = True
|
||||
|
||||
[flake8]
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build
|
||||
ignore = E501
|
||||
|
@@ -82,24 +82,14 @@ class FakeYDL(YoutubeDL):
|
||||
|
||||
def gettestcases(include_onlymatching=False):
|
||||
for ie in youtube_dl.extractor.gen_extractors():
|
||||
t = getattr(ie, '_TEST', None)
|
||||
if t:
|
||||
assert not hasattr(ie, '_TESTS'), \
|
||||
'%s has _TEST and _TESTS' % type(ie).__name__
|
||||
tests = [t]
|
||||
else:
|
||||
tests = getattr(ie, '_TESTS', [])
|
||||
for t in tests:
|
||||
if not include_onlymatching and t.get('only_matching', False):
|
||||
continue
|
||||
t['name'] = type(ie).__name__[:-len('IE')]
|
||||
yield t
|
||||
for tc in ie.get_testcases(include_onlymatching):
|
||||
yield tc
|
||||
|
||||
|
||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def expect_info_dict(self, expected_dict, got_dict):
|
||||
def expect_info_dict(self, got_dict, expected_dict):
|
||||
for info_field, expected in expected_dict.items():
|
||||
if isinstance(expected, compat_str) and expected.startswith('re:'):
|
||||
got = got_dict.get(info_field)
|
||||
@@ -120,6 +110,20 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
got = 'md5:' + md5(got_dict.get(info_field))
|
||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(
|
||||
isinstance(got, list),
|
||||
'Expected field %s to be a list, but it is of type %s' % (
|
||||
info_field, type(got).__name__))
|
||||
expected_num = int(expected.partition(':')[2])
|
||||
assertGreaterEqual(
|
||||
self, len(got), expected_num,
|
||||
'Expected %d items in field %s, but only got %d' % (
|
||||
expected_num, info_field, len(got)
|
||||
)
|
||||
)
|
||||
continue
|
||||
else:
|
||||
got = got_dict.get(info_field)
|
||||
self.assertEqual(expected, got,
|
||||
@@ -161,7 +165,9 @@ def assertRegexpMatches(self, text, regexp, msg=None):
|
||||
else:
|
||||
m = re.match(regexp, text)
|
||||
if not m:
|
||||
note = 'Regexp didn\'t match: %r not found in %r' % (regexp, text)
|
||||
note = 'Regexp didn\'t match: %r not found' % (regexp)
|
||||
if len(text) < 1000:
|
||||
note += ' in %r' % text
|
||||
if msg is None:
|
||||
msg = note
|
||||
else:
|
||||
|
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||
|
||||
def test_html_search_meta(self):
|
||||
ie = self.ie
|
||||
html = '''
|
||||
<meta name="a" content="1" />
|
||||
<meta name='b' content='2'>
|
||||
<meta name="c" content='3'>
|
||||
<meta name=d content='4'>
|
||||
<meta property="e" content='5' >
|
||||
<meta content="6" name="f">
|
||||
'''
|
||||
|
||||
self.assertEqual(ie._html_search_meta('a', html), '1')
|
||||
self.assertEqual(ie._html_search_meta('b', html), '2')
|
||||
self.assertEqual(ie._html_search_meta('c', html), '3')
|
||||
self.assertEqual(ie._html_search_meta('d', html), '4')
|
||||
self.assertEqual(ie._html_search_meta('e', html), '5')
|
||||
self.assertEqual(ie._html_search_meta('f', html), '6')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -8,6 +8,8 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import copy
|
||||
|
||||
from test.helper import FakeYDL, assertRegexpMatches
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
@@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'vid-high')
|
||||
|
||||
def test_format_selection_audio_exts(self):
|
||||
formats = [
|
||||
{'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
|
||||
{'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
|
||||
{'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
|
||||
{'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
|
||||
{'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
|
||||
]
|
||||
|
||||
info_dict = _make_result(formats)
|
||||
ydl = YDL({'format': 'best'})
|
||||
ie = YoutubeIE(ydl)
|
||||
ie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'aac-64')
|
||||
|
||||
ydl = YDL({'format': 'mp3'})
|
||||
ie = YoutubeIE(ydl)
|
||||
ie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'mp3-64')
|
||||
|
||||
ydl = YDL({'prefer_free_formats': True})
|
||||
ie = YoutubeIE(ydl)
|
||||
ie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'ogg-64')
|
||||
|
||||
def test_format_selection_video(self):
|
||||
formats = [
|
||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
|
||||
@@ -218,7 +251,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
# 3D
|
||||
'85', '84', '102', '83', '101', '82', '100',
|
||||
# Dash video
|
||||
'138', '137', '248', '136', '247', '135', '246',
|
||||
'137', '248', '136', '247', '135', '246',
|
||||
'245', '244', '134', '243', '133', '242', '160',
|
||||
# Dash audio
|
||||
'141', '172', '140', '171', '139',
|
||||
|
@@ -45,11 +45,6 @@ class TestAgeRestriction(unittest.TestCase):
|
||||
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||
'505835.mp4', 2, old_age=25)
|
||||
|
||||
def test_pornotube(self):
|
||||
self._assert_restricted(
|
||||
'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
|
||||
'1689755.flv', 13)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -155,7 +155,7 @@ def generator(test_case):
|
||||
if is_playlist:
|
||||
self.assertEqual(res_dict['_type'], 'playlist')
|
||||
self.assertTrue('entries' in res_dict)
|
||||
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
||||
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
|
||||
|
||||
if 'playlist_mincount' in test_case:
|
||||
assertGreaterEqual(
|
||||
@@ -204,7 +204,7 @@ def generator(test_case):
|
||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||
info_dict = json.load(infof)
|
||||
|
||||
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
|
||||
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
|
||||
finally:
|
||||
try_rm_tcs_files()
|
||||
if is_playlist and res_dict is not None and res_dict.get('entries'):
|
||||
|
@@ -17,6 +17,7 @@ from youtube_dl.extractor import (
|
||||
TEDIE,
|
||||
VimeoIE,
|
||||
WallaIE,
|
||||
CeskaTelevizeIE,
|
||||
)
|
||||
|
||||
|
||||
@@ -88,6 +89,14 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(subtitles['it'] is not None)
|
||||
|
||||
def test_youtube_translated_subtitles(self):
|
||||
# This video has a subtitles track, which can be translated
|
||||
self.url = 'Ky9eprVWzlI'
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslangs'] = ['it']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(subtitles['it'] is not None)
|
||||
|
||||
def test_youtube_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
self.url = 'n5BB19UTcdA'
|
||||
@@ -309,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
|
||||
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||
IE = CeskaTelevizeIE
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['cs']))
|
||||
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -1,9 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
# Allow direct execution
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import io
|
||||
import re
|
||||
|
||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
@@ -14,6 +18,9 @@ IGNORED_FILES = [
|
||||
]
|
||||
|
||||
|
||||
from test.helper import assertRegexpMatches
|
||||
|
||||
|
||||
class TestUnicodeLiterals(unittest.TestCase):
|
||||
def test_all_files(self):
|
||||
for dirpath, _, filenames in os.walk(rootDir):
|
||||
@@ -29,9 +36,10 @@ class TestUnicodeLiterals(unittest.TestCase):
|
||||
|
||||
if "'" not in code and '"' not in code:
|
||||
continue
|
||||
self.assertRegexpMatches(
|
||||
assertRegexpMatches(
|
||||
self,
|
||||
code,
|
||||
r'(?:#.*\n*)?from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
|
||||
r'(?:(?:#.*?|\s*)\n)*from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
|
||||
'unicode_literals import missing in %s' % fn)
|
||||
|
||||
m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
|
||||
|
@@ -16,38 +16,41 @@ import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from youtube_dl.utils import (
|
||||
age_restricted,
|
||||
args_to_str,
|
||||
clean_html,
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
orderedSet,
|
||||
OnDemandPagedList,
|
||||
InAdvancePagedList,
|
||||
intlist_to_bytes,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
strip_jsonp,
|
||||
struct_unpack,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
uppercase_escape,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
version_tuple,
|
||||
xpath_with_ns,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
uppercase_escape,
|
||||
limit_length,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
js_to_json,
|
||||
intlist_to_bytes,
|
||||
args_to_str,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
@@ -143,6 +146,9 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||
self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
|
||||
self.assertEqual(
|
||||
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
|
||||
'20141126')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = '''<root>
|
||||
@@ -381,5 +387,28 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
|
||||
self.assertEqual(parse_filesize('1,24 KB'), 1240)
|
||||
|
||||
def test_version_tuple(self):
|
||||
self.assertEqual(version_tuple('1'), (1,))
|
||||
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
|
||||
self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style
|
||||
|
||||
def test_detect_exe_version(self):
|
||||
self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1
|
||||
built on May 27 2013 08:37:26 with gcc 4.7 (Debian 4.7.3-4)
|
||||
configuration: --prefix=/usr --extra-'''), '1.2.1')
|
||||
self.assertEqual(detect_exe_version('''ffmpeg version N-63176-g1fb4685
|
||||
built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)'''), 'N-63176-g1fb4685')
|
||||
self.assertEqual(detect_exe_version('''X server found. dri2 connection failed!
|
||||
Trying to open render node...
|
||||
Success at /dev/dri/renderD128.
|
||||
ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
|
||||
def test_age_restricted(self):
|
||||
self.assertFalse(age_restricted(None, 10)) # unrestricted content
|
||||
self.assertFalse(age_restricted(1, None)) # unrestricted policy
|
||||
self.assertFalse(age_restricted(8, 10))
|
||||
self.assertTrue(age_restricted(18, 14))
|
||||
self.assertFalse(age_restricted(18, 18))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -1,76 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_params
|
||||
|
||||
|
||||
import io
|
||||
import json
|
||||
|
||||
import youtube_dl.YoutubeDL
|
||||
import youtube_dl.extractor
|
||||
|
||||
|
||||
class YoutubeDL(youtube_dl.YoutubeDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(YoutubeDL, self).__init__(*args, **kwargs)
|
||||
self.to_stderr = self.to_screen
|
||||
|
||||
params = get_params({
|
||||
'writeinfojson': True,
|
||||
'skip_download': True,
|
||||
'writedescription': True,
|
||||
})
|
||||
|
||||
|
||||
TEST_ID = 'BaW_jenozKc'
|
||||
INFO_JSON_FILE = TEST_ID + '.info.json'
|
||||
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
||||
EXPECTED_DESCRIPTION = '''test chars: "'/\ä↭𝕐
|
||||
test URL: https://github.com/rg3/youtube-dl/issues/1892
|
||||
|
||||
This is a test video for youtube-dl.
|
||||
|
||||
For more information, contact phihag@phihag.de .'''
|
||||
|
||||
|
||||
class TestInfoJSON(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# Clear old files
|
||||
self.tearDown()
|
||||
|
||||
def test_info_json(self):
|
||||
ie = youtube_dl.extractor.YoutubeIE()
|
||||
ydl = YoutubeDL(params)
|
||||
ydl.add_info_extractor(ie)
|
||||
ydl.download([TEST_ID])
|
||||
self.assertTrue(os.path.exists(INFO_JSON_FILE))
|
||||
with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
|
||||
jd = json.load(jsonf)
|
||||
self.assertEqual(jd['upload_date'], '20121002')
|
||||
self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
|
||||
self.assertEqual(jd['id'], TEST_ID)
|
||||
self.assertEqual(jd['extractor'], 'youtube')
|
||||
self.assertEqual(jd['title'], '''youtube-dl test video "'/\ä↭𝕐''')
|
||||
self.assertEqual(jd['uploader'], 'Philipp Hagemeister')
|
||||
|
||||
self.assertTrue(os.path.exists(DESCRIPTION_FILE))
|
||||
with io.open(DESCRIPTION_FILE, 'r', encoding='utf-8') as descf:
|
||||
descr = descf.read()
|
||||
self.assertEqual(descr, EXPECTED_DESCRIPTION)
|
||||
|
||||
def tearDown(self):
|
||||
if os.path.exists(INFO_JSON_FILE):
|
||||
os.remove(INFO_JSON_FILE)
|
||||
if os.path.exists(DESCRIPTION_FILE):
|
||||
os.remove(DESCRIPTION_FILE)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -7,6 +7,7 @@ import collections
|
||||
import datetime
|
||||
import errno
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
import locale
|
||||
import os
|
||||
@@ -26,6 +27,7 @@ from .compat import (
|
||||
compat_cookiejar,
|
||||
compat_expanduser,
|
||||
compat_http_client,
|
||||
compat_kwargs,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
@@ -61,12 +63,18 @@ from .utils import (
|
||||
YoutubeDLHandler,
|
||||
prepend_extension,
|
||||
args_to_str,
|
||||
age_restricted,
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .downloader import get_suitable_downloader
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
|
||||
from .postprocessor import (
|
||||
FFmpegFixupStretchedPP,
|
||||
FFmpegMergerPP,
|
||||
FFmpegPostProcessor,
|
||||
get_postprocessor,
|
||||
)
|
||||
from .version import __version__
|
||||
|
||||
|
||||
@@ -115,7 +123,7 @@ class YoutubeDL(object):
|
||||
dump_single_json: Force printing the info_dict of the whole playlist
|
||||
(or video) as a single JSON line.
|
||||
simulate: Do not download the video files.
|
||||
format: Video format code.
|
||||
format: Video format code. See options.py for more information.
|
||||
format_limit: Highest quality format to try.
|
||||
outtmpl: Template for output names.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
@@ -123,6 +131,7 @@ class YoutubeDL(object):
|
||||
nooverwrites: Prevent overwriting files.
|
||||
playliststart: Playlist item to start at.
|
||||
playlistend: Playlist item to end at.
|
||||
playlistreverse: Download playlist items in reverse order.
|
||||
matchtitle: Download only matching titles.
|
||||
rejecttitle: Reject downloads for matching titles.
|
||||
logger: Log messages to a logging.Logger instance.
|
||||
@@ -174,6 +183,36 @@ class YoutubeDL(object):
|
||||
extract_flat: Do not resolve URLs, return the immediate result.
|
||||
Pass in 'in_playlist' to only show this behavior for
|
||||
playlist items.
|
||||
postprocessors: A list of dictionaries, each with an entry
|
||||
* key: The name of the postprocessor. See
|
||||
youtube_dl/postprocessor/__init__.py for a list.
|
||||
as well as any further keyword arguments for the
|
||||
postprocessor.
|
||||
progress_hooks: A list of functions that get called on download
|
||||
progress, with a dictionary with the entries
|
||||
* filename: The final filename
|
||||
* status: One of "downloading" and "finished"
|
||||
|
||||
The dict may also have some of the following entries:
|
||||
|
||||
* downloaded_bytes: Bytes on disk
|
||||
* total_bytes: Size of the whole file, None if unknown
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* eta: The estimated time in seconds, None if unknown
|
||||
* speed: The download speed in bytes/second, None if
|
||||
unknown
|
||||
|
||||
Progress hooks are guaranteed to be called at least once
|
||||
(with status "finished") if the download is successful.
|
||||
merge_output_format: Extension to use when merging formats.
|
||||
fixup: Automatically correct known faults of the file.
|
||||
One of:
|
||||
- "never": do nothing
|
||||
- "warn": only emit a warning
|
||||
- "detect_or_warn": check whether we can do anything
|
||||
about it, warn otherwise
|
||||
source_address: (Experimental) Client-side IP address to bind to.
|
||||
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
@@ -254,6 +293,16 @@ class YoutubeDL(object):
|
||||
self.print_debug_header()
|
||||
self.add_default_info_extractors()
|
||||
|
||||
for pp_def_raw in self.params.get('postprocessors', []):
|
||||
pp_class = get_postprocessor(pp_def_raw['key'])
|
||||
pp_def = dict(pp_def_raw)
|
||||
del pp_def['key']
|
||||
pp = pp_class(self, **compat_kwargs(pp_def))
|
||||
self.add_post_processor(pp)
|
||||
|
||||
for ph in self.params.get('progress_hooks', []):
|
||||
self.add_progress_hook(ph)
|
||||
|
||||
def warn_if_short_id(self, argv):
|
||||
# short YouTube ID starting with dash?
|
||||
idxs = [
|
||||
@@ -511,13 +560,8 @@ class YoutubeDL(object):
|
||||
max_views = self.params.get('max_views')
|
||||
if max_views is not None and view_count > max_views:
|
||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||
age_limit = self.params.get('age_limit')
|
||||
if age_limit is not None:
|
||||
actual_age_limit = info_dict.get('age_limit')
|
||||
if actual_age_limit is None:
|
||||
actual_age_limit = 0
|
||||
if age_limit < actual_age_limit:
|
||||
return 'Skipping "' + title + '" because it is age restricted'
|
||||
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
||||
return 'Skipping "%s" because it is age restricted' % title
|
||||
if self.in_download_archive(info_dict):
|
||||
return '%s has already been recorded in archive' % video_title
|
||||
return None
|
||||
@@ -621,23 +665,15 @@ class YoutubeDL(object):
|
||||
ie_result['url'], ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info, download=False, process=False)
|
||||
|
||||
def make_result(embedded_info):
|
||||
new_result = ie_result.copy()
|
||||
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
|
||||
'entries', 'ie_key', 'duration',
|
||||
'subtitles', 'annotations', 'format',
|
||||
'thumbnail', 'thumbnails'):
|
||||
if f in new_result:
|
||||
del new_result[f]
|
||||
if f in embedded_info:
|
||||
new_result[f] = embedded_info[f]
|
||||
return new_result
|
||||
new_result = make_result(info)
|
||||
force_properties = dict(
|
||||
(k, v) for k, v in ie_result.items() if v is not None)
|
||||
for f in ('_type', 'url'):
|
||||
if f in force_properties:
|
||||
del force_properties[f]
|
||||
new_result = info.copy()
|
||||
new_result.update(force_properties)
|
||||
|
||||
assert new_result.get('_type') != 'url_transparent'
|
||||
if new_result.get('_type') == 'compat_list':
|
||||
new_result['entries'] = [
|
||||
make_result(e) for e in new_result['entries']]
|
||||
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
@@ -654,24 +690,34 @@ class YoutubeDL(object):
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
if isinstance(ie_result['entries'], list):
|
||||
n_all_entries = len(ie_result['entries'])
|
||||
entries = ie_result['entries'][playliststart:playlistend]
|
||||
ie_entries = ie_result['entries']
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
else:
|
||||
assert isinstance(ie_result['entries'], PagedList)
|
||||
entries = ie_result['entries'].getslice(
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Downloading %d videos" %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
else: # iterable
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Downloading %d videos" %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
extra = {
|
||||
'n_entries': n_entries,
|
||||
'playlist': playlist,
|
||||
@@ -749,7 +795,7 @@ class YoutubeDL(object):
|
||||
if video_formats:
|
||||
return video_formats[0]
|
||||
else:
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
|
||||
if format_spec in extensions:
|
||||
filter_f = lambda f: f['ext'] == format_spec
|
||||
else:
|
||||
@@ -872,10 +918,24 @@ class YoutubeDL(object):
|
||||
'contain the video, try using '
|
||||
'"-f %s+%s"' % (format_2, format_1))
|
||||
return
|
||||
output_ext = (
|
||||
formats_info[0]['ext']
|
||||
if self.params.get('merge_output_format') is None
|
||||
else self.params['merge_output_format'])
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
'width': formats_info[0].get('width'),
|
||||
'height': formats_info[0].get('height'),
|
||||
'resolution': formats_info[0].get('resolution'),
|
||||
'fps': formats_info[0].get('fps'),
|
||||
'vcodec': formats_info[0].get('vcodec'),
|
||||
'vbr': formats_info[0].get('vbr'),
|
||||
'stretched_ratio': formats_info[0].get('stretched_ratio'),
|
||||
'acodec': formats_info[1].get('acodec'),
|
||||
'abr': formats_info[1].get('abr'),
|
||||
'ext': output_ext,
|
||||
}
|
||||
else:
|
||||
selected_format = None
|
||||
@@ -934,8 +994,12 @@ class YoutubeDL(object):
|
||||
if self.params.get('forceid', False):
|
||||
self.to_stdout(info_dict['id'])
|
||||
if self.params.get('forceurl', False):
|
||||
# For RTMP URLs, also include the playpath
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
for f in info_dict['requested_formats']:
|
||||
self.to_stdout(f['url'] + f.get('play_path', ''))
|
||||
else:
|
||||
# For RTMP URLs, also include the playpath
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
||||
self.to_stdout(info_dict['thumbnail'])
|
||||
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
||||
@@ -971,13 +1035,13 @@ class YoutubeDL(object):
|
||||
descfn = filename + '.description'
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
|
||||
self.to_screen('[info] Video description is already present')
|
||||
elif info_dict.get('description') is None:
|
||||
self.report_warning('There\'s no description to write.')
|
||||
else:
|
||||
try:
|
||||
self.to_screen('[info] Writing video description to: ' + descfn)
|
||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||
descfile.write(info_dict['description'])
|
||||
except (KeyError, TypeError):
|
||||
self.report_warning('There\'s no description to write.')
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write description file ' + descfn)
|
||||
return
|
||||
@@ -1099,13 +1163,33 @@ class YoutubeDL(object):
|
||||
return
|
||||
|
||||
if success:
|
||||
# Fixup content
|
||||
stretched_ratio = info_dict.get('stretched_ratio')
|
||||
if stretched_ratio is not None and stretched_ratio != 1:
|
||||
fixup_policy = self.params.get('fixup')
|
||||
if fixup_policy is None:
|
||||
fixup_policy = 'detect_or_warn'
|
||||
if fixup_policy == 'warn':
|
||||
self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
|
||||
info_dict['id'], stretched_ratio))
|
||||
elif fixup_policy == 'detect_or_warn':
|
||||
stretched_pp = FFmpegFixupStretchedPP(self)
|
||||
if stretched_pp.available:
|
||||
info_dict.setdefault('__postprocessors', [])
|
||||
info_dict['__postprocessors'].append(stretched_pp)
|
||||
else:
|
||||
self.report_warning(
|
||||
'%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
|
||||
info_dict['id'], stretched_ratio))
|
||||
else:
|
||||
assert fixup_policy == 'ignore'
|
||||
|
||||
try:
|
||||
self.post_process(filename, info_dict)
|
||||
except (PostProcessingError) as err:
|
||||
self.report_error('postprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
self.record_download_archive(info_dict)
|
||||
self.record_download_archive(info_dict)
|
||||
|
||||
def download(self, url_list):
|
||||
"""Download a given list of URLs."""
|
||||
@@ -1289,7 +1373,9 @@ class YoutubeDL(object):
|
||||
formats = info_dict.get('formats', [info_dict])
|
||||
idlen = max(len('format code'),
|
||||
max(len(f['format_id']) for f in formats))
|
||||
formats_s = [line(f, idlen) for f in formats]
|
||||
formats_s = [
|
||||
line(f, idlen) for f in formats
|
||||
if f.get('preference') is None or f['preference'] >= -1000]
|
||||
if len(formats) > 1:
|
||||
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
|
||||
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
||||
@@ -1408,9 +1494,8 @@ class YoutubeDL(object):
|
||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(
|
||||
self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(debuglevel=debuglevel)
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
opener = compat_urllib_request.build_opener(
|
||||
https_handler, proxy_handler, cookie_processor, ydlh)
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
|
@@ -38,18 +38,8 @@ from .update import update_self
|
||||
from .downloader import (
|
||||
FileDownloader,
|
||||
)
|
||||
from .extractor import gen_extractors
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
from .YoutubeDL import YoutubeDL
|
||||
from .postprocessor import (
|
||||
AtomicParsleyPP,
|
||||
FFmpegAudioFixPP,
|
||||
FFmpegMetadataPP,
|
||||
FFmpegVideoConvertor,
|
||||
FFmpegExtractAudioPP,
|
||||
FFmpegEmbedSubtitlePP,
|
||||
XAttrMetadataPP,
|
||||
ExecAfterDownloadPP,
|
||||
)
|
||||
|
||||
|
||||
def _real_main(argv=None):
|
||||
@@ -105,24 +95,22 @@ def _real_main(argv=None):
|
||||
_enc = preferredencoding()
|
||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||
|
||||
extractors = gen_extractors()
|
||||
|
||||
if opts.list_extractors:
|
||||
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||
for mu in matchedUrls:
|
||||
compat_print(' ' + mu)
|
||||
sys.exit(0)
|
||||
if opts.list_extractor_descriptions:
|
||||
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
|
||||
for ie in list_extractors(opts.age_limit):
|
||||
if not ie._WORKING:
|
||||
continue
|
||||
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
||||
if desc is False:
|
||||
continue
|
||||
if hasattr(ie, 'SEARCH_KEY'):
|
||||
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny')
|
||||
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
|
||||
_COUNTS = ('', '5', '10', 'all')
|
||||
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||
compat_print(desc)
|
||||
@@ -178,6 +166,7 @@ def _real_main(argv=None):
|
||||
if opts.recodevideo is not None:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
||||
parser.error('invalid video recode format specified')
|
||||
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
else:
|
||||
@@ -209,16 +198,54 @@ def _real_main(argv=None):
|
||||
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||
' template'.format(outtmpl))
|
||||
|
||||
any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
|
||||
any_printing = opts.print_json
|
||||
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
|
||||
|
||||
# PostProcessors
|
||||
postprocessors = []
|
||||
# Add the metadata pp first, the other pps will copy it
|
||||
if opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegMetadata'})
|
||||
if opts.extractaudio:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegExtractAudio',
|
||||
'preferredcodec': opts.audioformat,
|
||||
'preferredquality': opts.audioquality,
|
||||
'nopostoverwrites': opts.nopostoverwrites,
|
||||
})
|
||||
if opts.recodevideo:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegVideoConvertor',
|
||||
'preferedformat': opts.recodevideo,
|
||||
})
|
||||
if opts.embedsubtitles:
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
})
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
if opts.embedthumbnail:
|
||||
if not opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegAudioFix'})
|
||||
postprocessors.append({'key': 'AtomicParsley'})
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
postprocessors.append({
|
||||
'key': 'ExecAfterDownload',
|
||||
'verboseOutput': opts.verbose,
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
})
|
||||
|
||||
ydl_opts = {
|
||||
'usenetrc': opts.usenetrc,
|
||||
'username': opts.username,
|
||||
'password': opts.password,
|
||||
'twofactor': opts.twofactor,
|
||||
'videopassword': opts.videopassword,
|
||||
'quiet': (opts.quiet or any_printing),
|
||||
'quiet': (opts.quiet or any_getting or any_printing),
|
||||
'no_warnings': opts.no_warnings,
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
@@ -228,9 +255,9 @@ def _real_main(argv=None):
|
||||
'forceduration': opts.getduration,
|
||||
'forcefilename': opts.getfilename,
|
||||
'forceformat': opts.getformat,
|
||||
'forcejson': opts.dumpjson,
|
||||
'forcejson': opts.dumpjson or opts.print_json,
|
||||
'dump_single_json': opts.dump_single_json,
|
||||
'simulate': opts.simulate or any_printing,
|
||||
'simulate': opts.simulate or any_getting,
|
||||
'skip_download': opts.skip_download,
|
||||
'format': opts.format,
|
||||
'format_limit': opts.format_limit,
|
||||
@@ -249,6 +276,7 @@ def _real_main(argv=None):
|
||||
'progress_with_newline': opts.progress_with_newline,
|
||||
'playliststart': opts.playliststart,
|
||||
'playlistend': opts.playlistend,
|
||||
'playlistreverse': opts.playlist_reverse,
|
||||
'noplaylist': opts.noplaylist,
|
||||
'logtostderr': opts.outtmpl == '-',
|
||||
'consoletitle': opts.consoletitle,
|
||||
@@ -296,32 +324,13 @@ def _real_main(argv=None):
|
||||
'encoding': opts.encoding,
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
'extract_flat': opts.extract_flat,
|
||||
'merge_output_format': opts.merge_output_format,
|
||||
'postprocessors': postprocessors,
|
||||
'fixup': opts.fixup,
|
||||
'source_address': opts.source_address,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
# PostProcessors
|
||||
# Add the metadata pp first, the other pps will copy it
|
||||
if opts.addmetadata:
|
||||
ydl.add_post_processor(FFmpegMetadataPP())
|
||||
if opts.extractaudio:
|
||||
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||
if opts.recodevideo:
|
||||
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||
if opts.embedsubtitles:
|
||||
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
||||
if opts.xattrs:
|
||||
ydl.add_post_processor(XAttrMetadataPP())
|
||||
if opts.embedthumbnail:
|
||||
if not opts.addmetadata:
|
||||
ydl.add_post_processor(FFmpegAudioFixPP())
|
||||
ydl.add_post_processor(AtomicParsleyPP())
|
||||
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
ydl.add_post_processor(ExecAfterDownloadPP(
|
||||
verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))
|
||||
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose)
|
||||
@@ -359,3 +368,5 @@ def main(argv=None):
|
||||
sys.exit('ERROR: fixed output name but more than one file to download')
|
||||
except KeyboardInterrupt:
|
||||
sys.exit('\nERROR: Interrupted by user')
|
||||
|
||||
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
|
||||
|
@@ -4,6 +4,7 @@ import getpass
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
@@ -247,7 +248,7 @@ else:
|
||||
userhome = compat_getenv('HOME')
|
||||
elif 'USERPROFILE' in os.environ:
|
||||
userhome = compat_getenv('USERPROFILE')
|
||||
elif not 'HOMEPATH' in os.environ:
|
||||
elif 'HOMEPATH' not in os.environ:
|
||||
return path
|
||||
else:
|
||||
try:
|
||||
@@ -297,7 +298,9 @@ else:
|
||||
|
||||
# Old 2.6 and 2.7 releases require kwargs to be bytes
|
||||
try:
|
||||
(lambda x: x)(**{'x': 0})
|
||||
def _testfunc(x):
|
||||
pass
|
||||
_testfunc(**{'x': 0})
|
||||
except TypeError:
|
||||
def compat_kwargs(kwargs):
|
||||
return dict((bytes(k), v) for k, v in kwargs.items())
|
||||
@@ -305,6 +308,32 @@ else:
|
||||
compat_kwargs = lambda kwargs: kwargs
|
||||
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
def compat_socket_create_connection(address, timeout, source_address=None):
|
||||
host, port = address
|
||||
err = None
|
||||
for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
|
||||
af, socktype, proto, canonname, sa = res
|
||||
sock = None
|
||||
try:
|
||||
sock = socket.socket(af, socktype, proto)
|
||||
sock.settimeout(timeout)
|
||||
if source_address:
|
||||
sock.bind(source_address)
|
||||
sock.connect(sa)
|
||||
return sock
|
||||
except socket.error as _:
|
||||
err = _
|
||||
if sock is not None:
|
||||
sock.close()
|
||||
if err is not None:
|
||||
raise err
|
||||
else:
|
||||
raise error("getaddrinfo returns an empty list")
|
||||
else:
|
||||
compat_socket_create_connection = socket.create_connection
|
||||
|
||||
|
||||
# Fix https://github.com/rg3/youtube-dl/issues/4223
|
||||
# See http://bugs.python.org/issue9161 for what is broken
|
||||
def workaround_optparse_bug9161():
|
||||
@@ -341,6 +370,7 @@ __all__ = [
|
||||
'compat_parse_qs',
|
||||
'compat_print',
|
||||
'compat_str',
|
||||
'compat_socket_create_connection',
|
||||
'compat_subprocess_get_DEVNULL',
|
||||
'compat_urllib_error',
|
||||
'compat_urllib_parse',
|
||||
|
@@ -5,8 +5,8 @@ import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
timeconvert,
|
||||
@@ -80,6 +80,8 @@ class FileDownloader(object):
|
||||
def calc_eta(start, now, total, current):
|
||||
if total is None:
|
||||
return None
|
||||
if now is None:
|
||||
now = time.time()
|
||||
dif = now - start
|
||||
if current == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
@@ -146,18 +148,19 @@ class FileDownloader(object):
|
||||
def report_error(self, *args, **kargs):
|
||||
self.ydl.report_error(*args, **kargs)
|
||||
|
||||
def slow_down(self, start_time, byte_counter):
|
||||
def slow_down(self, start_time, now, byte_counter):
|
||||
"""Sleep if the download speed is over the rate limit."""
|
||||
rate_limit = self.params.get('ratelimit', None)
|
||||
if rate_limit is None or byte_counter == 0:
|
||||
return
|
||||
now = time.time()
|
||||
if now is None:
|
||||
now = time.time()
|
||||
elapsed = now - start_time
|
||||
if elapsed <= 0.0:
|
||||
return
|
||||
speed = float(byte_counter) / elapsed
|
||||
if speed > rate_limit:
|
||||
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
|
||||
time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
|
||||
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
@@ -282,7 +285,7 @@ class FileDownloader(object):
|
||||
Return True on success and False otherwise
|
||||
"""
|
||||
# Check file already present
|
||||
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
||||
if filename != '-' and self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
||||
self.report_file_already_downloaded(filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
@@ -302,19 +305,6 @@ class FileDownloader(object):
|
||||
ph(status)
|
||||
|
||||
def add_progress_hook(self, ph):
|
||||
""" ph gets called on download progress, with a dictionary with the entries
|
||||
* filename: The final filename
|
||||
* status: One of "downloading" and "finished"
|
||||
|
||||
It can also have some of the following entries:
|
||||
|
||||
* downloaded_bytes: Bytes on disks
|
||||
* total_bytes: Total bytes, None if unknown
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* eta: The estimated time in seconds, None if unknown
|
||||
* speed: The download speed in bytes/second, None if unknown
|
||||
|
||||
Hooks are guaranteed to be called at least once (with status "finished")
|
||||
if the download is successful.
|
||||
"""
|
||||
# See YoutubeDl.py (search for progress_hooks) for a description of
|
||||
# this interface
|
||||
self._progress_hooks.append(ph)
|
||||
|
@@ -9,10 +9,12 @@ import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
struct_pack,
|
||||
struct_unpack,
|
||||
compat_urlparse,
|
||||
format_bytes,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
@@ -185,24 +187,34 @@ def build_fragments_list(boot_info):
|
||||
return res
|
||||
|
||||
|
||||
def write_flv_header(stream, metadata):
|
||||
"""Writes the FLV header and the metadata to stream"""
|
||||
def write_unsigned_int(stream, val):
|
||||
stream.write(struct_pack('!I', val))
|
||||
|
||||
|
||||
def write_unsigned_int_24(stream, val):
|
||||
stream.write(struct_pack('!I', val)[1:])
|
||||
|
||||
|
||||
def write_flv_header(stream):
|
||||
"""Writes the FLV header to stream"""
|
||||
# FLV header
|
||||
stream.write(b'FLV\x01')
|
||||
stream.write(b'\x05')
|
||||
stream.write(b'\x00\x00\x00\x09')
|
||||
# FLV File body
|
||||
stream.write(b'\x00\x00\x00\x00')
|
||||
# FLVTAG
|
||||
# Script data
|
||||
stream.write(b'\x12')
|
||||
# Size of the metadata with 3 bytes
|
||||
stream.write(struct_pack('!L', len(metadata))[1:])
|
||||
stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
|
||||
stream.write(metadata)
|
||||
# Magic numbers extracted from the output files produced by AdobeHDS.php
|
||||
#(https://github.com/K-S-V/Scripts)
|
||||
stream.write(b'\x00\x00\x01\x73')
|
||||
|
||||
|
||||
def write_metadata_tag(stream, metadata):
|
||||
"""Writes optional metadata tag to stream"""
|
||||
SCRIPT_TAG = b'\x12'
|
||||
FLV_TAG_HEADER_LEN = 11
|
||||
|
||||
if metadata:
|
||||
stream.write(SCRIPT_TAG)
|
||||
write_unsigned_int_24(stream, len(metadata))
|
||||
stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
|
||||
stream.write(metadata)
|
||||
write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
|
||||
|
||||
|
||||
def _add_ns(prop):
|
||||
@@ -231,6 +243,7 @@ class F4mFD(FileDownloader):
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit', None),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
@@ -253,7 +266,11 @@ class F4mFD(FileDownloader):
|
||||
bootstrap = self.ydl.urlopen(bootstrap_url).read()
|
||||
else:
|
||||
bootstrap = base64.b64decode(bootstrap_node.text)
|
||||
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
metadata = base64.b64decode(metadata_node.text)
|
||||
else:
|
||||
metadata = None
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
|
||||
fragments_list = build_fragments_list(boot_info)
|
||||
@@ -266,7 +283,8 @@ class F4mFD(FileDownloader):
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||
write_flv_header(dest_stream, metadata)
|
||||
write_flv_header(dest_stream)
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
|
@@ -4,11 +4,13 @@ import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_request,
|
||||
check_executable,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
@@ -24,12 +26,12 @@ class HlsFD(FileDownloader):
|
||||
'-bsf:a', 'aac_adtstoasc',
|
||||
encodeFilename(tmpfilename, for_subprocess=True)]
|
||||
|
||||
for program in ['avconv', 'ffmpeg']:
|
||||
if check_executable(program, ['-version']):
|
||||
break
|
||||
else:
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
program = ffpp._executable
|
||||
if program is None:
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
ffpp.check_version()
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
|
@@ -4,11 +4,12 @@ import os
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_error,
|
||||
)
|
||||
from ..utils import (
|
||||
ContentTooShortError,
|
||||
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
format_bytes,
|
||||
@@ -136,16 +137,21 @@ class HttpFD(FileDownloader):
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
|
||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||
now = None # needed for slow_down() in the first loop run
|
||||
before = start # start measuring
|
||||
while True:
|
||||
|
||||
# Download and write
|
||||
before = time.time()
|
||||
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
after = time.time()
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# Open file just in time
|
||||
# exit loop when download is finished
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
|
||||
# Open destination file just in time
|
||||
if stream is None:
|
||||
try:
|
||||
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
|
||||
@@ -161,11 +167,22 @@ class HttpFD(FileDownloader):
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
return False
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, now, byte_counter - resume_len)
|
||||
|
||||
# end measuring of one loop run
|
||||
now = time.time()
|
||||
after = now
|
||||
|
||||
# Adjust block size
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
before = after
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
eta = percent = None
|
||||
else:
|
||||
@@ -186,9 +203,6 @@ class HttpFD(FileDownloader):
|
||||
if is_test and byte_counter == data_len:
|
||||
break
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, byte_counter - resume_len)
|
||||
|
||||
if stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
|
@@ -4,8 +4,8 @@ import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_subprocess_get_DEVNULL
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
@@ -20,11 +20,7 @@ class MplayerFD(FileDownloader):
|
||||
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
|
||||
'-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
# Check for mplayer first
|
||||
try:
|
||||
subprocess.call(
|
||||
['mplayer', '-h'],
|
||||
stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
if not check_executable('mplayer', ['-h']):
|
||||
self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
|
||||
return False
|
||||
|
||||
|
@@ -7,9 +7,9 @@ import sys
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
compat_str,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
get_exe_version,
|
||||
@@ -185,7 +185,7 @@ class RtmpFD(FileDownloader):
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == RD_FAILED:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
|
||||
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = RD_SUCCESS
|
||||
|
@@ -3,8 +3,11 @@ from __future__ import unicode_literals
|
||||
from .abc import ABCIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .adobetv import AdobeTVIE
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
@@ -22,7 +25,9 @@ from .arte import (
|
||||
ArteTVDDCIE,
|
||||
ArteTVEmbedIE,
|
||||
)
|
||||
from .audiomack import AudiomackIE
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .auengine import AUEngineIE
|
||||
from .azubu import AzubuIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
@@ -30,6 +35,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bet import BetIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .blinkx import BlinkxIE
|
||||
@@ -50,7 +56,7 @@ from .cbsnews import CBSNewsIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .cinchcast import CinchcastIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
@@ -61,9 +67,13 @@ from .cnet import CNETIE
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNBlogsIE,
|
||||
CNNArticleIE,
|
||||
)
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .criterion import CriterionIE
|
||||
@@ -85,12 +95,14 @@ from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import DRTVIE
|
||||
from .dvtv import DVTVIE
|
||||
from .dump import DumpIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .divxstage import DivxStageIE
|
||||
from .dropbox import DropboxIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .echomsk import EchoMskIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
@@ -103,6 +115,7 @@ from .elpais import ElPaisIE
|
||||
from .empflix import EMPFlixIE
|
||||
from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
from .escapist import EscapistIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
@@ -147,6 +160,8 @@ from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
from .globo import GloboIE
|
||||
from .godtube import GodTubeIE
|
||||
@@ -157,10 +172,13 @@ from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .grooveshark import GroovesharkIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hostingbulk import HostingBulkIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
@@ -190,6 +208,7 @@ from .jove import JoveIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
@@ -258,6 +277,8 @@ from .nbc import (
|
||||
)
|
||||
from .ndr import NDRIE
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nfb import NFBIE
|
||||
@@ -284,6 +305,7 @@ from .nytimes import NYTimesIE
|
||||
from .nuvid import NuvidIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .openfilm import OpenFilmIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFOE1IE,
|
||||
@@ -307,24 +329,30 @@ from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .quickvid import QuickVidIE
|
||||
from .radiode import RadioDeIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import RaiIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .restudy import RestudyIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE
|
||||
from .rtlnl import RtlXlIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
RutubeChannelIE,
|
||||
RutubeEmbedIE,
|
||||
RutubeMovieIE,
|
||||
RutubePersonIE,
|
||||
)
|
||||
@@ -334,6 +362,8 @@ from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .sexykarma import SexyKarmaIE
|
||||
@@ -393,6 +423,7 @@ from .ted import TEDIE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telemb import TeleMBIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .tenplay import TenPlayIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
@@ -420,7 +451,7 @@ from .tunein import TuneInIE
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twitch import TwitchIE
|
||||
@@ -451,6 +482,7 @@ from .videott import VideoTtIE
|
||||
from .videoweed import VideoWeedIE
|
||||
from .vidme import VidmeIE
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoAlbumIE,
|
||||
@@ -486,6 +518,7 @@ from .wdr import (
|
||||
WDRMobileIE,
|
||||
WDRMausIE,
|
||||
)
|
||||
from .webofstories import WebOfStoriesIE
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
@@ -498,10 +531,12 @@ from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
from .xxxymovies import XXXYMoviesIE
|
||||
from .yahoo import (
|
||||
YahooIE,
|
||||
YahooSearchIE,
|
||||
)
|
||||
from .yesjapan import YesJapanIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
@@ -519,12 +554,12 @@ from .youtube import (
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTopListIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
ZingMp3AlbumIE,
|
||||
@@ -545,6 +580,17 @@ def gen_extractors():
|
||||
return [klass() for klass in _ALL_CLASSES]
|
||||
|
||||
|
||||
def list_extractors(age_limit):
|
||||
"""
|
||||
Return a list of extractors that are suitable for the given age,
|
||||
sorted by extractor ID.
|
||||
"""
|
||||
|
||||
return sorted(
|
||||
filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
|
||||
key=lambda ie: ie.IE_NAME.lower())
|
||||
|
||||
|
||||
def get_info_extractor(ie_name):
|
||||
"""Returns the info extractor class with the given ie_name"""
|
||||
return globals()[ie_name + 'IE']
|
||||
|
70
youtube_dl/extractor/adobetv.py
Normal file
70
youtube_dl/extractor/adobetv.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class AdobeTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
|
||||
'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
|
||||
'info_dict': {
|
||||
'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
|
||||
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'upload_date': '20110914',
|
||||
'duration': 60,
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player = self._parse_json(
|
||||
self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'),
|
||||
video_id)
|
||||
|
||||
title = player.get('title') or self._search_regex(
|
||||
r'data-title="([^"]+)"', webpage, 'title')
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
upload_date = unified_strdate(
|
||||
self._html_search_meta('datepublished', webpage, 'upload date'))
|
||||
|
||||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration')
|
||||
or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
|
||||
webpage, 'view count'))
|
||||
|
||||
formats = [{
|
||||
'url': source['src'],
|
||||
'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None,
|
||||
'tbr': source.get('bitrate'),
|
||||
} for source in player['sources']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
@@ -2,123 +2,150 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
xpath_text,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AdultSwimIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
|
||||
_TEST = {
|
||||
'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
|
||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '4da359ec73b58df4575cd01a610ba5dc',
|
||||
'md5': '247572debc75c7652f253c8daa51a14d',
|
||||
'info_dict': {
|
||||
'id': '8a250ba1450996e901453d7f02ca02f5',
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
|
||||
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
|
||||
'uploader': 'Rick and Morty',
|
||||
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
|
||||
}
|
||||
'title': 'Rick and Morty - Pilot Part 1',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': 'ffbdf55af9331c509d95350bd0cc1819',
|
||||
'md5': '77b0e037a4b20ec6b98671c4c379f48d',
|
||||
'info_dict': {
|
||||
'id': '8a250ba1450996e901453d7f4bd102f6',
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
|
||||
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
|
||||
'uploader': 'Rick and Morty',
|
||||
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
|
||||
}
|
||||
'title': 'Rick and Morty - Pilot Part 4',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
},
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
'title': 'Rick and Morty - Pilot',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': 'b92409635540304280b4b6c36bd14a0a',
|
||||
'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
|
||||
'info_dict': {
|
||||
'id': '8a250ba1450996e901453d7fa73c02f7',
|
||||
'id': '-t8CamQlQ2aYZ49ItZCFog-0',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
|
||||
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
|
||||
'uploader': 'Rick and Morty',
|
||||
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': 'e8818891d60e47b29cd89d7b0278156d',
|
||||
'info_dict': {
|
||||
'id': '8a250ba1450996e901453d7fc8ba02f8',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
|
||||
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
|
||||
'uploader': 'Rick and Morty',
|
||||
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
|
||||
}
|
||||
'title': 'American Dad - Putting Francine Out of Business',
|
||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||
},
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'title': 'American Dad - Putting Francine Out of Business',
|
||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||
},
|
||||
}]
|
||||
|
||||
_video_extensions = {
|
||||
'3500': 'flv',
|
||||
'640': 'mp4',
|
||||
'150': 'mp4',
|
||||
'ipad': 'm3u8',
|
||||
'iphone': 'm3u8'
|
||||
}
|
||||
_video_dimensions = {
|
||||
'3500': (1280, 720),
|
||||
'640': (480, 270),
|
||||
'150': (320, 180)
|
||||
}
|
||||
@staticmethod
|
||||
def find_video_info(collection, slug):
|
||||
for video in collection.get('videos'):
|
||||
if video.get('slug') == slug:
|
||||
return video
|
||||
|
||||
@staticmethod
|
||||
def find_collection_by_linkURL(collections, linkURL):
|
||||
for collection in collections:
|
||||
if collection.get('linkURL') == linkURL:
|
||||
return collection
|
||||
|
||||
@staticmethod
|
||||
def find_collection_containing_video(collections, slug):
|
||||
for collection in collections:
|
||||
for video in collection.get('videos'):
|
||||
if video.get('slug') == slug:
|
||||
return collection, video
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_path = mobj.group('path')
|
||||
show_path = mobj.group('show_path')
|
||||
episode_path = mobj.group('episode_path')
|
||||
is_playlist = True if mobj.group('is_playlist') else False
|
||||
|
||||
webpage = self._download_webpage(url, video_path)
|
||||
episode_id = self._html_search_regex(
|
||||
r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
|
||||
webpage, 'episode_id')
|
||||
title = self._og_search_title(webpage)
|
||||
webpage = self._download_webpage(url, episode_path)
|
||||
|
||||
index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
|
||||
idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
|
||||
# Extract the value of `bootstrappedData` from the Javascript in the page.
|
||||
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
|
||||
|
||||
episode_el = idoc.find('.//episode')
|
||||
show_title = episode_el.attrib.get('collectionTitle')
|
||||
episode_title = episode_el.attrib.get('title')
|
||||
thumbnail = episode_el.attrib.get('thumbnailUrl')
|
||||
description = episode_el.find('./description').text.strip()
|
||||
try:
|
||||
bootstrappedData = json.loads(bootstrappedDataJS)
|
||||
except ValueError as ve:
|
||||
errmsg = '%s: Failed to parse JSON ' % episode_path
|
||||
raise ExtractorError(errmsg, cause=ve)
|
||||
|
||||
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
|
||||
# NOTE: We are only downloading one video (the current one) not the playlist
|
||||
if is_playlist:
|
||||
collections = bootstrappedData['playlists']['collections']
|
||||
collection = self.find_collection_by_linkURL(collections, show_path)
|
||||
video_info = self.find_video_info(collection, episode_path)
|
||||
|
||||
show_title = video_info['showTitle']
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
else:
|
||||
collections = bootstrappedData['show']['collections']
|
||||
collection, video_info = self.find_collection_containing_video(collections, episode_path)
|
||||
|
||||
show = bootstrappedData['show']
|
||||
show_title = show['title']
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
|
||||
episode_id = video_info['id']
|
||||
episode_title = video_info['title']
|
||||
episode_description = video_info['description']
|
||||
episode_duration = video_info.get('duration')
|
||||
|
||||
entries = []
|
||||
segment_els = episode_el.findall('./segments/segment')
|
||||
for part_num, segment_id in enumerate(segment_ids):
|
||||
segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
|
||||
|
||||
for part_num, segment_el in enumerate(segment_els):
|
||||
segment_id = segment_el.attrib.get('id')
|
||||
segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
|
||||
thumbnail = segment_el.attrib.get('thumbnailUrl')
|
||||
duration = segment_el.attrib.get('duration')
|
||||
segment_title = '%s - %s' % (show_title, episode_title)
|
||||
if len(segment_ids) > 1:
|
||||
segment_title += ' Part %d' % (part_num + 1)
|
||||
|
||||
segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
|
||||
idoc = self._download_xml(
|
||||
segment_url, segment_title,
|
||||
'Downloading segment information', 'Unable to download segment information')
|
||||
|
||||
segment_duration = float_or_none(
|
||||
xpath_text(idoc, './/trt', 'segment duration').strip())
|
||||
|
||||
formats = []
|
||||
file_els = idoc.findall('.//files/file')
|
||||
|
||||
for file_el in file_els:
|
||||
bitrate = file_el.attrib.get('bitrate')
|
||||
type = file_el.attrib.get('type')
|
||||
width, height = self._video_dimensions.get(bitrate, (None, None))
|
||||
ftype = file_el.attrib.get('type')
|
||||
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (bitrate, type),
|
||||
'url': file_el.text,
|
||||
'ext': self._video_extensions.get(bitrate, 'mp4'),
|
||||
'format_id': '%s_%s' % (bitrate, ftype),
|
||||
'url': file_el.text.strip(),
|
||||
# The bitrate may not be a number (for example: 'iphone')
|
||||
'tbr': int(bitrate) if bitrate.isdigit() else None,
|
||||
'height': height,
|
||||
'width': width
|
||||
'quality': 1 if ftype == 'hd' else -1
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
@@ -127,18 +154,16 @@ class AdultSwimIE(InfoExtractor):
|
||||
'id': segment_id,
|
||||
'title': segment_title,
|
||||
'formats': formats,
|
||||
'uploader': show_title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'description': description
|
||||
'duration': segment_duration,
|
||||
'description': episode_description
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': episode_id,
|
||||
'display_id': video_path,
|
||||
'display_id': episode_path,
|
||||
'entries': entries,
|
||||
'title': '%s %s' % (show_title, episode_title),
|
||||
'description': description,
|
||||
'thumbnail': thumbnail
|
||||
'title': '%s - %s' % (show_title, episode_title),
|
||||
'description': episode_description,
|
||||
'duration': episode_duration
|
||||
}
|
||||
|
35
youtube_dl/extractor/aljazeera.py
Normal file
35
youtube_dl/extractor/aljazeera.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
'info_dict': {
|
||||
'id': '3792260579001',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Slum - Episode 1: Deliverance',
|
||||
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
|
||||
'uploader': 'Al Jazeera English',
|
||||
},
|
||||
'add_ie': ['Brightcove'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
program_name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, program_name)
|
||||
brightcove_id = self._search_regex(
|
||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': (
|
||||
'brightcove:'
|
||||
'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
|
||||
'&%40videoPlayer={0}'.format(brightcove_id)
|
||||
),
|
||||
'ie_key': 'Brightcove',
|
||||
}
|
@@ -5,15 +5,14 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
qualities,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class AllocineIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
|
||||
@@ -45,6 +44,9 @@ class AllocineIE(InfoExtractor):
|
||||
'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/video-19550147/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -75,9 +77,7 @@ class AllocineIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': v,
|
||||
'ext': determine_ext(v),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
77
youtube_dl/extractor/alphaporno.py
Normal file
77
youtube_dl/extractor/alphaporno.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AlphaPornoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
|
||||
'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
|
||||
'info_dict': {
|
||||
'id': '258807',
|
||||
'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sensual striptease porn with Samantha Alexandra',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'timestamp': 1418694611,
|
||||
'upload_date': '20141216',
|
||||
'duration': 387,
|
||||
'filesize_approx': 54120000,
|
||||
'tbr': 1145,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
|
||||
ext = self._html_search_meta(
|
||||
'encodingFormat', webpage, 'ext', default='.mp4')[1:]
|
||||
|
||||
title = self._search_regex(
|
||||
[r'<meta content="([^"]+)" itemprop="description">',
|
||||
r'class="title" itemprop="name">([^<]+)<'],
|
||||
webpage, 'title')
|
||||
thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
|
||||
timestamp = parse_iso8601(self._html_search_meta(
|
||||
'uploadDate', webpage, 'upload date'))
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, 'duration'))
|
||||
filesize_approx = parse_filesize(self._html_search_meta(
|
||||
'contentSize', webpage, 'file size'))
|
||||
bitrate = int_or_none(self._html_search_meta(
|
||||
'bitrate', webpage, 'bitrate'))
|
||||
categories = self._html_search_meta(
|
||||
'keywords', webpage, 'categories', default='').split(',')
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'filesize_approx': filesize_approx,
|
||||
'tbr': bitrate,
|
||||
'categories': categories,
|
||||
'age_limit': age_limit,
|
||||
}
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .fivemin import FiveMinIE
|
||||
|
||||
|
||||
class AolIE(InfoExtractor):
|
||||
@@ -42,31 +41,30 @@ class AolIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
playlist_id = mobj.group('playlist_id')
|
||||
if playlist_id and not self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
if not playlist_id or self._downloader.params.get('noplaylist'):
|
||||
return self.url_result('5min:%s' % video_id)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
|
||||
playlist_html = self._search_regex(
|
||||
r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
|
||||
'playlist HTML')
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': 'aol-video:%s' % m.group('id'),
|
||||
'ie_key': 'Aol',
|
||||
} for m in re.finditer(
|
||||
r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
|
||||
playlist_html)]
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'display_id': mobj.group('playlist_display_id'),
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
|
||||
playlist_html = self._search_regex(
|
||||
r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
|
||||
'playlist HTML')
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': 'aol-video:%s' % m.group('id'),
|
||||
'ie_key': 'Aol',
|
||||
} for m in re.finditer(
|
||||
r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
|
||||
playlist_html)]
|
||||
|
||||
return FiveMinIE._build_result(video_id)
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'display_id': mobj.group('playlist_display_id'),
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
@@ -4,8 +4,8 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
@@ -1,42 +1,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||
_TEST = {
|
||||
"url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
|
||||
'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||
'info_dict': {
|
||||
"title": "1968 Demo - FJCC Conference Presentation Reel #1",
|
||||
"description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
|
||||
"upload_date": "19681210",
|
||||
"uploader": "SRI International"
|
||||
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'ext': 'ogv',
|
||||
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||
'description': 'md5:1780b464abaca9991d8968c877bb53ed',
|
||||
'upload_date': '19681210',
|
||||
'uploader': 'SRI International'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Cops1922',
|
||||
'md5': '18f2a19e6d89af8425671da1cf3d4e04',
|
||||
'info_dict': {
|
||||
'id': 'Cops1922',
|
||||
'ext': 'ogv',
|
||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||
'description': 'md5:70f72ee70882f713d4578725461ffcc3',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
json_url = url + ('?' if '?' in url else '&') + 'output=json'
|
||||
json_data = self._download_webpage(json_url, video_id)
|
||||
data = json.loads(json_data)
|
||||
data = self._download_json(json_url, video_id)
|
||||
|
||||
title = data['metadata']['title'][0]
|
||||
description = data['metadata']['description'][0]
|
||||
uploader = data['metadata']['creator'][0]
|
||||
upload_date = unified_strdate(data['metadata']['date'][0])
|
||||
def get_optional(data_dict, field):
|
||||
return data_dict['metadata'].get(field, [None])[0]
|
||||
|
||||
title = get_optional(data, 'title')
|
||||
description = get_optional(data, 'description')
|
||||
uploader = get_optional(data, 'creator')
|
||||
upload_date = unified_strdate(get_optional(data, 'date'))
|
||||
|
||||
formats = [
|
||||
{
|
||||
|
@@ -37,7 +37,7 @@ class ArteTvIE(InfoExtractor):
|
||||
config_xml_url, video_id, note='Downloading configuration')
|
||||
|
||||
formats = [{
|
||||
'forma_id': q.attrib['quality'],
|
||||
'format_id': q.attrib['quality'],
|
||||
# The playpath starts at 'mp4:', if we don't manually
|
||||
# split the url, rtmpdump will incorrectly parse them
|
||||
'url': q.text.split('mp4:', 1)[0],
|
||||
@@ -133,7 +133,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'tbr': int_or_none(f.get('bitrate')),
|
||||
'quality': qfunc(f['quality']),
|
||||
'quality': qfunc(f.get('quality')),
|
||||
'source_preference': source_pref,
|
||||
}
|
||||
|
||||
|
114
youtube_dl/extractor/atresplayer.py
Normal file
114
youtube_dl/extractor/atresplayer.py
Normal file
@@ -0,0 +1,114 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import hmac
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
|
||||
'md5': 'efd56753cda1bb64df52a3074f62e38a',
|
||||
'info_dict': {
|
||||
'id': 'capitulo-10-especial-solidario-nochebuena',
|
||||
'ext': 'mp4',
|
||||
'title': 'Especial Solidario de Nochebuena',
|
||||
'description': 'md5:e2d52ff12214fa937107d21064075bf1',
|
||||
'duration': 5527.6,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
_USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
|
||||
_MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
|
||||
_TIMESTAMP_SHIFT = 30000
|
||||
|
||||
_TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
|
||||
_URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
|
||||
_PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
|
||||
_EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
episode_id = self._search_regex(
|
||||
r'episode="([^"]+)"', webpage, 'episode id')
|
||||
|
||||
timestamp = int_or_none(self._download_webpage(
|
||||
self._TIME_API_URL,
|
||||
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
|
||||
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
|
||||
token = hmac.new(
|
||||
self._MAGIC.encode('ascii'),
|
||||
(episode_id + timestamp_shifted).encode('utf-8')
|
||||
).hexdigest()
|
||||
|
||||
formats = []
|
||||
for fmt in ['windows', 'android_tablet']:
|
||||
request = compat_urllib_request.Request(
|
||||
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
|
||||
request.add_header('Youtubedl-user-agent', self._USER_AGENT)
|
||||
|
||||
fmt_json = self._download_json(
|
||||
request, video_id, 'Downloading %s video JSON' % fmt)
|
||||
|
||||
result = fmt_json.get('resultDes')
|
||||
if result.lower() != 'ok':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
|
||||
|
||||
for _, video_url in fmt_json['resultObject'].items():
|
||||
if video_url.endswith('/Manifest'):
|
||||
formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'android',
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
player = self._download_json(
|
||||
self._PLAYER_URL_TEMPLATE % episode_id,
|
||||
episode_id)
|
||||
|
||||
path_data = player.get('pathData')
|
||||
|
||||
episode = self._download_xml(
|
||||
self._EPISODE_URL_TEMPLATE % path_data,
|
||||
video_id, 'Downloading episode XML')
|
||||
|
||||
duration = float_or_none(xpath_text(
|
||||
episode, './media/asset/info/technical/contentDuration', 'duration'))
|
||||
|
||||
art = episode.find('./media/asset/info/art')
|
||||
title = xpath_text(art, './name', 'title')
|
||||
description = xpath_text(art, './description', 'description')
|
||||
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
55
youtube_dl/extractor/atttechchannel.py
Normal file
55
youtube_dl/extractor/atttechchannel.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class ATTTechChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
|
||||
_TEST = {
|
||||
'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
|
||||
'info_dict': {
|
||||
'id': '11316',
|
||||
'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
|
||||
'ext': 'flv',
|
||||
'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
|
||||
'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140127',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"url\s*:\s*'(rtmp://[^']+)'",
|
||||
webpage, 'video URL')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'mediaid\s*=\s*(\d+)',
|
||||
webpage, 'video id', fatal=False)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
|
||||
webpage, 'upload date', fatal=False), False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
}
|
@@ -1,11 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .soundcloud import SoundcloudIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
import time
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class AudiomackIE(InfoExtractor):
|
||||
@@ -17,12 +21,13 @@ class AudiomackIE(InfoExtractor):
|
||||
'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
|
||||
'info_dict':
|
||||
{
|
||||
'id': 'roosh-williams/extraordinary',
|
||||
'id': '310086',
|
||||
'ext': 'mp3',
|
||||
'title': 'Roosh Williams - Extraordinary'
|
||||
'uploader': 'Roosh Williams',
|
||||
'title': 'Extraordinary'
|
||||
}
|
||||
},
|
||||
# hosted on soundcloud via audiomack
|
||||
# audiomack wrapper around soundcloud song
|
||||
{
|
||||
'add_ie': ['Soundcloud'],
|
||||
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
|
||||
@@ -38,32 +43,97 @@ class AudiomackIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
|
||||
# Request the extended version of the api for extra fields like artist and title
|
||||
api_response = self._download_json(
|
||||
"http://www.audiomack.com/api/music/url/song/%s?_=%d" % (
|
||||
video_id, time.time()),
|
||||
video_id)
|
||||
'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
|
||||
album_url_tag, time.time()),
|
||||
album_url_tag)
|
||||
|
||||
if "url" not in api_response:
|
||||
raise ExtractorError("Unable to deduce api url of song")
|
||||
realurl = api_response["url"]
|
||||
# API is inconsistent with errors
|
||||
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url %s', url)
|
||||
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# - if so, pass the work off to the soundcloud extractor
|
||||
if SoundcloudIE.suitable(realurl):
|
||||
return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
artist = self._html_search_regex(
|
||||
r'<span class="artist">(.*?)</span>', webpage, "artist")
|
||||
songtitle = self._html_search_regex(
|
||||
r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>',
|
||||
webpage, "title")
|
||||
title = artist + " - " + songtitle
|
||||
# if so, pass the work off to the soundcloud extractor
|
||||
if SoundcloudIE.suitable(api_response['url']):
|
||||
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': realurl,
|
||||
'id': api_response.get('id', album_url_tag),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title'),
|
||||
'url': api_response['url'],
|
||||
}
|
||||
|
||||
|
||||
class AudiomackAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
|
||||
IE_NAME = 'audiomack:album'
|
||||
_TESTS = [
|
||||
# Standard album playlist
|
||||
{
|
||||
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
|
||||
'playlist_count': 15,
|
||||
'info_dict':
|
||||
{
|
||||
'id': '812251',
|
||||
'title': 'Tha Tour: Part 2 (Official Mixtape)'
|
||||
}
|
||||
},
|
||||
# Album playlist ripped from fakeshoredrive with no metadata
|
||||
{
|
||||
'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
|
||||
'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
|
||||
'ext': 'mp3',
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
'playliststart': 8,
|
||||
'playlistend': 8,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
result = {'_type': 'playlist', 'entries': []}
|
||||
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
|
||||
# Therefore we don't know how many songs the album has and must infi-loop until failure
|
||||
for track_no in itertools.count():
|
||||
# Get song's metadata
|
||||
api_response = self._download_json(
|
||||
'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
|
||||
% (album_url_tag, track_no, time.time()), album_url_tag,
|
||||
note='Querying song information (%d)' % (track_no + 1))
|
||||
|
||||
# Total failure, only occurs when url is totally wrong
|
||||
# Won't happen in middle of valid playlist (next case)
|
||||
if 'url' not in api_response or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
|
||||
# URL is good but song id doesn't exist - usually means end of playlist
|
||||
elif not api_response['url']:
|
||||
break
|
||||
else:
|
||||
# Pull out the album metadata and add to result (if it exists)
|
||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||
if apikey in api_response and resultkey not in result:
|
||||
result[resultkey] = api_response[apikey]
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': api_response.get('id', song_id),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title', song_id),
|
||||
'url': api_response['url'],
|
||||
})
|
||||
return result
|
||||
|
@@ -3,10 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@@ -27,23 +28,18 @@ class AUEngineIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
|
||||
title = title.strip()
|
||||
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
||||
links = map(compat_urllib_parse.unquote, links)
|
||||
title = self._html_search_regex(
|
||||
r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title')
|
||||
video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage)
|
||||
video_url = compat_urllib_parse.unquote(video_urls[0])
|
||||
thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage)
|
||||
thumbnail = compat_urllib_parse.unquote(thumbnails[0])
|
||||
|
||||
thumbnail = None
|
||||
video_url = None
|
||||
for link in links:
|
||||
if link.endswith('.png'):
|
||||
thumbnail = link
|
||||
elif '/videos/' in link:
|
||||
video_url = link
|
||||
if not video_url:
|
||||
raise ExtractorError('Could not find video URL')
|
||||
|
||||
ext = '.' + determine_ext(video_url)
|
||||
if ext == title[-len(ext):]:
|
||||
title = title[:-len(ext)]
|
||||
title = remove_end(title, ext)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -5,7 +5,7 @@ import json
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
@@ -4,9 +4,11 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@@ -104,7 +106,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@@ -139,6 +141,12 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'title': 'Hierophany of the Open Grave',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://dotscale.bandcamp.com',
|
||||
'info_dict': {
|
||||
'title': 'Loom',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'flv',
|
||||
'title': 'Kaleidoscope: Leonard Cohen',
|
||||
'description': 'md5:db4755d7a665ae72343779f7dacb402c',
|
||||
'title': 'Kaleidoscope, Leonard Cohen',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
'duration': 1740,
|
||||
},
|
||||
'params': {
|
||||
@@ -71,7 +71,54 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
||||
'info_dict': {
|
||||
'id': 'b04v209v',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, The Essential New Tune Special',
|
||||
'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
|
||||
'duration': 10800,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'p02frcch',
|
||||
'ext': 'flv',
|
||||
'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
|
||||
'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
|
||||
'duration': 3507,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||
'note': 'Video',
|
||||
'info_dict': {
|
||||
'id': 'p025c103',
|
||||
'ext': 'flv',
|
||||
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||
'duration': 226,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
@@ -203,13 +250,66 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _download_playlist(self, playlist_id):
|
||||
try:
|
||||
playlist = self._download_json(
|
||||
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
|
||||
playlist_id, 'Downloading playlist JSON')
|
||||
|
||||
version = playlist.get('defaultAvailableVersion')
|
||||
if version:
|
||||
smp_config = version['smpConfig']
|
||||
title = smp_config['title']
|
||||
description = smp_config['summary']
|
||||
for item in smp_config['items']:
|
||||
kind = item['kind']
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
programme_id = item.get('vpid')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
raise
|
||||
|
||||
# fallback to legacy playlist
|
||||
playlist = self._download_xml(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
|
||||
playlist_id, 'Downloading legacy playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % playlist_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % playlist_id
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % playlist_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
group_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
|
||||
if programme_id:
|
||||
player = self._download_json(
|
||||
'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
|
||||
@@ -219,32 +319,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
duration = player['duration']
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
else:
|
||||
playlist = self._download_xml(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
|
||||
group_id, 'Downloading playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % group_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % group_id
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % group_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(programme_id, subtitles)
|
||||
|
@@ -10,15 +10,15 @@ from ..utils import url_basename
|
||||
class BehindKinkIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
|
||||
'md5': '41ad01222b8442089a55528fec43ec01',
|
||||
'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
|
||||
'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
|
||||
'info_dict': {
|
||||
'id': '36370',
|
||||
'id': '37127',
|
||||
'ext': 'mp4',
|
||||
'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
|
||||
'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
|
||||
'upload_date': '20140814',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
|
||||
'title': 'What are you passionate about – Marley Blaze',
|
||||
'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
|
||||
'upload_date': '20141205',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
@@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
year = mobj.group('year')
|
||||
month = mobj.group('month')
|
||||
day = mobj.group('day')
|
||||
upload_date = year + month + day
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"'file':\s*'([^']+)'",
|
||||
webpage, 'URL base')
|
||||
|
||||
video_id = url_basename(video_url)
|
||||
video_id = video_id.split('_')[0]
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
video_id = url_basename(video_url).split('_')[0]
|
||||
upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'upload_date': upload_date,
|
||||
|
107
youtube_dl/extractor/bet.py
Normal file
107
youtube_dl/extractor/bet.py
Normal file
@@ -0,0 +1,107 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class BetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
||||
'info_dict': {
|
||||
'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
|
||||
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
||||
'ext': 'flv',
|
||||
'title': 'BET News Presents: A Conversation With President Obama',
|
||||
'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
|
||||
'duration': 1534,
|
||||
'timestamp': 1418075340,
|
||||
'upload_date': '20141208',
|
||||
'uploader': 'admin',
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
||||
'info_dict': {
|
||||
'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
|
||||
'display_id': 'justice-for-ferguson-a-community-reacts',
|
||||
'ext': 'flv',
|
||||
'title': 'Justice for Ferguson: A Community Reacts',
|
||||
'description': 'A BET News special.',
|
||||
'duration': 1696,
|
||||
'timestamp': 1416942360,
|
||||
'upload_date': '20141125',
|
||||
'uploader': 'admin',
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
media_url = compat_urllib_parse.unquote(self._search_regex(
|
||||
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
||||
webpage, 'media URL'))
|
||||
|
||||
mrss = self._download_xml(media_url, display_id)
|
||||
|
||||
item = mrss.find('./channel/item')
|
||||
|
||||
NS_MAP = {
|
||||
'dc': 'http://purl.org/dc/elements/1.1/',
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'ka': 'http://kickapps.com/karss',
|
||||
}
|
||||
|
||||
title = xpath_text(item, './title', 'title')
|
||||
description = xpath_text(
|
||||
item, './description', 'description', fatal=False)
|
||||
|
||||
video_id = xpath_text(item, './guid', 'video id', fatal=False)
|
||||
|
||||
timestamp = parse_iso8601(xpath_text(
|
||||
item, xpath_with_ns('./dc:date', NS_MAP),
|
||||
'upload date', fatal=False))
|
||||
uploader = xpath_text(
|
||||
item, xpath_with_ns('./dc:creator', NS_MAP),
|
||||
'uploader', fatal=False)
|
||||
|
||||
media_content = item.find(
|
||||
xpath_with_ns('./media:content', NS_MAP))
|
||||
duration = int_or_none(media_content.get('duration'))
|
||||
smil_url = media_content.get('url')
|
||||
|
||||
thumbnail = media_content.find(
|
||||
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
|
||||
|
||||
formats = self._extract_smil_formats(smil_url, display_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -5,8 +5,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_parse_qs,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -29,10 +27,9 @@ class BiliBiliIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_code = self._search_regex(
|
||||
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
|
||||
|
||||
@@ -55,45 +52,38 @@ class BiliBiliIE(InfoExtractor):
|
||||
thumbnail = self._html_search_meta(
|
||||
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
|
||||
|
||||
player_params = compat_parse_qs(self._html_search_regex(
|
||||
r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
|
||||
webpage, 'player params'))
|
||||
cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
|
||||
|
||||
if 'cid' in player_params:
|
||||
cid = player_params['cid'][0]
|
||||
lq_doc = self._download_xml(
|
||||
'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading LQ video info'
|
||||
)
|
||||
lq_durl = lq_doc.find('./durl')
|
||||
formats = [{
|
||||
'format_id': 'lq',
|
||||
'quality': 1,
|
||||
'url': lq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
lq_durl.find('./size'), get_attr='text'),
|
||||
}]
|
||||
|
||||
lq_doc = self._download_xml(
|
||||
'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading LQ video info'
|
||||
)
|
||||
lq_durl = lq_doc.find('.//durl')
|
||||
formats = [{
|
||||
'format_id': 'lq',
|
||||
'quality': 1,
|
||||
'url': lq_durl.find('./url').text,
|
||||
hq_doc = self._download_xml(
|
||||
'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading HQ video info',
|
||||
fatal=False,
|
||||
)
|
||||
if hq_doc is not False:
|
||||
hq_durl = hq_doc.find('./durl')
|
||||
formats.append({
|
||||
'format_id': 'hq',
|
||||
'quality': 2,
|
||||
'ext': 'flv',
|
||||
'url': hq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
lq_durl.find('./size'), get_attr='text'),
|
||||
}]
|
||||
|
||||
hq_doc = self._download_xml(
|
||||
'http://interface.bilibili.cn/playurl?cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading HQ video info',
|
||||
fatal=False,
|
||||
)
|
||||
if hq_doc is not False:
|
||||
hq_durl = hq_doc.find('.//durl')
|
||||
formats.append({
|
||||
'format_id': 'hq',
|
||||
'quality': 2,
|
||||
'ext': 'flv',
|
||||
'url': hq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
hq_durl.find('./size'), get_attr='text'),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
|
||||
hq_durl.find('./size'), get_attr='text'),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
|
@@ -4,13 +4,17 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
compat_urlparse,
|
||||
clean_html,
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -78,7 +82,25 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
'uploader': 'NostalgiaCritic',
|
||||
'uploader_id': '246467',
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
# https://github.com/rg3/youtube-dl/pull/4404
|
||||
'note': 'Audio only',
|
||||
'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
|
||||
'md5': '76c0a56f24e769ceaab21fbb6416a351',
|
||||
'info_dict': {
|
||||
'id': '7103299',
|
||||
'ext': 'flv',
|
||||
'title': 'Weekly Manga Recap: Kingdom',
|
||||
'description': 'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?',
|
||||
'timestamp': 1417660321,
|
||||
'upload_date': '20141204',
|
||||
'uploader': 'The Rollo T',
|
||||
'uploader_id': '407429',
|
||||
'duration': 7251,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -145,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
'url': real_url,
|
||||
'format_id': role,
|
||||
'format_note': media_type,
|
||||
'vcodec': media_content.get(blip('vcodec')),
|
||||
'vcodec': media_content.get(blip('vcodec')) or 'none',
|
||||
'acodec': media_content.get(blip('acodec')),
|
||||
'filesize': media_content.get('filesize'),
|
||||
'width': int(media_content.get('width')),
|
||||
'height': int(media_content.get('height')),
|
||||
'width': int_or_none(media_content.get('width')),
|
||||
'height': int_or_none(media_content.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -6,25 +6,26 @@ import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
unsmuggle_url,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class BrightcoveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*?\?(?P<query>.*)'
|
||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
||||
|
||||
_TESTS = [
|
||||
|
@@ -33,7 +33,7 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'skip_download': True, # Got enough YouTube download tests
|
||||
},
|
||||
'info_dict': {
|
||||
'description': 'Munchkin the Teddy Bear is back !',
|
||||
'description': 're:Munchkin the Teddy Bear is back ?!',
|
||||
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
||||
},
|
||||
'playlist': [{
|
||||
@@ -42,9 +42,9 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141124',
|
||||
'uploader_id': 'CindysMunchkin',
|
||||
'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
|
||||
'description': 're:© 2014 Munchkin the Shih Tzu',
|
||||
'uploader': 'Munchkin the Shih Tzu',
|
||||
'title': 'Munchkin the Teddy Bear gets her exercise',
|
||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||
},
|
||||
}]
|
||||
}]
|
||||
|
@@ -5,6 +5,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
qualities,
|
||||
@@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])
|
||||
|
||||
fmt_url = next(iter(media.find('VIDEOS'))).text
|
||||
if '/geo' in fmt_url.lower():
|
||||
response = self._request_webpage(
|
||||
HEADRequest(fmt_url), video_id,
|
||||
'Checking if the video is georestricted')
|
||||
if '/blocage' in response.geturl():
|
||||
raise ExtractorError(
|
||||
'The video is not available in your country',
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
for fmt in media.find('VIDEOS'):
|
||||
format_url = fmt.text
|
||||
|
@@ -3,55 +3,50 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
class CeskaTelevizeIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||
'info_dict': {
|
||||
'id': '213512120230004',
|
||||
'ext': 'flv',
|
||||
'title': 'První republika: Španělská chřipka',
|
||||
'duration': 3107.4,
|
||||
'id': '214411058091220',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hyde Park Civilizace',
|
||||
'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 3350,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Works only from Czech Republic.',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
|
||||
'info_dict': {
|
||||
'id': '20138143440',
|
||||
'ext': 'flv',
|
||||
'title': 'Tsatsiki, maminka a policajt',
|
||||
'duration': 6754.1,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
'skip': 'Works only from Czech Republic.',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
||||
'info_dict': {
|
||||
'id': '14716',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
||||
'duration': 90,
|
||||
'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 88.4,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
@@ -78,8 +73,9 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
'requestSource': 'iVysilani',
|
||||
}
|
||||
|
||||
req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
|
||||
data=compat_urllib_parse.urlencode(data))
|
||||
req = compat_urllib_request.Request(
|
||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
data=compat_urllib_parse.urlencode(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
req.add_header('x-addr', '127.0.0.1')
|
||||
@@ -88,39 +84,72 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
|
||||
playlistpage = self._download_json(req, video_id)
|
||||
|
||||
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
|
||||
playlist_url = playlistpage['url']
|
||||
if playlist_url == 'error_region':
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlist = self._download_xml(req, video_id)
|
||||
playlist = self._download_json(req, video_id)
|
||||
|
||||
item = playlist['playlist'][0]
|
||||
formats = []
|
||||
for i in playlist.find('smilRoot/body'):
|
||||
if 'AD' not in i.attrib['id']:
|
||||
base_url = i.attrib['base']
|
||||
parsedurl = compat_urllib_parse_urlparse(base_url)
|
||||
duration = i.attrib['duration']
|
||||
|
||||
for video in i.findall('video'):
|
||||
if video.attrib['label'] != 'AD':
|
||||
format_id = video.attrib['label']
|
||||
play_path = video.attrib['src']
|
||||
vbr = int(video.attrib['system-bitrate'])
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'vbr': vbr,
|
||||
'play_path': play_path,
|
||||
'app': parsedurl.path[1:] + '?' + parsedurl.query,
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
for format_id, stream_url in item['streamUrls'].items():
|
||||
formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
duration = float_or_none(item.get('duration'))
|
||||
thumbnail = item.get('previewImageUrl')
|
||||
|
||||
subtitles = {}
|
||||
subs = item.get('subtitles')
|
||||
if subs:
|
||||
subtitles['cs'] = subs[0]['url']
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
|
||||
'duration': float(duration),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _fix_subtitles(subtitles):
|
||||
""" Convert millisecond-based subtitles to SRT """
|
||||
if subtitles is None:
|
||||
return subtitles # subtitles not requested
|
||||
|
||||
def _msectotimecode(msec):
|
||||
""" Helper utility to convert milliseconds to timecode """
|
||||
components = []
|
||||
for divider in [1000, 60, 60, 100]:
|
||||
components.append(msec % divider)
|
||||
msec //= divider
|
||||
return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)
|
||||
|
||||
def _fix_subtitle(subtitle):
|
||||
for line in subtitle.splitlines():
|
||||
m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
|
||||
if m:
|
||||
yield m.group(1)
|
||||
start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
|
||||
yield "{0} --> {1}".format(start, stop)
|
||||
else:
|
||||
yield line
|
||||
|
||||
fixed_subtitles = {}
|
||||
for k, v in subtitles.items():
|
||||
fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
|
||||
return fixed_subtitles
|
||||
|
@@ -236,16 +236,17 @@ class Channel9IE(InfoExtractor):
|
||||
if contents is None:
|
||||
return contents
|
||||
|
||||
session_meta = {'session_code': self._extract_session_code(html),
|
||||
'session_day': self._extract_session_day(html),
|
||||
'session_room': self._extract_session_room(html),
|
||||
'session_speakers': self._extract_session_speakers(html),
|
||||
}
|
||||
session_meta = {
|
||||
'session_code': self._extract_session_code(html),
|
||||
'session_day': self._extract_session_day(html),
|
||||
'session_room': self._extract_session_room(html),
|
||||
'session_speakers': self._extract_session_speakers(html),
|
||||
}
|
||||
|
||||
for content in contents:
|
||||
content.update(session_meta)
|
||||
|
||||
return contents
|
||||
return self.playlist_result(contents)
|
||||
|
||||
def _extract_list(self, content_path):
|
||||
rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
|
||||
|
52
youtube_dl/extractor/cinchcast.py
Normal file
52
youtube_dl/extractor/cinchcast.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class CinchcastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
# Actual test is run in generic, look for undergroundwellness
|
||||
'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
doc = self._download_xml(
|
||||
'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id,
|
||||
video_id)
|
||||
|
||||
item = doc.find('.//item')
|
||||
title = xpath_text(item, './title', fatal=True)
|
||||
date_str = xpath_text(
|
||||
item, './{http://developer.longtailvideo.com/trac/}date')
|
||||
upload_date = unified_strdate(date_str, day_first=False)
|
||||
# duration is present but wrong
|
||||
formats = []
|
||||
formats.append({
|
||||
'format_id': 'main',
|
||||
'url': item.find(
|
||||
'./{http://search.yahoo.com/mrss/}content').attrib['url'],
|
||||
})
|
||||
backup_url = xpath_text(
|
||||
item, './{http://developer.longtailvideo.com/trac/}backupContent')
|
||||
if backup_url:
|
||||
formats.append({
|
||||
'preference': 2, # seems to be more reliable
|
||||
'format_id': 'backup',
|
||||
'url': backup_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
@@ -2,12 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -15,23 +13,24 @@ class CNETIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||
'md5': '041233212a0d06b179c87cbcca1577b8',
|
||||
'info_dict': {
|
||||
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Hands-on with Microsoft Windows 8.1 Update',
|
||||
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
|
||||
'thumbnail': 're:^http://.*/flmswindows8.jpg$',
|
||||
'uploader_id': 'sarah.mitroff@cbsinteractive.com',
|
||||
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
|
||||
'uploader': 'Sarah Mitroff',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires rtmpdump',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
data_json = self._html_search_regex(
|
||||
r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
@@ -42,37 +41,31 @@ class CNETIE(InfoExtractor):
|
||||
if not vdata:
|
||||
raise ExtractorError('Cannot find video data')
|
||||
|
||||
mpx_account = data['config']['players']['default']['mpx_account']
|
||||
vid = vdata['files']['rtmp']
|
||||
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
|
||||
|
||||
video_id = vdata['id']
|
||||
title = vdata.get('headline')
|
||||
if title is None:
|
||||
title = vdata.get('title')
|
||||
if title is None:
|
||||
raise ExtractorError('Cannot find title!')
|
||||
description = vdata.get('dek')
|
||||
thumbnail = vdata.get('image', {}).get('path')
|
||||
author = vdata.get('author')
|
||||
if author:
|
||||
uploader = '%s %s' % (author['firstName'], author['lastName'])
|
||||
uploader_id = author.get('email')
|
||||
uploader_id = author.get('id')
|
||||
else:
|
||||
uploader = None
|
||||
uploader_id = None
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s-%s-%s' % (
|
||||
f['type'], f['format'],
|
||||
int_or_none(f.get('bitrate'), 1000, default='')),
|
||||
'url': f['uri'],
|
||||
'tbr': int_or_none(f.get('bitrate'), 1000),
|
||||
} for f in vdata['files']['data']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': tp_link,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': thumbnail,
|
||||
|
@@ -11,14 +11,14 @@ from ..utils import (
|
||||
|
||||
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''
|
||||
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'md5': '3e6121ea48df7e2259fe73a0628605c4',
|
||||
'info_dict': {
|
||||
'id': 'sports_2013_06_09_nadal-1-on-1.cnn',
|
||||
'id': 'sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nadal wins 8th French Open title',
|
||||
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
|
||||
@@ -35,6 +35,16 @@ class CNNIE(InfoExtractor):
|
||||
"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||
"upload_date": "20130821",
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
'info_dict': {
|
||||
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -127,3 +137,28 @@ class CNNBlogsIE(InfoExtractor):
|
||||
'url': cnn_url,
|
||||
'ie_key': CNNIE.ie_key(),
|
||||
}
|
||||
|
||||
|
||||
class CNNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
|
||||
'md5': '275b326f85d80dff7592a9820f5dc887',
|
||||
'info_dict': {
|
||||
'id': 'bestoftv/2014/12/21/sotu-crowley-president-obama-north-korea-not-going-to-be-intimidated.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: We\'re not going to be intimidated',
|
||||
'description': 'md5:e735586f3dc936075fa654a4d91b21f9',
|
||||
'upload_date': '20141220',
|
||||
},
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, url_basename(url))
|
||||
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': 'http://cnn.com/video/?/video/' + cnn_url,
|
||||
'ie_key': CNNIE.ie_key(),
|
||||
}
|
||||
|
92
youtube_dl/extractor/collegerama.py
Normal file
92
youtube_dl/extractor/collegerama.py
Normal file
@@ -0,0 +1,92 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CollegeRamaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://collegerama\.tudelft\.nl/Mediasite/Play/(?P<id>[\da-f]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
|
||||
'md5': '481fda1c11f67588c0d9d8fbdced4e39',
|
||||
'info_dict': {
|
||||
'id': '585a43626e544bdd97aeb71a0ec907a01d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
|
||||
'description': '',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 7713.088,
|
||||
'timestamp': 1413309600,
|
||||
'upload_date': '20141014',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
|
||||
'md5': 'ef1fdded95bdf19b12c5999949419c92',
|
||||
'info_dict': {
|
||||
'id': '86a9ea9f53e149079fbdb4202b521ed21d',
|
||||
'ext': 'wmv',
|
||||
'title': '64ste Vakantiecursus: Afvalwater',
|
||||
'description': 'md5:7fd774865cc69d972f542b157c328305',
|
||||
'duration': 10853,
|
||||
'timestamp': 1326446400,
|
||||
'upload_date': '20120113',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_options_request = {
|
||||
"getPlayerOptionsRequest": {
|
||||
"ResourceId": video_id,
|
||||
"QueryString": "",
|
||||
}
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
|
||||
json.dumps(player_options_request))
|
||||
request.add_header('Content-Type', 'application/json')
|
||||
|
||||
player_options = self._download_json(request, video_id)
|
||||
|
||||
presentation = player_options['d']['Presentation']
|
||||
title = presentation['Title']
|
||||
description = presentation.get('Description')
|
||||
thumbnail = None
|
||||
duration = float_or_none(presentation.get('Duration'), 1000)
|
||||
timestamp = int_or_none(presentation.get('UnixTime'), 1000)
|
||||
|
||||
formats = []
|
||||
for stream in presentation['Streams']:
|
||||
for video in stream['VideoUrls']:
|
||||
thumbnail_url = stream.get('ThumbnailUrl')
|
||||
if thumbnail_url:
|
||||
thumbnail = 'http://collegerama.tudelft.nl' + thumbnail_url
|
||||
format_id = video['MediaType']
|
||||
if format_id == 'SS':
|
||||
continue
|
||||
formats.append({
|
||||
'url': video['Location'],
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
57
youtube_dl/extractor/comcarcoff.py
Normal file
57
youtube_dl/extractor/comcarcoff.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class ComCarCoffIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
|
||||
_TESTS = [{
|
||||
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
||||
'info_dict': {
|
||||
'id': 'miranda-sings-happy-thanksgiving-miranda',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141127',
|
||||
'timestamp': 1417107600,
|
||||
'title': 'Happy Thanksgiving Miranda',
|
||||
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
||||
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires ffmpeg',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
if not display_id:
|
||||
display_id = 'comediansincarsgettingcoffee.com'
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
full_data = json.loads(self._search_regex(
|
||||
r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
|
||||
webpage, 'full data json'))
|
||||
|
||||
video_id = full_data['activeVideo']['video']
|
||||
video_data = full_data['videos'][video_id]
|
||||
thumbnails = [{
|
||||
'url': video_data['images']['thumb'],
|
||||
}, {
|
||||
'url': video_data['images']['poster'],
|
||||
}]
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['mediaUrl'], video_id, ext='mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_data['title'],
|
||||
'description': video_data.get('description'),
|
||||
'timestamp': parse_iso8601(video_data.get('pubDate')),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
||||
}
|
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
@@ -48,7 +50,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
)|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
||||
(?:[?#].*|$)'''
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||
@@ -81,6 +83,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
|
||||
'only_matching': True,
|
||||
|
@@ -21,6 +21,7 @@ from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
age_restricted,
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
@@ -40,7 +41,7 @@ class InfoExtractor(object):
|
||||
information about the video (or videos) the URL refers to. This
|
||||
information includes the real video URL, the video title, author and
|
||||
others. The information is stored in a dictionary which is then
|
||||
passed to the FileDownloader. The FileDownloader processes this
|
||||
passed to the YoutubeDL. The YoutubeDL processes this
|
||||
information possibly downloading the video to the file system, among
|
||||
other possible outcomes.
|
||||
|
||||
@@ -92,6 +93,8 @@ class InfoExtractor(object):
|
||||
by this field, regardless of all other values.
|
||||
-1 for default (order by other properties),
|
||||
-2 or smaller for less than default.
|
||||
< -1000 to hide the format (if there is
|
||||
another one which is strictly better)
|
||||
* language_preference Is this in the correct requested
|
||||
language?
|
||||
10 if it's what the URL is about,
|
||||
@@ -111,6 +114,9 @@ class InfoExtractor(object):
|
||||
to add to the request.
|
||||
* http_post_data Additional data to send with a POST
|
||||
request.
|
||||
* stretched_ratio If given and not 1, indicates that the
|
||||
video's pixels are not square.
|
||||
width : height ratio as float.
|
||||
url: Final video URL.
|
||||
ext: Video filename extension.
|
||||
format: The video format, defaults to ext (used for --get-format)
|
||||
@@ -118,6 +124,7 @@ class InfoExtractor(object):
|
||||
|
||||
The following fields are optional:
|
||||
|
||||
alt_title: A secondary title of the video.
|
||||
display_id An alternative identifier for the video, not necessarily
|
||||
unique, but available before title. Typically, id is
|
||||
something like "4234987", title "Dancing naked mole rats",
|
||||
@@ -129,7 +136,7 @@ class InfoExtractor(object):
|
||||
* "resolution" (optional, string "{width}x{height"},
|
||||
deprecated)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: One-line video description.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
timestamp: UNIX timestamp of the moment the video became available.
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
@@ -143,6 +150,17 @@ class InfoExtractor(object):
|
||||
like_count: Number of positive ratings of the video
|
||||
dislike_count: Number of negative ratings of the video
|
||||
comment_count: Number of comments on the video
|
||||
comments: A list of comments, each with one or more of the following
|
||||
properties (all but one of text or html optional):
|
||||
* "author" - human-readable name of the comment author
|
||||
* "author_id" - user ID of the comment author
|
||||
* "id" - Comment ID
|
||||
* "html" - Comment as HTML
|
||||
* "text" - Plain text of the comment
|
||||
* "timestamp" - UNIX timestamp of comment
|
||||
* "parent" - ID of the comment this one is replying to.
|
||||
Set to "root" to indicate that this is a
|
||||
comment to the original video.
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||
should allow to get the same result again. (It will be set
|
||||
@@ -158,8 +176,8 @@ class InfoExtractor(object):
|
||||
|
||||
|
||||
_type "playlist" indicates multiple videos.
|
||||
There must be a key "entries", which is a list or a PagedList object, each
|
||||
element of which is a valid dictionary under this specfication.
|
||||
There must be a key "entries", which is a list, an iterable, or a PagedList
|
||||
object, each element of which is a valid dictionary by this specification.
|
||||
|
||||
Additionally, playlists can have "title" and "id" attributes with the same
|
||||
semantics as videos (see above).
|
||||
@@ -174,9 +192,10 @@ class InfoExtractor(object):
|
||||
_type "url" indicates that the video must be extracted from another
|
||||
location, possibly by a different extractor. Its only required key is:
|
||||
"url" - the next URL to extract.
|
||||
|
||||
Additionally, it may have properties believed to be identical to the
|
||||
resolved entity, for example "title" if the title of the referred video is
|
||||
The key "ie_key" can be set to the class name (minus the trailing "IE",
|
||||
e.g. "Youtube") if the extractor class is known in advance.
|
||||
Additionally, the dictionary may have any properties of the resolved entity
|
||||
known in advance, for example "title" if the title of the referred video is
|
||||
known ahead of time.
|
||||
|
||||
|
||||
@@ -360,9 +379,19 @@ class InfoExtractor(object):
|
||||
|
||||
return content
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
|
||||
""" Returns the data of the page as a string """
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
|
||||
success = False
|
||||
try_count = 0
|
||||
while success is False:
|
||||
try:
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
|
||||
success = True
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
try_count += 1
|
||||
if try_count >= tries:
|
||||
raise e
|
||||
self._sleep(timeout, video_id)
|
||||
if res is False:
|
||||
return res
|
||||
else:
|
||||
@@ -390,6 +419,10 @@ class InfoExtractor(object):
|
||||
url_or_request, video_id, note, errnote, fatal=fatal)
|
||||
if (not fatal) and json_string is False:
|
||||
return None
|
||||
return self._parse_json(
|
||||
json_string, video_id, transform_source=transform_source, fatal=fatal)
|
||||
|
||||
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
|
||||
if transform_source:
|
||||
json_string = transform_source(json_string)
|
||||
try:
|
||||
@@ -439,7 +472,7 @@ class InfoExtractor(object):
|
||||
return video_info
|
||||
|
||||
@staticmethod
|
||||
def playlist_result(entries, playlist_id=None, playlist_title=None):
|
||||
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
|
||||
"""Returns a playlist"""
|
||||
video_info = {'_type': 'playlist',
|
||||
'entries': entries}
|
||||
@@ -447,6 +480,8 @@ class InfoExtractor(object):
|
||||
video_info['id'] = playlist_id
|
||||
if playlist_title:
|
||||
video_info['title'] = playlist_title
|
||||
if playlist_description:
|
||||
video_info['description'] = playlist_description
|
||||
return video_info
|
||||
|
||||
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
@@ -581,9 +616,9 @@ class InfoExtractor(object):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_regex(
|
||||
r'''(?ix)<meta
|
||||
r'''(?isx)<meta
|
||||
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
||||
[^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
|
||||
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||
|
||||
def _dc_search_uploader(self, html):
|
||||
@@ -707,8 +742,14 @@ class InfoExtractor(object):
|
||||
'Unable to download f4m manifest')
|
||||
|
||||
formats = []
|
||||
manifest_version = '1.0'
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
|
||||
if not media_nodes:
|
||||
manifest_version = '2.0'
|
||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
if manifest_version == '2.0':
|
||||
manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
format_id = 'f4m-%d' % (i if tbr is None else tbr)
|
||||
formats.append({
|
||||
@@ -790,6 +831,49 @@ class InfoExtractor(object):
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
# TODO: improve extraction
|
||||
def _extract_smil_formats(self, smil_url, video_id):
|
||||
smil = self._download_xml(
|
||||
smil_url, video_id, 'Downloading SMIL file',
|
||||
'Unable to download SMIL file')
|
||||
|
||||
base = smil.find('./head/meta').get('base')
|
||||
|
||||
formats = []
|
||||
rtmp_count = 0
|
||||
for video in smil.findall('./body/switch/video'):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
if not proto:
|
||||
if base:
|
||||
if base.startswith('rtmp'):
|
||||
proto = 'rtmp'
|
||||
elif base.startswith('http'):
|
||||
proto = 'http'
|
||||
ext = video.get('ext')
|
||||
if proto == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
|
||||
elif proto == 'rtmp':
|
||||
rtmp_count += 1
|
||||
streamer = video.get('streamer') or base
|
||||
formats.append({
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
@@ -824,6 +908,35 @@ class InfoExtractor(object):
|
||||
None, '/', True, False, expire_time, '', None, None, None)
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def get_testcases(self, include_onlymatching=False):
|
||||
t = getattr(self, '_TEST', None)
|
||||
if t:
|
||||
assert not hasattr(self, '_TESTS'), \
|
||||
'%s has _TEST and _TESTS' % type(self).__name__
|
||||
tests = [t]
|
||||
else:
|
||||
tests = getattr(self, '_TESTS', [])
|
||||
for t in tests:
|
||||
if not include_onlymatching and t.get('only_matching', False):
|
||||
continue
|
||||
t['name'] = type(self).__name__[:-len('IE')]
|
||||
yield t
|
||||
|
||||
def is_suitable(self, age_limit):
|
||||
""" Test whether the extractor is generally suitable for the given
|
||||
age limit (i.e. pornographic sites are not, all others usually are) """
|
||||
|
||||
any_restricted = False
|
||||
for tc in self.get_testcases(include_onlymatching=False):
|
||||
if 'playlist' in tc:
|
||||
tc = tc['playlist'][0]
|
||||
is_restricted = age_restricted(
|
||||
tc.get('info_dict', {}).get('age_limit'), age_limit)
|
||||
if not is_restricted:
|
||||
return True
|
||||
any_restricted = any_restricted or is_restricted
|
||||
return not any_restricted
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
29
youtube_dl/extractor/commonmistakes.py
Normal file
29
youtube_dl/extractor/commonmistakes.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class CommonMistakesIE(InfoExtractor):
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:url|URL)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'url',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'URL',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
msg = (
|
||||
'You\'ve asked youtube-dl to download the URL "%s". '
|
||||
'That doesn\'t make any sense. '
|
||||
'Simply remove the parameter in your command or configuration.'
|
||||
) % url
|
||||
if self._downloader.params.get('verbose'):
|
||||
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
|
||||
raise ExtractorError(msg, expected=True)
|
@@ -5,12 +5,14 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
orderedSet,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
)
|
||||
|
||||
|
||||
class CondeNastIE(InfoExtractor):
|
||||
|
@@ -10,10 +10,12 @@ import xml.etree.ElementTree
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
intlist_to_bytes,
|
||||
unified_strdate,
|
||||
@@ -27,10 +29,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
|
||||
'info_dict': {
|
||||
'id': '645513',
|
||||
'ext': 'flv',
|
||||
@@ -45,7 +46,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
'360': ('60', '106'),
|
||||
@@ -224,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
|
||||
|
||||
formats = []
|
||||
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
|
||||
for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
|
||||
|
@@ -27,7 +27,6 @@ class CSpanIE(InfoExtractor):
|
||||
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
|
||||
# For whatever reason, the served video alternates between
|
||||
# two different ones
|
||||
#'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
|
||||
'info_dict': {
|
||||
'id': '340723',
|
||||
'ext': 'mp4',
|
||||
|
@@ -8,13 +8,15 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
@@ -5,7 +5,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
@@ -38,7 +38,7 @@ class DaumIE(InfoExtractor):
|
||||
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
||||
webpage = self._download_webpage(canonical_url, video_id)
|
||||
full_id = self._search_regex(
|
||||
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
|
||||
r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
|
||||
webpage, 'full id')
|
||||
query = compat_urllib_parse.urlencode({'vid': full_id})
|
||||
info = self._download_xml(
|
||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -61,7 +62,7 @@ class DBTVIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video['id'],
|
||||
'id': compat_str(video['id']),
|
||||
'display_id': display_id,
|
||||
'title': video['title'],
|
||||
'description': clean_html(video['desc']),
|
||||
|
@@ -1,47 +1,45 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DiscoveryIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
||||
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
|
||||
_TEST = {
|
||||
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
|
||||
'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
|
||||
'info_dict': {
|
||||
'id': '614784',
|
||||
'ext': 'mp4',
|
||||
'title': 'MythBusters: Mission Impossible Outtakes',
|
||||
'id': 'mission-impossible-outtakes',
|
||||
'ext': 'flv',
|
||||
'title': 'Mission Impossible Outtakes',
|
||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
|
||||
' back.'),
|
||||
'duration': 156,
|
||||
'timestamp': 1303099200,
|
||||
'upload_date': '20110418',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
|
||||
webpage, 'video list', flags=re.DOTALL)
|
||||
video_list = json.loads(video_list_json)
|
||||
info = video_list['clips'][0]
|
||||
formats = []
|
||||
for f in info['mp4']:
|
||||
formats.append(
|
||||
{'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
|
||||
info = self._parse_json(self._search_regex(
|
||||
r'(?s)<script type="application/ld\+json">(.*?)</script>',
|
||||
webpage, 'video info'), video_id)
|
||||
|
||||
return {
|
||||
'id': info['contentId'],
|
||||
'title': video_list['name'],
|
||||
'formats': formats,
|
||||
'description': info['videoCaption'],
|
||||
'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'),
|
||||
'duration': info['duration'],
|
||||
'id': video_id,
|
||||
'title': info['name'],
|
||||
'url': info['contentURL'],
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'timestamp': parse_iso8601(info.get('uploadDate')),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
}
|
||||
|
125
youtube_dl/extractor/dvtv.py
Normal file
125
youtube_dl/extractor/dvtv.py
Normal file
@@ -0,0 +1,125 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class DVTVIE(InfoExtractor):
|
||||
IE_NAME = 'dvtv'
|
||||
IE_DESC = 'http://video.aktualne.cz/'
|
||||
|
||||
_VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
|
||||
'md5': '67cb83e4a955d36e1b5d31993134a0c2',
|
||||
'info_dict': {
|
||||
'id': 'dc0768de855511e49e4b0025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
|
||||
'md5': '6388f1941b48537dbd28791f712af8bf',
|
||||
'info_dict': {
|
||||
'id': '72c02230849211e49f60002590604f2e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
|
||||
'info_dict': {
|
||||
'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
|
||||
'id': '973eb3bc854e11e498be002590604f2e',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'da7ca6be4935532241fa9520b3ad91e4',
|
||||
'info_dict': {
|
||||
'id': 'b0b40906854d11e4bdad0025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
|
||||
}
|
||||
}, {
|
||||
'md5': '5f7652a08b05009c1292317b449ffea2',
|
||||
'info_dict': {
|
||||
'id': '420ad9ec854a11e4bdad0025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
|
||||
}
|
||||
}, {
|
||||
'md5': '498eb9dfa97169f409126c617e2a3d64',
|
||||
'info_dict': {
|
||||
'id': '95d35580846a11e4b6d20025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
|
||||
}
|
||||
}, {
|
||||
'md5': 'b8dc6b744844032dab6ba3781a7274b9',
|
||||
'info_dict': {
|
||||
'id': '6fe14d66853511e4833a0025900fea04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
|
||||
}
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_video_metadata(self, js, video_id):
|
||||
metadata = self._parse_json(js, video_id, transform_source=js_to_json)
|
||||
|
||||
formats = []
|
||||
for video in metadata['sources']:
|
||||
ext = video['type'][6:]
|
||||
formats.append({
|
||||
'url': video['file'],
|
||||
'ext': ext,
|
||||
'format_id': '%s-%s' % (ext, video['label']),
|
||||
'height': int(video['label'].rstrip('p')),
|
||||
'fps': 25,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': metadata['mediaid'],
|
||||
'title': unescapeHTML(metadata['title']),
|
||||
'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# single video
|
||||
item = self._search_regex(
|
||||
r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
|
||||
webpage, 'video', default=None, fatal=False)
|
||||
|
||||
if item:
|
||||
return self._parse_video_metadata(item, video_id)
|
||||
|
||||
# playlist
|
||||
items = re.findall(
|
||||
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
|
||||
webpage)
|
||||
|
||||
if items:
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'entries': [self._parse_video_metadata(i, video_id) for i in items]
|
||||
}
|
||||
|
||||
raise ExtractorError('Could not find neither video nor playlist')
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -20,8 +18,7 @@ class EbaumsWorldIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
config = self._download_xml(
|
||||
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
||||
video_url = config.find('file').text
|
||||
|
46
youtube_dl/extractor/echomsk.py
Normal file
46
youtube_dl/extractor/echomsk.py
Normal file
@@ -0,0 +1,46 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EchoMskIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.echo.msk.ru/sounds/1464134.html',
|
||||
'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
|
||||
'info_dict': {
|
||||
'id': '1464134',
|
||||
'ext': 'mp3',
|
||||
'title': 'Особое мнение - 29 декабря 2014, 19:08',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
audio_url = self._search_regex(
|
||||
r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
|
||||
webpage, 'title')
|
||||
|
||||
air_date = self._html_search_regex(
|
||||
r'(?s)<div class="date">(.+?)</div>',
|
||||
webpage, 'date', fatal=False, default=None)
|
||||
|
||||
if air_date:
|
||||
air_date = re.sub(r'(\s)\1+', r'\1', air_date)
|
||||
if air_date:
|
||||
title = '%s - %s' % (title, air_date)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
}
|
@@ -1,8 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
@@ -24,11 +22,10 @@ class EHowIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
|
||||
webpage, 'video URL')
|
||||
video_url = self._search_regex(
|
||||
r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
|
||||
final_url = compat_urllib_parse.unquote(video_url)
|
||||
uploader = self._html_search_meta('uploader', webpage)
|
||||
title = self._og_search_title(webpage).replace(' | eHow', '')
|
||||
|
@@ -6,9 +6,12 @@ import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class EightTracksIE(InfoExtractor):
|
||||
@@ -112,14 +115,29 @@ class EightTracksIE(InfoExtractor):
|
||||
session = str(random.randint(0, 1000000000))
|
||||
mix_id = data['id']
|
||||
track_count = data['tracks_count']
|
||||
duration = data['duration']
|
||||
avg_song_duration = float(duration) / track_count
|
||||
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||
next_url = first_url
|
||||
entries = []
|
||||
|
||||
for i in range(track_count):
|
||||
api_json = self._download_webpage(
|
||||
next_url, playlist_id,
|
||||
note='Downloading song information %d/%d' % (i + 1, track_count),
|
||||
errnote='Failed to download song information')
|
||||
api_json = None
|
||||
download_tries = 0
|
||||
|
||||
while api_json is None:
|
||||
try:
|
||||
api_json = self._download_webpage(
|
||||
next_url, playlist_id,
|
||||
note='Downloading song information %d/%d' % (i + 1, track_count),
|
||||
errnote='Failed to download song information')
|
||||
except ExtractorError:
|
||||
if download_tries > 3:
|
||||
raise
|
||||
else:
|
||||
download_tries += 1
|
||||
self._sleep(avg_song_duration, playlist_id)
|
||||
|
||||
api_data = json.loads(api_json)
|
||||
track_data = api_data['set']['track']
|
||||
info = {
|
||||
@@ -131,6 +149,7 @@ class EightTracksIE(InfoExtractor):
|
||||
'ext': 'm4a',
|
||||
}
|
||||
entries.append(info)
|
||||
|
||||
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
|
||||
session, mix_id, track_data['id'])
|
||||
return {
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -12,32 +11,49 @@ from ..utils import (
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
|
||||
'md5': 'e4af06f3bf0d5f471921a18db5764642',
|
||||
'info_dict': {
|
||||
'id': '0-7jqrsr18',
|
||||
'ext': 'mp4',
|
||||
'title': 'What\'s Wrong with These Photos? A Whole Lot',
|
||||
'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
|
||||
'timestamp': 1406876400,
|
||||
'upload_date': '20140801',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ellentube.com/videos/0-dvzmabd5/',
|
||||
'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
|
||||
'info_dict': {
|
||||
'id': '0-dvzmabd5',
|
||||
'ext': 'mp4',
|
||||
'title': '1 year old twin sister makes her brother laugh',
|
||||
'description': '1 year old twin sister makes her brother laugh',
|
||||
'timestamp': 1419542075,
|
||||
'upload_date': '20141225',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._html_search_meta('VideoURL', webpage, 'url')
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description') or self._og_search_description(webpage)
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<span class="publish-date"><time datetime="([^"]+)">',
|
||||
webpage, 'timestamp'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'url': self._html_search_meta('VideoURL', webpage, 'url'),
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
@@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
playlist = self._extract_playlist(webpage)
|
||||
|
@@ -1,8 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
@@ -24,9 +22,7 @@ class ElPaisIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
prefix = self._html_search_regex(
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .fivemin import FiveMinIE
|
||||
from ..utils import (
|
||||
url_basename,
|
||||
)
|
||||
@@ -27,11 +26,10 @@ class EngadgetIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if video_id is not None:
|
||||
return FiveMinIE._build_result(video_id)
|
||||
return self.url_result('5min:%s' % video_id)
|
||||
else:
|
||||
title = url_basename(url)
|
||||
webpage = self._download_webpage(url, title)
|
||||
@@ -39,5 +37,5 @@ class EngadgetIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': title,
|
||||
'entries': [FiveMinIE._build_result(id) for id in ids]
|
||||
'entries': [self.url_result('5min:%s' % vid) for vid in ids]
|
||||
}
|
||||
|
45
youtube_dl/extractor/eroprofile.py
Normal file
45
youtube_dl/extractor/eroprofile.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EroProfileIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
|
||||
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
|
||||
'info_dict': {
|
||||
'id': '3733775',
|
||||
'display_id': 'sexy-babe-softcore',
|
||||
'ext': 'm4v',
|
||||
'title': 'sexy babe softcore',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||
webpage, 'video id', default=None)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<source src="([^"]+)', webpage, 'video url')
|
||||
title = self._html_search_regex(
|
||||
r'Title:</th><td>([^<]+)</td>', webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18,
|
||||
}
|
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
@@ -3,16 +3,18 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class ExtremeTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
|
||||
@@ -31,7 +33,7 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
video_id = mobj.group('id')
|
||||
url = 'http://www.' + mobj.group('url')
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
|
@@ -13,9 +13,10 @@ from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
limit_length,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '637842556329505',
|
||||
'ext': 'mp4',
|
||||
'duration': 38,
|
||||
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
|
||||
}
|
||||
}, {
|
||||
@@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'duration': int(video_data['video_duration']),
|
||||
'thumbnail': video_data['thumbnail_src'],
|
||||
'duration': int_or_none(video_data.get('video_duration')),
|
||||
'thumbnail': video_data.get('thumbnail_src'),
|
||||
}
|
||||
|
@@ -1,19 +1,20 @@
|
||||
#! -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_TEST = {
|
||||
'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
|
||||
@@ -26,9 +27,7 @@ class FC2IE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
|
||||
|
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class FiredriveIE(InfoExtractor):
|
||||
@@ -28,11 +30,8 @@ class FiredriveIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
url = 'http://firedrive.com/file/%s' % video_id
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
|
||||
|
@@ -1,11 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
class FiveMinIE(InfoExtractor):
|
||||
IE_NAME = '5min'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
|
||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
||||
5min:)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
@@ -41,13 +41,8 @@ class FiveMinIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _build_result(cls, video_id):
|
||||
return cls.url_result('5min:%s' % video_id, cls.ie_key())
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed page')
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class FKTVIE(InfoExtractor):
|
||||
IE_NAME = 'fernsehkritik.tv'
|
||||
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
|
||||
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fernsehkritik.tv/folge-1',
|
||||
@@ -26,29 +26,32 @@ class FKTVIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
episode = int(mobj.group('ep'))
|
||||
episode = int(self._match_id(url))
|
||||
|
||||
server = random.randint(2, 4)
|
||||
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
|
||||
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
|
||||
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
|
||||
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
|
||||
episode)
|
||||
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
|
||||
'playlist', flags=re.DOTALL)
|
||||
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
|
||||
# TODO: return a single multipart video
|
||||
|
||||
videos = []
|
||||
for i, _ in enumerate(files, 1):
|
||||
video_id = '%04d%d' % (episode, i)
|
||||
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
|
||||
video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
|
||||
videos.append({
|
||||
'ext': 'flv',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
|
||||
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
|
||||
'thumbnail': video_thumbnail
|
||||
})
|
||||
return videos
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'entries': videos,
|
||||
'id': 'folge-%s' % episode,
|
||||
}
|
||||
|
||||
|
||||
class FKTVPosteckeIE(InfoExtractor):
|
||||
|
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
parse_duration,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,9 +33,7 @@ class FourTubeIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage_url = 'http://www.4tube.com/videos/' + video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
|
@@ -5,7 +5,7 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
@@ -6,13 +6,15 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
parse_duration,
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
|
@@ -6,7 +6,9 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_with_ns,
|
||||
parse_iso8601
|
||||
parse_iso8601,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
NAMESPACE_MAP = {
|
||||
@@ -21,25 +23,41 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
|
||||
|
||||
class GameOneIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.gameone.de/tv/288',
|
||||
'md5': '136656b7fb4c9cb4a8e2d500651c499b',
|
||||
'info_dict': {
|
||||
'id': '288',
|
||||
'ext': 'mp4',
|
||||
'title': 'Game One - Folge 288',
|
||||
'duration': 1238,
|
||||
'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
|
||||
'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
|
||||
'age_limit': 16,
|
||||
'upload_date': '20140513',
|
||||
'timestamp': 1399980122,
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.gameone.de/tv/288',
|
||||
'md5': '136656b7fb4c9cb4a8e2d500651c499b',
|
||||
'info_dict': {
|
||||
'id': '288',
|
||||
'ext': 'mp4',
|
||||
'title': 'Game One - Folge 288',
|
||||
'duration': 1238,
|
||||
'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
|
||||
'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
|
||||
'age_limit': 16,
|
||||
'upload_date': '20140513',
|
||||
'timestamp': 1399980122,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://gameone.de/tv/220',
|
||||
'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
|
||||
'info_dict': {
|
||||
'id': '220',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20120918',
|
||||
'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
|
||||
'timestamp': 1347971451,
|
||||
'title': 'Game One - Folge 220',
|
||||
'duration': 896.62,
|
||||
'age_limit': 16,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
og_video = self._og_search_video_url(webpage, secure=False)
|
||||
@@ -66,13 +84,13 @@ class GameOneIE(InfoExtractor):
|
||||
video_id,
|
||||
'Downloading media:content')
|
||||
rendition_items = content.findall('.//rendition')
|
||||
duration = int(rendition_items[0].get('duration'))
|
||||
duration = float_or_none(rendition_items[0].get('duration'))
|
||||
formats = [
|
||||
{
|
||||
'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
|
||||
'width': int(r.get('width')),
|
||||
'height': int(r.get('height')),
|
||||
'tbr': int(r.get('bitrate')),
|
||||
'width': int_or_none(r.get('width')),
|
||||
'height': int_or_none(r.get('height')),
|
||||
'tbr': int_or_none(r.get('bitrate')),
|
||||
}
|
||||
for r in rendition_items
|
||||
]
|
||||
@@ -105,7 +123,8 @@ class GameOnePlaylistIE(InfoExtractor):
|
||||
webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
|
||||
max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
|
||||
entries = [
|
||||
self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne')
|
||||
self.url_result('http://www.gameone.de/tv/%d' %
|
||||
video_id, 'GameOne')
|
||||
for video_id in range(max_id, 0, -1)]
|
||||
|
||||
return {
|
||||
|
@@ -4,9 +4,11 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
@@ -39,7 +39,8 @@ class GDCVaultIE(InfoExtractor):
|
||||
'id': '1015301',
|
||||
'ext': 'flv',
|
||||
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
|
||||
}
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}
|
||||
]
|
||||
|
||||
|
@@ -23,6 +23,7 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_basename,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
@@ -130,12 +131,13 @@ class GenericIE(InfoExtractor):
|
||||
# ooyala video
|
||||
{
|
||||
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||
'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
|
||||
'md5': '166dd577b433b4d4ebfee10b0824d8ff',
|
||||
'info_dict': {
|
||||
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
|
||||
'ext': 'mp4',
|
||||
'title': '2cc213299525360.mov', # that's what we get
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
@@ -145,7 +147,7 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130224',
|
||||
'uploader_id': 'TheVerge',
|
||||
'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
|
||||
'description': 're:^Chris Ziegler takes a look at the\.*',
|
||||
'uploader': 'The Verge',
|
||||
'title': 'First Firefox OS phones side-by-side',
|
||||
},
|
||||
@@ -180,6 +182,14 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
},
|
||||
},
|
||||
# BBC iPlayer embeds
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
|
||||
'info_dict': {
|
||||
'title': 'BBC - Blogs - Adam Curtis - BUGGER',
|
||||
},
|
||||
'playlist_mincount': 18,
|
||||
},
|
||||
# RUTV embed
|
||||
{
|
||||
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
||||
@@ -467,8 +477,17 @@ class GenericIE(InfoExtractor):
|
||||
'expected_warnings': [
|
||||
'URL could be a direct video link, returning it as such.'
|
||||
]
|
||||
}
|
||||
|
||||
},
|
||||
# Cinchcast embed
|
||||
{
|
||||
'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
|
||||
'info_dict': {
|
||||
'id': '7141703',
|
||||
'ext': 'mp3',
|
||||
'upload_date': '20141126',
|
||||
'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -689,9 +708,9 @@ class GenericIE(InfoExtractor):
|
||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||
|
||||
# Helper method
|
||||
def _playlist_from_matches(matches, getter, ie=None):
|
||||
def _playlist_from_matches(matches, getter=None, ie=None):
|
||||
urlrs = orderedSet(
|
||||
self.url_result(self._proto_relative_url(getter(m)), ie)
|
||||
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
|
||||
for m in matches)
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
@@ -895,6 +914,11 @@ class GenericIE(InfoExtractor):
|
||||
return _playlist_from_matches(
|
||||
matches, getter=unescapeHTML, ie='FunnyOrDie')
|
||||
|
||||
# Look for BBC iPlayer embed
|
||||
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='BBCCoUk')
|
||||
|
||||
# Look for embedded RUTV player
|
||||
rutv_url = RUTVIE._extract_url(webpage)
|
||||
if rutv_url:
|
||||
@@ -902,7 +926,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded TED player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'TED')
|
||||
|
||||
@@ -962,6 +986,13 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'SBS')
|
||||
|
||||
# Look for embedded Cinchcast player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Cinchcast')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
|
||||
webpage)
|
||||
@@ -1041,7 +1072,7 @@ class GenericIE(InfoExtractor):
|
||||
'url': new_url,
|
||||
}
|
||||
if not found:
|
||||
raise ExtractorError('Unsupported URL: %s' % url)
|
||||
raise UnsupportedError(url)
|
||||
|
||||
entries = []
|
||||
for video_url in found:
|
||||
|
81
youtube_dl/extractor/giantbomb.py
Normal file
81
youtube_dl/extractor/giantbomb.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
qualities,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GiantBombIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
|
||||
'md5': '57badeface303ecf6b98b812de1b9018',
|
||||
'info_dict': {
|
||||
'id': '2300-9782',
|
||||
'display_id': 'quick-look-destiny-the-dark-below',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quick Look: Destiny: The Dark Below',
|
||||
'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24',
|
||||
'duration': 2399,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
video = json.loads(unescapeHTML(self._search_regex(
|
||||
r'data-video="([^"]+)"', webpage, 'data-video')))
|
||||
|
||||
duration = int_or_none(video.get('lengthSeconds'))
|
||||
|
||||
quality = qualities([
|
||||
'f4m_low', 'progressive_low', 'f4m_high',
|
||||
'progressive_high', 'f4m_hd', 'progressive_hd'])
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video['videoStreams'].items():
|
||||
if format_id == 'f4m_stream':
|
||||
continue
|
||||
if video_url.endswith('.f4m'):
|
||||
f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
|
||||
if f4m_formats:
|
||||
f4m_formats[0]['quality'] = quality(format_id)
|
||||
formats.extend(f4m_formats)
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
|
||||
if not formats:
|
||||
youtube_id = video.get('youtubeID')
|
||||
if youtube_id:
|
||||
return self.url_result(youtube_id, 'Youtube')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
101
youtube_dl/extractor/giga.py
Normal file
101
youtube_dl/extractor/giga.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
qualities,
|
||||
compat_str,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class GigaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
|
||||
'md5': '6bc5535e945e724640664632055a584f',
|
||||
'info_dict': {
|
||||
'id': '2622086',
|
||||
'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
|
||||
'description': 'md5:afdf5862241aded4718a30dff6a57baf',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 578,
|
||||
'timestamp': 1414749706,
|
||||
'upload_date': '20141031',
|
||||
'uploader': 'Robin Schweiger',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
|
||||
webpage, 'video id')
|
||||
|
||||
playlist = self._download_json(
|
||||
'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
|
||||
% video_id, video_id)[0]
|
||||
|
||||
quality = qualities(['normal', 'hd720'])
|
||||
|
||||
formats = []
|
||||
for format_id in itertools.count(0):
|
||||
fmt = playlist.get(compat_str(format_id))
|
||||
if not fmt:
|
||||
break
|
||||
formats.append({
|
||||
'url': fmt['src'],
|
||||
'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
|
||||
'quality': quality(fmt['quality']),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'title', webpage, 'title', fatal=True)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'datetime="([^"]+)"', webpage, 'upload date', fatal=False))
|
||||
uploader = self._search_regex(
|
||||
r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
@@ -1,9 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GoldenMoustacheIE(InfoExtractor):
|
||||
@@ -17,7 +14,6 @@ class GoldenMoustacheIE(InfoExtractor):
|
||||
'title': 'Suricate - Le Poker',
|
||||
'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/',
|
||||
@@ -28,7 +24,6 @@ class GoldenMoustacheIE(InfoExtractor):
|
||||
'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)',
|
||||
'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a',
|
||||
'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
|
||||
'view_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -42,9 +37,6 @@ class GoldenMoustacheIE(InfoExtractor):
|
||||
r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'<strong>([0-9]+)</strong>\s*VUES</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -53,5 +45,4 @@ class GoldenMoustacheIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
@@ -2,8 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
@@ -4,7 +4,7 @@ import itertools
|
||||
import re
|
||||
|
||||
from .common import SearchInfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user