Compare commits
468 Commits
2013.10.23
...
2013.12.10
Author | SHA1 | Date | |
---|---|---|---|
a30a60d8eb | |||
5a3ea17c94 | |||
475700acfe | |||
45598aab08 | |||
26e6393134 | |||
49929a20a7 | |||
f8bd0194a7 | |||
77526143e7 | |||
4ff50ef846 | |||
caefb1de87 | |||
1e1f84dac9 | |||
1d87e3a1c6 | |||
df8ae1e3a2 | |||
f7d8d4a116 | |||
1c088fa89d | |||
de2dd4c502 | |||
395293a889 | |||
db4da14027 | |||
2101830c0d | |||
977887469c | |||
ffa8f0df0a | |||
693b8b2d31 | |||
a0d96c9843 | |||
2a18bc9a4b | |||
eaa1a7bde3 | |||
0783b09b92 | |||
ffe62508e4 | |||
ac79fa02b8 | |||
7cc3570e53 | |||
baa7b1978b | |||
ac5118bcb9 | |||
5adb818947 | |||
52defb0c9b | |||
56a8ab7d60 | |||
22686b91f0 | |||
31812a9e0e | |||
11bf848191 | |||
d4df5ed14c | |||
303b479e0a | |||
4c52160646 | |||
a213880aaf | |||
42d3bf844a | |||
b860967ce4 | |||
8ca6b8fba1 | |||
c4d9e6731a | |||
0d9ec5d963 | |||
870fc4e578 | |||
f623530d6e | |||
ca9e02dc00 | |||
fb30ec22fd | |||
5cc14c2fd7 | |||
d349cd2240 | |||
0b6a9f639f | |||
715c8e7bdb | |||
7d4afc557f | |||
563e405411 | |||
f53c966a73 | |||
336c3a69bd | |||
4e76179476 | |||
ef4fd84857 | |||
72135030d1 | |||
3514813d5b | |||
9e60602084 | |||
19e3dfc9f8 | |||
a1ef7e85d6 | |||
ef2fac6f4a | |||
7fc3fa0545 | |||
673d1273ff | |||
b9a2c53833 | |||
e9bf7479d2 | |||
bfb9f7bc4c | |||
6a656a843a | |||
29030c0a4c | |||
c0ade33e16 | |||
671c0f151d | |||
27dcce1904 | |||
8aff7b9bc4 | |||
55f6597c67 | |||
d494389821 | |||
1dcc4c0cad | |||
84db81815a | |||
fb7abb31af | |||
ce93879a9b | |||
938384c587 | |||
e9d8e302aa | |||
cb7fb54600 | |||
cf6758d204 | |||
731e3dde29 | |||
a0eaa341e1 | |||
fb27c2295e | |||
1b753cb334 | |||
36a826a50d | |||
8796857429 | |||
aaebed13a8 | |||
25939ffe56 | |||
5270d8cb13 | |||
0037e02921 | |||
6ad14cab59 | |||
a9be0cc736 | |||
55a10eab48 | |||
e344693b65 | |||
355e4fd07e | |||
5e09d6abbd | |||
0a688bc0b2 | |||
b138de72f2 | |||
06dcbb71d8 | |||
c5171c454b | |||
323ec6ae56 | |||
befd88b786 | |||
a3fb4675fb | |||
5f077efcb1 | |||
9986238ba9 | |||
e1f900d6a4 | |||
acf37ca151 | |||
17769d5a6c | |||
677c18092d | |||
3862402ff3 | |||
b03d0d064c | |||
d8d6148628 | |||
2be54167d0 | |||
4e0084d92e | |||
fc9e1cc697 | |||
f8f60d2793 | |||
ea07dbb8b1 | |||
2a275ab007 | |||
a2e6db365c | |||
9d93e7da6c | |||
0e44d8381a | |||
35907e23ec | |||
76d1700b28 | |||
dcca796ce4 | |||
4b19e38954 | |||
5f09bbff4d | |||
c1f9c59d11 | |||
652cdaa269 | |||
e26f871228 | |||
6e47b51eef | |||
4a98cdbf3b | |||
c5ed4e8f7e | |||
c2e52508cc | |||
d8ec4959c8 | |||
d31209a144 | |||
529a2e2cc3 | |||
781a7d0546 | |||
fb04e40396 | |||
d9b011f201 | |||
b0b9eaa196 | |||
8b134b1062 | |||
0c75c3fa7a | |||
a3927cf7ee | |||
1a62c18f65 | |||
2a15e7063b | |||
d46cc192d7 | |||
bb2bebdbe1 | |||
5db07df634 | |||
ea36cbac5e | |||
d0d2b49ab7 | |||
31cb6d8fef | |||
daa0dd2973 | |||
de79c46c8f | |||
94ccb6fa2e | |||
07e4035879 | |||
d0efb9ec9a | |||
ac05067d3d | |||
113577e155 | |||
79d09f47c2 | |||
c059bdd432 | |||
02dbf93f0e | |||
1fb2bcbbf7 | |||
16e055849e | |||
66cfab4226 | |||
6d88bc37a3 | |||
b7553b2554 | |||
e03db0a077 | |||
a1ee09e815 | |||
267ed0c5d3 | |||
f459d17018 | |||
dc65dcbb6d | |||
d214fdb8fe | |||
138df537ff | |||
0c7c19d6bc | |||
eaaafc59c2 | |||
382ed50e0e | |||
66ec019240 | |||
bd49928f7a | |||
23e6d50d73 | |||
2e767313e4 | |||
38b2db6a66 | |||
13ebea791f | |||
4c9c57428f | |||
8bf9319e9c | |||
4914120727 | |||
36de0a0e1a | |||
e5c146d586 | |||
52ad14aeb0 | |||
43afe28588 | |||
a87b0615aa | |||
d7386f6276 | |||
081640940e | |||
7012b23c94 | |||
d3b30148ed | |||
9f79463803 | |||
d35dc6d3b5 | |||
50123be421 | |||
3f8ced5144 | |||
00ea0f11eb | |||
dca0872056 | |||
0b63aed8df | |||
15c3adbb16 | |||
f143a42fe6 | |||
241650c7ff | |||
bfe7439a20 | |||
cffa6aa107 | |||
02e4ebbbad | |||
ab009f59ef | |||
0980426559 | |||
b1c9c66936 | |||
a6a173c2fd | |||
2bb683c201 | |||
64bb5187f5 | |||
9e4f50a8ae | |||
0190eecc00 | |||
ca872a4c0b | |||
f2e87ef4fa | |||
0ad97bbc05 | |||
c4864091a1 | |||
9a98a466b3 | |||
f99e0f1ed6 | |||
d323bcb152 | |||
da6a795fdb | |||
c5edcde21f | |||
15ff3c831e | |||
100959a6d9 | |||
0a120f74b2 | |||
8f05351984 | |||
4eb92208a3 | |||
71791f414c | |||
f3682997d7 | |||
cc13cc0251 | |||
86bd5f2ca9 | |||
8694c60000 | |||
9d1538182f | |||
5904088811 | |||
69545c2aff | |||
495da337ae | |||
34b3afc7be | |||
00373a4c5d | |||
cb7dfeeac4 | |||
efd6c574a2 | |||
4113e6ab56 | |||
9a942a4671 | |||
9906d397a0 | |||
ae8f787141 | |||
a81b4d5c8f | |||
887c6acdf2 | |||
83aa529330 | |||
96b31b6533 | |||
fccd377198 | |||
2b35c9ef74 | |||
73c566695f | |||
63b7b7224a | |||
ce80c8b8ee | |||
749febf4d1 | |||
bdde425cbe | |||
746f491f82 | |||
1672647ade | |||
90b6bbc38c | |||
ce02ed60f2 | |||
1e5b9a95fd | |||
1d699755e0 | |||
ddf49c6344 | |||
ba3881dffd | |||
d1c252048b | |||
eab2724138 | |||
21ea3e06c9 | |||
52d703d3d1 | |||
ce152341a1 | |||
f058e34011 | |||
b5349e8721 | |||
7150858d49 | |||
91c7271aab | |||
aa13b2dffd | |||
fc2ef392be | |||
463a908705 | |||
d24ffe1cfa | |||
78fb87b283 | |||
ab2d524780 | |||
85d61685f1 | |||
b9643eed7c | |||
feee2ecfa9 | |||
a25a5cfeec | |||
0e145dd541 | |||
9f9be844fc | |||
e3b9ab5e18 | |||
c66d2baa9c | |||
08bc37cdd0 | |||
9771cceb2c | |||
ca715127a2 | |||
ea7a7af1d4 | |||
880e1c529d | |||
dcbb45803f | |||
80b9bbce86 | |||
d37936386f | |||
c3a3028f9f | |||
6c5ad80cdc | |||
b5bdc2699a | |||
384b98cd8f | |||
eb9b5bffef | |||
0bd59f3723 | |||
8b8cbd8f6d | |||
72b18c5d34 | |||
eb0a839866 | |||
1777d5a952 | |||
d4b7da84c3 | |||
801dbbdffd | |||
0ed05a1d2d | |||
1008bebade | |||
ae84f879d7 | |||
be6dfd1b49 | |||
231516b6c9 | |||
fb53d58dcf | |||
2a9e9b210b | |||
897d6cc43a | |||
f470c6c812 | |||
566d4e0425 | |||
81be02d2f9 | |||
c2b6a482d5 | |||
12c167c881 | |||
20aafee7fa | |||
be07375b66 | |||
4894fe8c5b | |||
dd5bcdc4c9 | |||
6161d17579 | |||
4ac5306ae7 | |||
b1a80ec1a9 | |||
672fe94dcb | |||
51040b72ed | |||
4f045eef8f | |||
5d7b253ea0 | |||
b0759f0c19 | |||
065472936a | |||
fc4a0c2aec | |||
eeb165e674 | |||
9ee2b5f6f2 | |||
da54be877a | |||
50a886b7ab | |||
76e67c2cb6 | |||
5137ebac0b | |||
a8eeb0597b | |||
4ed3e51080 | |||
7f34001d57 | |||
2dcf7d8f99 | |||
19b0668251 | |||
e7e6b54d8a | |||
2a1a8ffe41 | |||
08fb86c49b | |||
3633d77c0f | |||
165e179764 | |||
12ebdd1506 | |||
1baf9a5938 | |||
a56f9de156 | |||
fa5d47af4b | |||
d607038753 | |||
9ac6a01aaf | |||
be97abc247 | |||
9103bbc5cd | |||
b6c45014ae | |||
a3dd924871 | |||
137bbb3e37 | |||
86ad94bb2e | |||
3e56add7c9 | |||
f52f01b5d2 | |||
98d7efb537 | |||
cf51923545 | |||
38fcd4597a | |||
165e3bb67a | |||
38db46794f | |||
a9a3876d55 | |||
1f343eaabb | |||
72a5b4f702 | |||
0a43ddf320 | |||
31366066bd | |||
aa2484e390 | |||
8eddf3e91d | |||
60d142aa8d | |||
66cf3ac342 | |||
ab4e151347 | |||
ac2547f5ff | |||
5f1ea943ab | |||
0ef7ad5cd4 | |||
9f1109a564 | |||
33b1d9595d | |||
7193498811 | |||
72321ead7b | |||
b5d0d817bc | |||
94badb2599 | |||
b9a836515f | |||
21c924f406 | |||
e54fd4b23b | |||
57dd9a8f2f | |||
912cbf5d4e | |||
43d7895ea0 | |||
f7ff55aa78 | |||
795f28f871 | |||
f6cc16f5d8 | |||
321a01f971 | |||
646e17a53d | |||
dd508b7c4f | |||
2563bcc85c | |||
702665c085 | |||
dcc2a706ef | |||
2bc67c35ac | |||
77ae65877e | |||
32a35e4418 | |||
369a759acc | |||
79b3f61228 | |||
216d71d001 | |||
78a3a9f89e | |||
a7685f3bf4 | |||
f088ea5486 | |||
1003d108d5 | |||
8abeeb9449 | |||
c1002e96e9 | |||
77d0a82fef | |||
ebc14f251c | |||
d41e6efc85 | |||
8ffa13e03e | |||
db477d3a37 | |||
750e9833b8 | |||
82f0ac657c | |||
eb6a2277a2 | |||
f8778fb0fa | |||
e2f9de207c | |||
a93cc0d943 | |||
7d8c2e07f2 | |||
efb4c36b18 | |||
29526d0d2b | |||
198e370f23 | |||
c19f7764a5 | |||
bc63d9d329 | |||
aa929c37d5 | |||
af4d506eb3 | |||
5da0549581 | |||
749a4fd2fd | |||
6f71ef580c | |||
67874aeffa | |||
3e6a330d38 | |||
aee5e18c8f | |||
5b11143d05 | |||
7b2212e954 | |||
71865091ab | |||
125cfd78e8 | |||
8cb57d9b91 | |||
14e10b2b6e | |||
6e76104d66 | |||
1d45a23b74 | |||
7df286540f | |||
5d0c97541a | |||
49a25557b0 | |||
b5936c0059 | |||
600cc1a4f0 | |||
ea32fbacc8 | |||
00fe14fc75 | |||
fcc28edb2f | |||
fac6be2dd5 | |||
d5a9bb4ea9 | |||
b0505eb611 | |||
f44415360e |
102
README.md
102
README.md
@ -30,13 +30,16 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--list-extractors List all supported extractors and the URLs they
|
--list-extractors List all supported extractors and the URLs they
|
||||||
would handle
|
would handle
|
||||||
--extractor-descriptions Output descriptions of all supported extractors
|
--extractor-descriptions Output descriptions of all supported extractors
|
||||||
--proxy URL Use the specified HTTP/HTTPS proxy
|
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in an
|
||||||
|
empty string (--proxy "") for direct connection
|
||||||
--no-check-certificate Suppress HTTPS certificate validation.
|
--no-check-certificate Suppress HTTPS certificate validation.
|
||||||
--cache-dir DIR Location in the filesystem where youtube-dl can
|
--cache-dir DIR Location in the filesystem where youtube-dl can
|
||||||
store downloaded information permanently. By
|
store downloaded information permanently. By
|
||||||
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
|
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
|
||||||
/youtube-dl .
|
/youtube-dl .
|
||||||
--no-cache-dir Disable filesystem caching
|
--no-cache-dir Disable filesystem caching
|
||||||
|
--bidi-workaround Work around terminals that lack bidirectional
|
||||||
|
text support. Requires fribidi executable in PATH
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||||
@ -55,8 +58,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--dateafter DATE download only videos uploaded after this date
|
--dateafter DATE download only videos uploaded after this date
|
||||||
--no-playlist download only the currently playing video
|
--no-playlist download only the currently playing video
|
||||||
--age-limit YEARS download only videos suitable for the given age
|
--age-limit YEARS download only videos suitable for the given age
|
||||||
--download-archive FILE Download only videos not present in the archive
|
--download-archive FILE Download only videos not listed in the archive
|
||||||
file. Record all downloaded videos in it.
|
file. Record the IDs of all downloaded videos in
|
||||||
|
it.
|
||||||
|
|
||||||
## Download Options:
|
## Download Options:
|
||||||
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
|
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
|
||||||
@ -79,24 +83,29 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
different, %(autonumber)s to get an automatically
|
different, %(autonumber)s to get an automatically
|
||||||
incremented number, %(ext)s for the filename
|
incremented number, %(ext)s for the filename
|
||||||
extension, %(format)s for the format description
|
extension, %(format)s for the format description
|
||||||
(like "22 - 1280x720" or "HD")%(upload_date)s for
|
(like "22 - 1280x720" or "HD"),%(format_id)s for
|
||||||
the upload date (YYYYMMDD), %(extractor)s for the
|
the unique id of the format (like Youtube's
|
||||||
provider (youtube, metacafe, etc), %(id)s for the
|
itags: "137"),%(upload_date)s for the upload date
|
||||||
video id , %(playlist)s for the playlist the
|
(YYYYMMDD), %(extractor)s for the provider
|
||||||
video is in, %(playlist_index)s for the position
|
(youtube, metacafe, etc), %(id)s for the video id
|
||||||
in the playlist and %% for a literal percent. Use
|
, %(playlist)s for the playlist the video is in,
|
||||||
- to output to stdout. Can also be used to
|
%(playlist_index)s for the position in the
|
||||||
download to a different directory, for example
|
playlist and %% for a literal percent. Use - to
|
||||||
with -o '/my/downloads/%(uploader)s/%(title)s-%(i
|
output to stdout. Can also be used to download to
|
||||||
d)s.%(ext)s' .
|
a different directory, for example with -o '/my/d
|
||||||
|
ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||||
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
|
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
|
||||||
when it is present in output filename template or
|
when it is present in output filename template or
|
||||||
--autonumber option is given
|
--auto-number option is given
|
||||||
--restrict-filenames Restrict filenames to only ASCII characters, and
|
--restrict-filenames Restrict filenames to only ASCII characters, and
|
||||||
avoid "&" and spaces in filenames
|
avoid "&" and spaces in filenames
|
||||||
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
||||||
|
--load-info FILE json file containing the video information
|
||||||
|
(created with the "--write-info-json" option)
|
||||||
-w, --no-overwrites do not overwrite files
|
-w, --no-overwrites do not overwrite files
|
||||||
-c, --continue resume partially downloaded files
|
-c, --continue force resume of partially downloaded files. By
|
||||||
|
default, youtube-dl will resume downloads if
|
||||||
|
possible.
|
||||||
--no-continue do not resume partially downloaded files (restart
|
--no-continue do not resume partially downloaded files (restart
|
||||||
from beginning)
|
from beginning)
|
||||||
--cookies FILE file to read cookies from and dump cookie jar in
|
--cookies FILE file to read cookies from and dump cookie jar in
|
||||||
@ -120,15 +129,18 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--get-description simulate, quiet but print video description
|
--get-description simulate, quiet but print video description
|
||||||
--get-filename simulate, quiet but print output filename
|
--get-filename simulate, quiet but print output filename
|
||||||
--get-format simulate, quiet but print output format
|
--get-format simulate, quiet but print output format
|
||||||
|
-j, --dump-json simulate, quiet but print JSON information
|
||||||
--newline output progress bar as new lines
|
--newline output progress bar as new lines
|
||||||
--no-progress do not print progress bar
|
--no-progress do not print progress bar
|
||||||
--console-title display progress in console titlebar
|
--console-title display progress in console titlebar
|
||||||
-v, --verbose print various debugging information
|
-v, --verbose print various debugging information
|
||||||
--dump-intermediate-pages print downloaded pages to debug problems(very
|
--dump-intermediate-pages print downloaded pages to debug problems(very
|
||||||
verbose)
|
verbose)
|
||||||
|
--write-pages Write downloaded intermediary pages to files in
|
||||||
|
the current directory to debug problems
|
||||||
|
|
||||||
## Video Format Options:
|
## Video Format Options:
|
||||||
-f, --format FORMAT video format code, specifiy the order of
|
-f, --format FORMAT video format code, specify the order of
|
||||||
preference using slashes: "-f 22/17/18". "-f mp4"
|
preference using slashes: "-f 22/17/18". "-f mp4"
|
||||||
and "-f flv" are also supported
|
and "-f flv" are also supported
|
||||||
--all-formats download all available video formats
|
--all-formats download all available video formats
|
||||||
@ -176,7 +188,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
|
|
||||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`.
|
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
|
||||||
|
|
||||||
# OUTPUT TEMPLATE
|
# OUTPUT TEMPLATE
|
||||||
|
|
||||||
@ -266,14 +278,54 @@ This README file was originally written by Daniel Bolton (<https://github.com/db
|
|||||||
|
|
||||||
# BUGS
|
# BUGS
|
||||||
|
|
||||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>
|
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
||||||
|
|
||||||
Please include:
|
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||||
|
|
||||||
* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem.
|
|
||||||
* If possible re-run the command with `--verbose`, and include the full output, it is really helpful to us.
|
|
||||||
* The output of `youtube-dl --version`
|
|
||||||
* The output of `python --version`
|
|
||||||
* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough).
|
|
||||||
|
|
||||||
For discussions, join us in the irc channel #youtube-dl on freenode.
|
For discussions, join us in the irc channel #youtube-dl on freenode.
|
||||||
|
|
||||||
|
When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist):
|
||||||
|
|
||||||
|
### Is the description of the issue itself sufficient?
|
||||||
|
|
||||||
|
We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
|
||||||
|
|
||||||
|
So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
|
||||||
|
|
||||||
|
- What the problem is
|
||||||
|
- How it could be fixed
|
||||||
|
- What your proposed solution would look like
|
||||||
|
|
||||||
|
If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
|
||||||
|
|
||||||
|
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||||
|
|
||||||
|
Site support requests must contain an example URL. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||||
|
|
||||||
|
### Are you using the latest version?
|
||||||
|
|
||||||
|
Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
|
||||||
|
|
||||||
|
### Is the issue already documented?
|
||||||
|
|
||||||
|
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or at https://github.com/rg3/youtube-dl/search?type=Issues . If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
|
||||||
|
|
||||||
|
### Why are existing options not enough?
|
||||||
|
|
||||||
|
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||||
|
|
||||||
|
### Is there enough context in your bug report?
|
||||||
|
|
||||||
|
People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one).
|
||||||
|
|
||||||
|
We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
|
||||||
|
|
||||||
|
### Does the issue involve one problem, and one problem only?
|
||||||
|
|
||||||
|
Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
|
||||||
|
|
||||||
|
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
|
||||||
|
|
||||||
|
### Is anyone going to need the feature?
|
||||||
|
|
||||||
|
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||||
|
@ -1,10 +1,21 @@
|
|||||||
__youtube_dl()
|
__youtube_dl()
|
||||||
{
|
{
|
||||||
local cur prev opts
|
local cur prev opts fileopts diropts keywords
|
||||||
COMPREPLY=()
|
COMPREPLY=()
|
||||||
cur="${COMP_WORDS[COMP_CWORD]}"
|
cur="${COMP_WORDS[COMP_CWORD]}"
|
||||||
|
prev="${COMP_WORDS[COMP_CWORD-1]}"
|
||||||
opts="{{flags}}"
|
opts="{{flags}}"
|
||||||
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
|
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
|
||||||
|
fileopts="-a|--batch-file|--download-archive|--cookies"
|
||||||
|
diropts="--cache-dir"
|
||||||
|
|
||||||
|
if [[ ${prev} =~ ${fileopts} ]]; then
|
||||||
|
COMPREPLY=( $(compgen -f -- ${cur}) )
|
||||||
|
return 0
|
||||||
|
elif [[ ${prev} =~ ${diropts} ]]; then
|
||||||
|
COMPREPLY=( $(compgen -d -- ${cur}) )
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ ${cur} =~ : ]]; then
|
if [[ ${cur} =~ : ]]; then
|
||||||
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
||||||
|
39
devscripts/check-porn.py
Normal file
39
devscripts/check-porn.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
"""
|
||||||
|
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
|
||||||
|
if we are not 'age_limit' tagging some porn site
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from test.helper import get_testcases
|
||||||
|
from youtube_dl.utils import compat_urllib_request
|
||||||
|
|
||||||
|
for test in get_testcases():
|
||||||
|
try:
|
||||||
|
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
|
||||||
|
except:
|
||||||
|
print('\nFail: {0}'.format(test['name']))
|
||||||
|
continue
|
||||||
|
|
||||||
|
webpage = webpage.decode('utf8', 'replace')
|
||||||
|
|
||||||
|
if 'porn' in webpage.lower() and ('info_dict' not in test
|
||||||
|
or 'age_limit' not in test['info_dict']
|
||||||
|
or test['info_dict']['age_limit'] != 18):
|
||||||
|
print('\nPotential missing age_limit check: {0}'.format(test['name']))
|
||||||
|
|
||||||
|
elif 'porn' not in webpage.lower() and ('info_dict' in test and
|
||||||
|
'age_limit' in test['info_dict'] and
|
||||||
|
test['info_dict']['age_limit'] == 18):
|
||||||
|
print('\nPotential false negative: {0}'.format(test['name']))
|
||||||
|
|
||||||
|
else:
|
||||||
|
sys.stdout.write('.')
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
|
print()
|
9
setup.py
9
setup.py
@ -8,8 +8,10 @@ import sys
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
|
setuptools_available = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from distutils.core import setup
|
from distutils.core import setup
|
||||||
|
setuptools_available = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# This will create an exe that needs Microsoft Visual C++ 2008
|
# This will create an exe that needs Microsoft Visual C++ 2008
|
||||||
@ -43,13 +45,16 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
|||||||
params = py2exe_params
|
params = py2exe_params
|
||||||
else:
|
else:
|
||||||
params = {
|
params = {
|
||||||
'scripts': ['bin/youtube-dl'],
|
|
||||||
'data_files': [ # Installing system-wide would require sudo...
|
'data_files': [ # Installing system-wide would require sudo...
|
||||||
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
||||||
('share/doc/youtube_dl', ['README.txt']),
|
('share/doc/youtube_dl', ['README.txt']),
|
||||||
('share/man/man1/', ['youtube-dl.1'])
|
('share/man/man1', ['youtube-dl.1'])
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
if setuptools_available:
|
||||||
|
params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
|
||||||
|
else:
|
||||||
|
params['scripts'] = ['bin/youtube-dl']
|
||||||
|
|
||||||
# Get the version from youtube_dl/version.py without importing the package
|
# Get the version from youtube_dl/version.py without importing the package
|
||||||
exec(compile(open('youtube_dl/version.py').read(),
|
exec(compile(open('youtube_dl/version.py').read(),
|
||||||
|
@ -5,13 +5,11 @@ import json
|
|||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import types
|
import types
|
||||||
|
import sys
|
||||||
|
|
||||||
import youtube_dl.extractor
|
import youtube_dl.extractor
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
|
from youtube_dl.utils import preferredencoding
|
||||||
|
|
||||||
def global_setup():
|
|
||||||
youtube_dl._setup_opener(timeout=10)
|
|
||||||
|
|
||||||
|
|
||||||
def get_params(override=None):
|
def get_params(override=None):
|
||||||
@ -33,6 +31,21 @@ def try_rm(filename):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def report_warning(message):
|
||||||
|
'''
|
||||||
|
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||||
|
If stderr is a tty file the 'WARNING:' will be colored
|
||||||
|
'''
|
||||||
|
if sys.stderr.isatty() and os.name != 'nt':
|
||||||
|
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
||||||
|
else:
|
||||||
|
_msg_header = u'WARNING:'
|
||||||
|
output = u'%s %s\n' % (_msg_header, message)
|
||||||
|
if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
|
||||||
|
output = output.encode(preferredencoding())
|
||||||
|
sys.stderr.write(output)
|
||||||
|
|
||||||
|
|
||||||
class FakeYDL(YoutubeDL):
|
class FakeYDL(YoutubeDL):
|
||||||
def __init__(self, override=None):
|
def __init__(self, override=None):
|
||||||
# Different instances of the downloader can't share the same dictionary
|
# Different instances of the downloader can't share the same dictionary
|
||||||
|
@ -39,5 +39,6 @@
|
|||||||
"writeinfojson": true,
|
"writeinfojson": true,
|
||||||
"writesubtitles": false,
|
"writesubtitles": false,
|
||||||
"allsubtitles": false,
|
"allsubtitles": false,
|
||||||
"listssubtitles": false
|
"listssubtitles": false,
|
||||||
|
"socket_timeout": 20
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,7 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL
|
from test.helper import FakeYDL
|
||||||
|
from youtube_dl import YoutubeDL
|
||||||
|
|
||||||
|
|
||||||
class YDL(FakeYDL):
|
class YDL(FakeYDL):
|
||||||
@ -62,10 +63,10 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
|
|
||||||
def test_format_limit(self):
|
def test_format_limit(self):
|
||||||
formats = [
|
formats = [
|
||||||
{u'format_id': u'meh'},
|
{u'format_id': u'meh', u'url': u'http://example.com/meh'},
|
||||||
{u'format_id': u'good'},
|
{u'format_id': u'good', u'url': u'http://example.com/good'},
|
||||||
{u'format_id': u'great'},
|
{u'format_id': u'great', u'url': u'http://example.com/great'},
|
||||||
{u'format_id': u'excellent'},
|
{u'format_id': u'excellent', u'url': u'http://example.com/exc'},
|
||||||
]
|
]
|
||||||
info_dict = {
|
info_dict = {
|
||||||
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
|
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
|
||||||
@ -128,6 +129,32 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], u'35')
|
self.assertEqual(downloaded['format_id'], u'35')
|
||||||
|
|
||||||
|
def test_add_extra_info(self):
|
||||||
|
test_dict = {
|
||||||
|
'extractor': 'Foo',
|
||||||
|
}
|
||||||
|
extra_info = {
|
||||||
|
'extractor': 'Bar',
|
||||||
|
'playlist': 'funny videos',
|
||||||
|
}
|
||||||
|
YDL.add_extra_info(test_dict, extra_info)
|
||||||
|
self.assertEqual(test_dict['extractor'], 'Foo')
|
||||||
|
self.assertEqual(test_dict['playlist'], 'funny videos')
|
||||||
|
|
||||||
|
def test_prepare_filename(self):
|
||||||
|
info = {
|
||||||
|
u'id': u'1234',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'width': None,
|
||||||
|
}
|
||||||
|
def fname(templ):
|
||||||
|
ydl = YoutubeDL({'outtmpl': templ})
|
||||||
|
return ydl.prepare_filename(info)
|
||||||
|
self.assertEqual(fname(u'%(id)s.%(ext)s'), u'1234.mp4')
|
||||||
|
self.assertEqual(fname(u'%(id)s-%(width)s.%(ext)s'), u'1234-NA.mp4')
|
||||||
|
# Replace missing fields with 'NA'
|
||||||
|
self.assertEqual(fname(u'%(uploader_date)s-%(id)s.%(ext)s'), u'NA-1234.mp4')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -6,8 +6,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import global_setup, try_rm
|
from test.helper import try_rm
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
@ -24,7 +23,7 @@ def _download_restricted(url, filename, age):
|
|||||||
}
|
}
|
||||||
ydl = YoutubeDL(params)
|
ydl = YoutubeDL(params)
|
||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
json_filename = filename + '.info.json'
|
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
||||||
try_rm(json_filename)
|
try_rm(json_filename)
|
||||||
ydl.download([url])
|
ydl.download([url])
|
||||||
res = os.path.exists(json_filename)
|
res = os.path.exists(json_filename)
|
||||||
|
@ -100,11 +100,19 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
def test_keywords(self):
|
def test_keywords(self):
|
||||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':thedailyshow', ['ComedyCentral'])
|
self.assertMatch(':ythistory', ['youtube:history'])
|
||||||
self.assertMatch(':tds', ['ComedyCentral'])
|
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
||||||
self.assertMatch(':colbertreport', ['ComedyCentral'])
|
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||||
self.assertMatch(':cr', ['ComedyCentral'])
|
self.assertMatch(':colbertreport', ['ComedyCentralShows'])
|
||||||
|
self.assertMatch(':cr', ['ComedyCentralShows'])
|
||||||
|
|
||||||
|
def test_vimeo_matching(self):
|
||||||
|
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||||
|
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
|
||||||
|
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/1930
|
||||||
|
def test_soundcloud_not_matching_sets(self):
|
||||||
|
self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -1,70 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
# Allow direct execution
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import unittest
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup, md5
|
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import DailymotionIE
|
|
||||||
|
|
||||||
class TestDailymotionSubtitles(unittest.TestCase):
|
|
||||||
def setUp(self):
|
|
||||||
self.DL = FakeYDL()
|
|
||||||
self.url = 'http://www.dailymotion.com/video/xczg00'
|
|
||||||
def getInfoDict(self):
|
|
||||||
IE = DailymotionIE(self.DL)
|
|
||||||
info_dict = IE.extract(self.url)
|
|
||||||
return info_dict
|
|
||||||
def getSubtitles(self):
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
return info_dict[0]['subtitles']
|
|
||||||
def test_no_writesubtitles(self):
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
def test_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
|
||||||
def test_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['fr']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
|
||||||
def test_allsubtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(len(subtitles.keys()), 5)
|
|
||||||
def test_list_subtitles(self):
|
|
||||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
def test_automatic_captions(self):
|
|
||||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslang'] = ['en']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(len(subtitles.keys()) == 0)
|
|
||||||
def test_nosubtitles(self):
|
|
||||||
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
|
||||||
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(len(subtitles), 0)
|
|
||||||
def test_multiple_langs(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['es', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
@ -6,8 +6,13 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import get_params, get_testcases, global_setup, try_rm, md5
|
from test.helper import (
|
||||||
global_setup()
|
get_params,
|
||||||
|
get_testcases,
|
||||||
|
try_rm,
|
||||||
|
md5,
|
||||||
|
report_warning
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
@ -19,10 +24,12 @@ import youtube_dl.YoutubeDL
|
|||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
|
compat_HTTPError,
|
||||||
DownloadError,
|
DownloadError,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
)
|
)
|
||||||
|
from youtube_dl.extractor import get_info_extractor
|
||||||
|
|
||||||
RETRIES = 3
|
RETRIES = 3
|
||||||
|
|
||||||
@ -55,17 +62,25 @@ def generator(test_case):
|
|||||||
|
|
||||||
def test_template(self):
|
def test_template(self):
|
||||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||||
|
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
|
||||||
def print_skipping(reason):
|
def print_skipping(reason):
|
||||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||||
if not ie._WORKING:
|
if not ie.working():
|
||||||
print_skipping('IE marked as not _WORKING')
|
print_skipping('IE marked as not _WORKING')
|
||||||
return
|
return
|
||||||
if 'playlist' not in test_case and not test_case['file']:
|
if 'playlist' not in test_case:
|
||||||
print_skipping('No output file specified')
|
info_dict = test_case.get('info_dict', {})
|
||||||
return
|
if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
||||||
|
print_skipping('The output file cannot be know, the "file" '
|
||||||
|
'key is missing or the info_dict is incomplete')
|
||||||
|
return
|
||||||
if 'skip' in test_case:
|
if 'skip' in test_case:
|
||||||
print_skipping(test_case['skip'])
|
print_skipping(test_case['skip'])
|
||||||
return
|
return
|
||||||
|
for other_ie in other_ies:
|
||||||
|
if not other_ie.working():
|
||||||
|
print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
|
||||||
|
return
|
||||||
|
|
||||||
params = get_params(test_case.get('params', {}))
|
params = get_params(test_case.get('params', {}))
|
||||||
|
|
||||||
@ -77,35 +92,48 @@ def generator(test_case):
|
|||||||
finished_hook_called.add(status['filename'])
|
finished_hook_called.add(status['filename'])
|
||||||
ydl.fd.add_progress_hook(_hook)
|
ydl.fd.add_progress_hook(_hook)
|
||||||
|
|
||||||
|
def get_tc_filename(tc):
|
||||||
|
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
||||||
|
|
||||||
test_cases = test_case.get('playlist', [test_case])
|
test_cases = test_case.get('playlist', [test_case])
|
||||||
for tc in test_cases:
|
def try_rm_tcs_files():
|
||||||
try_rm(tc['file'])
|
for tc in test_cases:
|
||||||
try_rm(tc['file'] + '.part')
|
tc_filename = get_tc_filename(tc)
|
||||||
try_rm(tc['file'] + '.info.json')
|
try_rm(tc_filename)
|
||||||
|
try_rm(tc_filename + '.part')
|
||||||
|
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
|
||||||
|
try_rm_tcs_files()
|
||||||
try:
|
try:
|
||||||
for retry in range(1, RETRIES + 1):
|
try_num = 1
|
||||||
|
while True:
|
||||||
try:
|
try:
|
||||||
ydl.download([test_case['url']])
|
ydl.download([test_case['url']])
|
||||||
except (DownloadError, ExtractorError) as err:
|
except (DownloadError, ExtractorError) as err:
|
||||||
if retry == RETRIES: raise
|
|
||||||
|
|
||||||
# Check if the exception is not a network related one
|
# Check if the exception is not a network related one
|
||||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
|
if try_num == RETRIES:
|
||||||
|
report_warning(u'Failed due to network errors, skipping...')
|
||||||
|
return
|
||||||
|
|
||||||
|
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
|
||||||
|
|
||||||
|
try_num += 1
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
for tc in test_cases:
|
for tc in test_cases:
|
||||||
|
tc_filename = get_tc_filename(tc)
|
||||||
if not test_case.get('params', {}).get('skip_download', False):
|
if not test_case.get('params', {}).get('skip_download', False):
|
||||||
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
|
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
||||||
self.assertTrue(tc['file'] in finished_hook_called)
|
self.assertTrue(tc_filename in finished_hook_called)
|
||||||
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
|
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
||||||
|
self.assertTrue(os.path.exists(info_json_fn))
|
||||||
if 'md5' in tc:
|
if 'md5' in tc:
|
||||||
md5_for_file = _file_md5(tc['file'])
|
md5_for_file = _file_md5(tc_filename)
|
||||||
self.assertEqual(md5_for_file, tc['md5'])
|
self.assertEqual(md5_for_file, tc['md5'])
|
||||||
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
|
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
for (info_field, expected) in tc.get('info_dict', {}).items():
|
||||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||||
@ -125,11 +153,11 @@ def generator(test_case):
|
|||||||
# Check for the presence of mandatory fields
|
# Check for the presence of mandatory fields
|
||||||
for key in ('id', 'url', 'title', 'ext'):
|
for key in ('id', 'url', 'title', 'ext'):
|
||||||
self.assertTrue(key in info_dict.keys() and info_dict[key])
|
self.assertTrue(key in info_dict.keys() and info_dict[key])
|
||||||
|
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||||
|
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
||||||
|
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
|
||||||
finally:
|
finally:
|
||||||
for tc in test_cases:
|
try_rm_tcs_files()
|
||||||
try_rm(tc['file'])
|
|
||||||
try_rm(tc['file'] + '.part')
|
|
||||||
try_rm(tc['file'] + '.info.json')
|
|
||||||
|
|
||||||
return test_template
|
return test_template
|
||||||
|
|
||||||
|
@ -8,18 +8,25 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup
|
from test.helper import FakeYDL
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
DailymotionPlaylistIE,
|
DailymotionPlaylistIE,
|
||||||
DailymotionUserIE,
|
DailymotionUserIE,
|
||||||
VimeoChannelIE,
|
VimeoChannelIE,
|
||||||
|
VimeoUserIE,
|
||||||
|
VimeoAlbumIE,
|
||||||
|
VimeoGroupsIE,
|
||||||
UstreamChannelIE,
|
UstreamChannelIE,
|
||||||
|
SoundcloudSetIE,
|
||||||
SoundcloudUserIE,
|
SoundcloudUserIE,
|
||||||
LivestreamIE,
|
LivestreamIE,
|
||||||
NHLVideocenterIE,
|
NHLVideocenterIE,
|
||||||
|
BambuserChannelIE,
|
||||||
|
BandcampAlbumIE,
|
||||||
|
SmotriCommunityIE,
|
||||||
|
SmotriUserIE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -52,6 +59,30 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['title'], u'Vimeo Tributes')
|
self.assertEqual(result['title'], u'Vimeo Tributes')
|
||||||
self.assertTrue(len(result['entries']) > 24)
|
self.assertTrue(len(result['entries']) > 24)
|
||||||
|
|
||||||
|
def test_vimeo_user(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = VimeoUserIE(dl)
|
||||||
|
result = ie.extract('http://vimeo.com/nkistudio/videos')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'Nki')
|
||||||
|
self.assertTrue(len(result['entries']) > 65)
|
||||||
|
|
||||||
|
def test_vimeo_album(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = VimeoAlbumIE(dl)
|
||||||
|
result = ie.extract('http://vimeo.com/album/2632481')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'Staff Favorites: November 2013')
|
||||||
|
self.assertTrue(len(result['entries']) > 12)
|
||||||
|
|
||||||
|
def test_vimeo_groups(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = VimeoGroupsIE(dl)
|
||||||
|
result = ie.extract('http://vimeo.com/groups/rolexawards')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'Rolex Awards for Enterprise')
|
||||||
|
self.assertTrue(len(result['entries']) > 72)
|
||||||
|
|
||||||
def test_ustream_channel(self):
|
def test_ustream_channel(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = UstreamChannelIE(dl)
|
ie = UstreamChannelIE(dl)
|
||||||
@ -60,6 +91,14 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['id'], u'5124905')
|
self.assertEqual(result['id'], u'5124905')
|
||||||
self.assertTrue(len(result['entries']) >= 11)
|
self.assertTrue(len(result['entries']) >= 11)
|
||||||
|
|
||||||
|
def test_soundcloud_set(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = SoundcloudSetIE(dl)
|
||||||
|
result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'The Royal Concept EP')
|
||||||
|
self.assertTrue(len(result['entries']) >= 6)
|
||||||
|
|
||||||
def test_soundcloud_user(self):
|
def test_soundcloud_user(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = SoundcloudUserIE(dl)
|
ie = SoundcloudUserIE(dl)
|
||||||
@ -85,5 +124,39 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['title'], u'Highlights')
|
self.assertEqual(result['title'], u'Highlights')
|
||||||
self.assertEqual(len(result['entries']), 12)
|
self.assertEqual(len(result['entries']), 12)
|
||||||
|
|
||||||
|
def test_bambuser_channel(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = BambuserChannelIE(dl)
|
||||||
|
result = ie.extract('http://bambuser.com/channel/pixelversity')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'pixelversity')
|
||||||
|
self.assertTrue(len(result['entries']) >= 60)
|
||||||
|
|
||||||
|
def test_bandcamp_album(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = BandcampAlbumIE(dl)
|
||||||
|
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], u'Nightmare Night EP')
|
||||||
|
self.assertTrue(len(result['entries']) >= 4)
|
||||||
|
|
||||||
|
def test_smotri_community(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = SmotriCommunityIE(dl)
|
||||||
|
result = ie.extract('http://smotri.com/community/video/kommuna')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], u'kommuna')
|
||||||
|
self.assertEqual(result['title'], u'КПРФ')
|
||||||
|
self.assertTrue(len(result['entries']) >= 4)
|
||||||
|
|
||||||
|
def test_smotri_user(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = SmotriUserIE(dl)
|
||||||
|
result = ie.extract('http://smotri.com/user/inspector')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], u'inspector')
|
||||||
|
self.assertEqual(result['title'], u'Inspector')
|
||||||
|
self.assertTrue(len(result['entries']) >= 9)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
210
test/test_subtitles.py
Normal file
210
test/test_subtitles.py
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from test.helper import FakeYDL, md5
|
||||||
|
|
||||||
|
|
||||||
|
from youtube_dl.extractor import (
|
||||||
|
YoutubeIE,
|
||||||
|
DailymotionIE,
|
||||||
|
TEDIE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BaseTestSubtitles(unittest.TestCase):
|
||||||
|
url = None
|
||||||
|
IE = None
|
||||||
|
def setUp(self):
|
||||||
|
self.DL = FakeYDL()
|
||||||
|
self.ie = self.IE(self.DL)
|
||||||
|
|
||||||
|
def getInfoDict(self):
|
||||||
|
info_dict = self.ie.extract(self.url)
|
||||||
|
return info_dict
|
||||||
|
|
||||||
|
def getSubtitles(self):
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
return info_dict['subtitles']
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'QRS8MkLhQmM'
|
||||||
|
IE = YoutubeIE
|
||||||
|
|
||||||
|
def getSubtitles(self):
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
return info_dict[0]['subtitles']
|
||||||
|
|
||||||
|
def test_youtube_no_writesubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = False
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(subtitles, None)
|
||||||
|
|
||||||
|
def test_youtube_subtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||||
|
|
||||||
|
def test_youtube_subtitles_lang(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['it']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||||
|
|
||||||
|
def test_youtube_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles.keys()), 13)
|
||||||
|
|
||||||
|
def test_youtube_subtitles_sbv_format(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitlesformat'] = 'sbv'
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||||
|
|
||||||
|
def test_youtube_subtitles_vtt_format(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitlesformat'] = 'vtt'
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||||
|
|
||||||
|
def test_youtube_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_youtube_automatic_captions(self):
|
||||||
|
self.url = '8YoUxe5ncPo'
|
||||||
|
self.DL.params['writeautomaticsub'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['it']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertTrue(subtitles['it'] is not None)
|
||||||
|
|
||||||
|
def test_youtube_nosubtitles(self):
|
||||||
|
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
||||||
|
self.url = 'sAjKT8FhjI8'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles), 0)
|
||||||
|
|
||||||
|
def test_youtube_multiple_langs(self):
|
||||||
|
self.url = 'QRS8MkLhQmM'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
langs = ['it', 'fr', 'de']
|
||||||
|
self.DL.params['subtitleslangs'] = langs
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
for lang in langs:
|
||||||
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
|
class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.dailymotion.com/video/xczg00'
|
||||||
|
IE = DailymotionIE
|
||||||
|
|
||||||
|
def test_no_writesubtitles(self):
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(subtitles, None)
|
||||||
|
|
||||||
|
def test_subtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
||||||
|
|
||||||
|
def test_subtitles_lang(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['fr']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles.keys()), 5)
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_automatic_captions(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writeautomaticsub'] = True
|
||||||
|
self.DL.params['subtitleslang'] = ['en']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertTrue(len(subtitles.keys()) == 0)
|
||||||
|
|
||||||
|
def test_nosubtitles(self):
|
||||||
|
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
||||||
|
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles), 0)
|
||||||
|
|
||||||
|
def test_multiple_langs(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
langs = ['es', 'fr', 'de']
|
||||||
|
self.DL.params['subtitleslangs'] = langs
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
for lang in langs:
|
||||||
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTedSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
||||||
|
IE = TEDIE
|
||||||
|
|
||||||
|
def test_no_writesubtitles(self):
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(subtitles, None)
|
||||||
|
|
||||||
|
def test_subtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')
|
||||||
|
|
||||||
|
def test_subtitles_lang(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['fr']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles.keys()), 28)
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_automatic_captions(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writeautomaticsub'] = True
|
||||||
|
self.DL.params['subtitleslang'] = ['en']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertTrue(len(subtitles.keys()) == 0)
|
||||||
|
|
||||||
|
def test_multiple_langs(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
langs = ['es', 'fr', 'de']
|
||||||
|
self.DL.params['subtitleslangs'] = langs
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
for lang in langs:
|
||||||
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -24,6 +24,9 @@ from youtube_dl.utils import (
|
|||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
shell_quote,
|
||||||
|
encodeFilename,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
@ -170,6 +173,14 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(res_url, url)
|
self.assertEqual(res_url, url)
|
||||||
self.assertEqual(res_data, None)
|
self.assertEqual(res_data, None)
|
||||||
|
|
||||||
|
def test_shell_quote(self):
|
||||||
|
args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
|
||||||
|
self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
||||||
|
|
||||||
|
def test_str_to_int(self):
|
||||||
|
self.assertEqual(str_to_int('123,456'), 123456)
|
||||||
|
self.assertEqual(str_to_int('123.456'), 123456)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -7,8 +7,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import get_params, global_setup, try_rm
|
from test.helper import get_params, try_rm
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
@ -7,8 +7,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import get_params, global_setup
|
from test.helper import get_params
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
@ -31,9 +30,10 @@ params = get_params({
|
|||||||
|
|
||||||
|
|
||||||
TEST_ID = 'BaW_jenozKc'
|
TEST_ID = 'BaW_jenozKc'
|
||||||
INFO_JSON_FILE = TEST_ID + '.mp4.info.json'
|
INFO_JSON_FILE = TEST_ID + '.info.json'
|
||||||
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
||||||
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
||||||
|
test URL: https://github.com/rg3/youtube-dl/issues/1892
|
||||||
|
|
||||||
This is a test video for youtube-dl.
|
This is a test video for youtube-dl.
|
||||||
|
|
||||||
|
@ -6,8 +6,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup
|
from test.helper import FakeYDL
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
@ -16,6 +15,7 @@ from youtube_dl.extractor import (
|
|||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
YoutubeShowIE,
|
||||||
|
YoutubeTopListIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
def test_youtube_playlist(self):
|
def test_youtube_playlist(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['title'], 'ytdl test PL')
|
self.assertEqual(result['title'], 'ytdl test PL')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||||
@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
def test_issue_673(self):
|
def test_issue_673(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('PLBB231211A4F62143')[0]
|
result = ie.extract('PLBB231211A4F62143')
|
||||||
self.assertTrue(len(result['entries']) > 25)
|
self.assertTrue(len(result['entries']) > 25)
|
||||||
|
|
||||||
def test_youtube_playlist_long(self):
|
def test_youtube_playlist_long(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertTrue(len(result['entries']) >= 799)
|
self.assertTrue(len(result['entries']) >= 799)
|
||||||
|
|
||||||
@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
#651
|
#651
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||||
@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
def test_youtube_playlist_empty(self):
|
def test_youtube_playlist_empty(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(len(result['entries']), 0)
|
self.assertEqual(len(result['entries']), 0)
|
||||||
|
|
||||||
@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
# TODO find a > 100 (paginating?) videos course
|
# TODO find a > 100 (paginating?) videos course
|
||||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
|
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
@ -84,22 +84,22 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeChannelIE(dl)
|
ie = YoutubeChannelIE(dl)
|
||||||
#test paginated channel
|
#test paginated channel
|
||||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
|
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
||||||
self.assertTrue(len(result['entries']) > 90)
|
self.assertTrue(len(result['entries']) > 90)
|
||||||
#test autogenerated channel
|
#test autogenerated channel
|
||||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
|
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||||
self.assertTrue(len(result['entries']) >= 18)
|
self.assertTrue(len(result['entries']) >= 18)
|
||||||
|
|
||||||
def test_youtube_user(self):
|
def test_youtube_user(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeUserIE(dl)
|
ie = YoutubeUserIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
|
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
||||||
self.assertTrue(len(result['entries']) >= 320)
|
self.assertTrue(len(result['entries']) >= 320)
|
||||||
|
|
||||||
def test_youtube_safe_search(self):
|
def test_youtube_safe_search(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
|
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
|
||||||
self.assertEqual(len(result['entries']), 2)
|
self.assertEqual(len(result['entries']), 2)
|
||||||
|
|
||||||
def test_youtube_show(self):
|
def test_youtube_show(self):
|
||||||
@ -108,5 +108,21 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
||||||
self.assertTrue(len(result) >= 3)
|
self.assertTrue(len(result) >= 3)
|
||||||
|
|
||||||
|
def test_youtube_mix(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = YoutubePlaylistIE(dl)
|
||||||
|
result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
|
||||||
|
entries = result['entries']
|
||||||
|
self.assertTrue(len(entries) >= 20)
|
||||||
|
original_video = entries[0]
|
||||||
|
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||||
|
|
||||||
|
def test_youtube_toplist(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = YoutubeTopListIE(dl)
|
||||||
|
result = ie.extract('yttoplist:music:Top Tracks')
|
||||||
|
entries = result['entries']
|
||||||
|
self.assertTrue(len(entries) >= 5)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -6,9 +6,6 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import global_setup
|
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
|
@ -1,95 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
# Allow direct execution
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import unittest
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
from test.helper import FakeYDL, global_setup, md5
|
|
||||||
global_setup()
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import YoutubeIE
|
|
||||||
|
|
||||||
|
|
||||||
class TestYoutubeSubtitles(unittest.TestCase):
|
|
||||||
def setUp(self):
|
|
||||||
self.DL = FakeYDL()
|
|
||||||
self.url = 'QRS8MkLhQmM'
|
|
||||||
|
|
||||||
def getInfoDict(self):
|
|
||||||
IE = YoutubeIE(self.DL)
|
|
||||||
info_dict = IE.extract(self.url)
|
|
||||||
return info_dict
|
|
||||||
|
|
||||||
def getSubtitles(self):
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
return info_dict[0]['subtitles']
|
|
||||||
|
|
||||||
def test_youtube_no_writesubtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = False
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
|
|
||||||
def test_youtube_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
|
||||||
|
|
||||||
def test_youtube_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['it']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
|
||||||
|
|
||||||
def test_youtube_allsubtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(len(subtitles.keys()), 13)
|
|
||||||
|
|
||||||
def test_youtube_subtitles_sbv_format(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitlesformat'] = 'sbv'
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
|
||||||
|
|
||||||
def test_youtube_subtitles_vtt_format(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitlesformat'] = 'vtt'
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
|
|
||||||
|
|
||||||
def test_youtube_list_subtitles(self):
|
|
||||||
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_youtube_automatic_captions(self):
|
|
||||||
self.url = '8YoUxe5ncPo'
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['it']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(subtitles['it'] is not None)
|
|
||||||
|
|
||||||
def test_youtube_nosubtitles(self):
|
|
||||||
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
|
||||||
self.url = 'sAjKT8FhjI8'
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(len(subtitles), 0)
|
|
||||||
|
|
||||||
def test_youtube_multiple_langs(self):
|
|
||||||
self.url = 'QRS8MkLhQmM'
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['it', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
@ -1,15 +1,19 @@
|
|||||||
import math
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import traceback
|
|
||||||
|
|
||||||
if os.name == 'nt':
|
from .utils import (
|
||||||
import ctypes
|
compat_urllib_error,
|
||||||
|
compat_urllib_request,
|
||||||
from .utils import *
|
ContentTooShortError,
|
||||||
|
determine_ext,
|
||||||
|
encodeFilename,
|
||||||
|
format_bytes,
|
||||||
|
sanitize_open,
|
||||||
|
timeconvert,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class FileDownloader(object):
|
class FileDownloader(object):
|
||||||
@ -49,20 +53,6 @@ class FileDownloader(object):
|
|||||||
self._progress_hooks = []
|
self._progress_hooks = []
|
||||||
self.params = params
|
self.params = params
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def format_bytes(bytes):
|
|
||||||
if bytes is None:
|
|
||||||
return 'N/A'
|
|
||||||
if type(bytes) is str:
|
|
||||||
bytes = float(bytes)
|
|
||||||
if bytes == 0.0:
|
|
||||||
exponent = 0
|
|
||||||
else:
|
|
||||||
exponent = int(math.log(bytes, 1024.0))
|
|
||||||
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
|
|
||||||
converted = float(bytes) / float(1024 ** exponent)
|
|
||||||
return '%.2f%s' % (converted, suffix)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_seconds(seconds):
|
def format_seconds(seconds):
|
||||||
(mins, secs) = divmod(seconds, 60)
|
(mins, secs) = divmod(seconds, 60)
|
||||||
@ -113,7 +103,7 @@ class FileDownloader(object):
|
|||||||
def format_speed(speed):
|
def format_speed(speed):
|
||||||
if speed is None:
|
if speed is None:
|
||||||
return '%10s' % '---b/s'
|
return '%10s' % '---b/s'
|
||||||
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
|
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def best_block_size(elapsed_time, bytes):
|
def best_block_size(elapsed_time, bytes):
|
||||||
@ -144,16 +134,8 @@ class FileDownloader(object):
|
|||||||
def to_stderr(self, message):
|
def to_stderr(self, message):
|
||||||
self.ydl.to_screen(message)
|
self.ydl.to_screen(message)
|
||||||
|
|
||||||
def to_cons_title(self, message):
|
def to_console_title(self, message):
|
||||||
"""Set console/terminal window title to message."""
|
self.ydl.to_console_title(message)
|
||||||
if not self.params.get('consoletitle', False):
|
|
||||||
return
|
|
||||||
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
|
||||||
# c_wchar_p() might not be necessary if `message` is
|
|
||||||
# already of type unicode()
|
|
||||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
|
||||||
elif 'TERM' in os.environ:
|
|
||||||
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
|
|
||||||
|
|
||||||
def trouble(self, *args, **kargs):
|
def trouble(self, *args, **kargs):
|
||||||
self.ydl.trouble(*args, **kargs)
|
self.ydl.trouble(*args, **kargs)
|
||||||
@ -194,7 +176,7 @@ class FileDownloader(object):
|
|||||||
if old_filename == new_filename:
|
if old_filename == new_filename:
|
||||||
return
|
return
|
||||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||||
except (IOError, OSError) as err:
|
except (IOError, OSError):
|
||||||
self.report_error(u'unable to rename file')
|
self.report_error(u'unable to rename file')
|
||||||
|
|
||||||
def try_utime(self, filename, last_modified_hdr):
|
def try_utime(self, filename, last_modified_hdr):
|
||||||
@ -222,22 +204,59 @@ class FileDownloader(object):
|
|||||||
"""Report destination filename."""
|
"""Report destination filename."""
|
||||||
self.to_screen(u'[download] Destination: ' + filename)
|
self.to_screen(u'[download] Destination: ' + filename)
|
||||||
|
|
||||||
|
def _report_progress_status(self, msg, is_last_line=False):
|
||||||
|
fullmsg = u'[download] ' + msg
|
||||||
|
if self.params.get('progress_with_newline', False):
|
||||||
|
self.to_screen(fullmsg)
|
||||||
|
else:
|
||||||
|
if os.name == 'nt':
|
||||||
|
prev_len = getattr(self, '_report_progress_prev_line_length',
|
||||||
|
0)
|
||||||
|
if prev_len > len(fullmsg):
|
||||||
|
fullmsg += u' ' * (prev_len - len(fullmsg))
|
||||||
|
self._report_progress_prev_line_length = len(fullmsg)
|
||||||
|
clear_line = u'\r'
|
||||||
|
else:
|
||||||
|
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
|
||||||
|
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||||
|
self.to_console_title(u'youtube-dl ' + msg)
|
||||||
|
|
||||||
def report_progress(self, percent, data_len_str, speed, eta):
|
def report_progress(self, percent, data_len_str, speed, eta):
|
||||||
"""Report download progress."""
|
"""Report download progress."""
|
||||||
if self.params.get('noprogress', False):
|
if self.params.get('noprogress', False):
|
||||||
return
|
return
|
||||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
if eta is not None:
|
||||||
eta_str = self.format_eta(eta)
|
eta_str = self.format_eta(eta)
|
||||||
percent_str = self.format_percent(percent)
|
|
||||||
speed_str = self.format_speed(speed)
|
|
||||||
if self.params.get('progress_with_newline', False):
|
|
||||||
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
|
||||||
(percent_str, data_len_str, speed_str, eta_str))
|
|
||||||
else:
|
else:
|
||||||
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
|
eta_str = 'Unknown ETA'
|
||||||
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
|
if percent is not None:
|
||||||
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
|
percent_str = self.format_percent(percent)
|
||||||
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
|
else:
|
||||||
|
percent_str = 'Unknown %'
|
||||||
|
speed_str = self.format_speed(speed)
|
||||||
|
|
||||||
|
msg = (u'%s of %s at %s ETA %s' %
|
||||||
|
(percent_str, data_len_str, speed_str, eta_str))
|
||||||
|
self._report_progress_status(msg)
|
||||||
|
|
||||||
|
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
|
||||||
|
if self.params.get('noprogress', False):
|
||||||
|
return
|
||||||
|
downloaded_str = format_bytes(downloaded_data_len)
|
||||||
|
speed_str = self.format_speed(speed)
|
||||||
|
elapsed_str = FileDownloader.format_seconds(elapsed)
|
||||||
|
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
|
||||||
|
self._report_progress_status(msg)
|
||||||
|
|
||||||
|
def report_finish(self, data_len_str, tot_time):
|
||||||
|
"""Report download finished."""
|
||||||
|
if self.params.get('noprogress', False):
|
||||||
|
self.to_screen(u'[download] Download completed')
|
||||||
|
else:
|
||||||
|
self._report_progress_status(
|
||||||
|
(u'100%% of %s in %s' %
|
||||||
|
(data_len_str, self.format_seconds(tot_time))),
|
||||||
|
is_last_line=True)
|
||||||
|
|
||||||
def report_resuming_byte(self, resume_len):
|
def report_resuming_byte(self, resume_len):
|
||||||
"""Report attempt to resume at given byte."""
|
"""Report attempt to resume at given byte."""
|
||||||
@ -251,23 +270,85 @@ class FileDownloader(object):
|
|||||||
"""Report file has already been fully downloaded."""
|
"""Report file has already been fully downloaded."""
|
||||||
try:
|
try:
|
||||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||||
except (UnicodeEncodeError) as err:
|
except UnicodeEncodeError:
|
||||||
self.to_screen(u'[download] The file has already been downloaded')
|
self.to_screen(u'[download] The file has already been downloaded')
|
||||||
|
|
||||||
def report_unable_to_resume(self):
|
def report_unable_to_resume(self):
|
||||||
"""Report it was impossible to resume download."""
|
"""Report it was impossible to resume download."""
|
||||||
self.to_screen(u'[download] Unable to resume')
|
self.to_screen(u'[download] Unable to resume')
|
||||||
|
|
||||||
def report_finish(self, data_len_str, tot_time):
|
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn):
|
||||||
"""Report download finished."""
|
def run_rtmpdump(args):
|
||||||
if self.params.get('noprogress', False):
|
start = time.time()
|
||||||
self.to_screen(u'[download] Download completed')
|
resume_percent = None
|
||||||
else:
|
resume_downloaded_data_len = None
|
||||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||||
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
|
cursor_in_new_line = True
|
||||||
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
proc_stderr_closed = False
|
||||||
|
while not proc_stderr_closed:
|
||||||
|
# read line from stderr
|
||||||
|
line = u''
|
||||||
|
while True:
|
||||||
|
char = proc.stderr.read(1)
|
||||||
|
if not char:
|
||||||
|
proc_stderr_closed = True
|
||||||
|
break
|
||||||
|
if char in [b'\r', b'\n']:
|
||||||
|
break
|
||||||
|
line += char.decode('ascii', 'replace')
|
||||||
|
if not line:
|
||||||
|
# proc_stderr_closed is True
|
||||||
|
continue
|
||||||
|
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||||
|
if mobj:
|
||||||
|
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||||
|
percent = float(mobj.group(2))
|
||||||
|
if not resume_percent:
|
||||||
|
resume_percent = percent
|
||||||
|
resume_downloaded_data_len = downloaded_data_len
|
||||||
|
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
|
||||||
|
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
|
||||||
|
data_len = None
|
||||||
|
if percent > 0:
|
||||||
|
data_len = int(downloaded_data_len * 100 / percent)
|
||||||
|
data_len_str = u'~' + format_bytes(data_len)
|
||||||
|
self.report_progress(percent, data_len_str, speed, eta)
|
||||||
|
cursor_in_new_line = False
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': downloaded_data_len,
|
||||||
|
'total_bytes': data_len,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'downloading',
|
||||||
|
'eta': eta,
|
||||||
|
'speed': speed,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
# no percent for live streams
|
||||||
|
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||||
|
if mobj:
|
||||||
|
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||||
|
time_now = time.time()
|
||||||
|
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||||
|
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
|
||||||
|
cursor_in_new_line = False
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': downloaded_data_len,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'downloading',
|
||||||
|
'speed': speed,
|
||||||
|
})
|
||||||
|
elif self.params.get('verbose', False):
|
||||||
|
if not cursor_in_new_line:
|
||||||
|
self.to_screen(u'')
|
||||||
|
cursor_in_new_line = True
|
||||||
|
self.to_screen(u'[rtmpdump] '+line)
|
||||||
|
proc.wait()
|
||||||
|
if not cursor_in_new_line:
|
||||||
|
self.to_screen(u'')
|
||||||
|
return proc.returncode
|
||||||
|
|
||||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
|
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
test = self.params.get('test', False)
|
test = self.params.get('test', False)
|
||||||
@ -278,12 +359,11 @@ class FileDownloader(object):
|
|||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||||
return False
|
return False
|
||||||
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
|
|
||||||
|
|
||||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||||
# the connection was interrumpted and resuming appears to be
|
# the connection was interrumpted and resuming appears to be
|
||||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||||
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
|
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||||
if player_url is not None:
|
if player_url is not None:
|
||||||
basic_args += ['--swfVfy', player_url]
|
basic_args += ['--swfVfy', player_url]
|
||||||
if page_url is not None:
|
if page_url is not None:
|
||||||
@ -294,31 +374,53 @@ class FileDownloader(object):
|
|||||||
basic_args += ['--tcUrl', url]
|
basic_args += ['--tcUrl', url]
|
||||||
if test:
|
if test:
|
||||||
basic_args += ['--stop', '1']
|
basic_args += ['--stop', '1']
|
||||||
|
if live:
|
||||||
|
basic_args += ['--live']
|
||||||
|
if conn:
|
||||||
|
basic_args += ['--conn', conn]
|
||||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||||
|
|
||||||
|
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||||
|
# Windows subprocess module does not actually support Unicode
|
||||||
|
# on Python 2.x
|
||||||
|
# See http://stackoverflow.com/a/9951851/35070
|
||||||
|
subprocess_encoding = sys.getfilesystemencoding()
|
||||||
|
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||||
|
else:
|
||||||
|
subprocess_encoding = None
|
||||||
|
|
||||||
if self.params.get('verbose', False):
|
if self.params.get('verbose', False):
|
||||||
|
if subprocess_encoding:
|
||||||
|
str_args = [
|
||||||
|
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||||
|
for a in args]
|
||||||
|
else:
|
||||||
|
str_args = args
|
||||||
try:
|
try:
|
||||||
import pipes
|
import pipes
|
||||||
shell_quote = lambda args: ' '.join(map(pipes.quote, args))
|
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||||
except ImportError:
|
except ImportError:
|
||||||
shell_quote = repr
|
shell_quote = repr
|
||||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
|
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||||
retval = subprocess.call(args)
|
|
||||||
|
retval = run_rtmpdump(args)
|
||||||
|
|
||||||
while (retval == 2 or retval == 1) and not test:
|
while (retval == 2 or retval == 1) and not test:
|
||||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
|
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||||
time.sleep(5.0) # This seems to be needed
|
time.sleep(5.0) # This seems to be needed
|
||||||
retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
if prevsize == cursize and retval == 1:
|
if prevsize == cursize and retval == 1:
|
||||||
break
|
break
|
||||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||||
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||||
retval = 0
|
retval = 0
|
||||||
break
|
break
|
||||||
if retval == 0 or (test and retval == 2):
|
if retval == 0 or (test and retval == 2):
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
|
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': fsize,
|
'downloaded_bytes': fsize,
|
||||||
@ -366,15 +468,20 @@ class FileDownloader(object):
|
|||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
|
||||||
args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
|
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||||
# Check for ffmpeg first
|
'-bsf:a', 'aac_adtstoasc', tmpfilename]
|
||||||
try:
|
|
||||||
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
|
||||||
except (OSError, IOError):
|
|
||||||
self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
|
|
||||||
return False
|
|
||||||
|
|
||||||
retval = subprocess.call(args)
|
for program in ['avconv', 'ffmpeg']:
|
||||||
|
try:
|
||||||
|
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||||
|
break
|
||||||
|
except (OSError, IOError):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||||
|
cmd = [program] + args
|
||||||
|
|
||||||
|
retval = subprocess.call(cmd)
|
||||||
if retval == 0:
|
if retval == 0:
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||||
@ -411,7 +518,9 @@ class FileDownloader(object):
|
|||||||
info_dict.get('player_url', None),
|
info_dict.get('player_url', None),
|
||||||
info_dict.get('page_url', None),
|
info_dict.get('page_url', None),
|
||||||
info_dict.get('play_path', None),
|
info_dict.get('play_path', None),
|
||||||
info_dict.get('tc_url', None))
|
info_dict.get('tc_url', None),
|
||||||
|
info_dict.get('rtmp_live', False),
|
||||||
|
info_dict.get('rtmp_conn', None))
|
||||||
|
|
||||||
# Attempt to download using mplayer
|
# Attempt to download using mplayer
|
||||||
if url.startswith('mms') or url.startswith('rtsp'):
|
if url.startswith('mms') or url.startswith('rtsp'):
|
||||||
@ -515,7 +624,7 @@ class FileDownloader(object):
|
|||||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
data_len_str = self.format_bytes(data_len)
|
data_len_str = format_bytes(data_len)
|
||||||
byte_counter = 0 + resume_len
|
byte_counter = 0 + resume_len
|
||||||
block_size = self.params.get('buffersize', 1024)
|
block_size = self.params.get('buffersize', 1024)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
@ -550,12 +659,11 @@ class FileDownloader(object):
|
|||||||
# Progress message
|
# Progress message
|
||||||
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||||
if data_len is None:
|
if data_len is None:
|
||||||
self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
|
eta = percent = None
|
||||||
eta = None
|
|
||||||
else:
|
else:
|
||||||
percent = self.calc_percent(byte_counter, data_len)
|
percent = self.calc_percent(byte_counter, data_len)
|
||||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||||
self.report_progress(percent, data_len_str, speed, eta)
|
self.report_progress(percent, data_len_str, speed, eta)
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': byte_counter,
|
'downloaded_bytes': byte_counter,
|
||||||
|
@ -501,7 +501,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
|||||||
|
|
||||||
options = ['-c', 'copy']
|
options = ['-c', 'copy']
|
||||||
for (name, value) in metadata.items():
|
for (name, value) in metadata.items():
|
||||||
options.extend(['-metadata', '%s="%s"' % (name, value)])
|
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||||
options.extend(['-f', ext])
|
options.extend(['-f', ext])
|
||||||
|
|
||||||
self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
|
self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
|
||||||
|
@ -3,19 +3,56 @@
|
|||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import collections
|
||||||
import errno
|
import errno
|
||||||
import io
|
import io
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
import platform
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import subprocess
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from .utils import *
|
if os.name == 'nt':
|
||||||
|
import ctypes
|
||||||
|
|
||||||
|
from .utils import (
|
||||||
|
compat_cookiejar,
|
||||||
|
compat_http_client,
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_error,
|
||||||
|
compat_urllib_request,
|
||||||
|
ContentTooShortError,
|
||||||
|
date_from_str,
|
||||||
|
DateRange,
|
||||||
|
determine_ext,
|
||||||
|
DownloadError,
|
||||||
|
encodeFilename,
|
||||||
|
ExtractorError,
|
||||||
|
format_bytes,
|
||||||
|
get_term_width,
|
||||||
|
locked_file,
|
||||||
|
make_HTTPS_handler,
|
||||||
|
MaxDownloadsReached,
|
||||||
|
PostProcessingError,
|
||||||
|
platform_name,
|
||||||
|
preferredencoding,
|
||||||
|
SameFileError,
|
||||||
|
sanitize_filename,
|
||||||
|
subtitles_filename,
|
||||||
|
takewhile_inclusive,
|
||||||
|
UnavailableVideoError,
|
||||||
|
write_json_file,
|
||||||
|
write_string,
|
||||||
|
YoutubeDLHandler,
|
||||||
|
)
|
||||||
from .extractor import get_info_extractor, gen_extractors
|
from .extractor import get_info_extractor, gen_extractors
|
||||||
from .FileDownloader import FileDownloader
|
from .FileDownloader import FileDownloader
|
||||||
|
from .version import __version__
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(object):
|
class YoutubeDL(object):
|
||||||
@ -57,6 +94,7 @@ class YoutubeDL(object):
|
|||||||
forcethumbnail: Force printing thumbnail URL.
|
forcethumbnail: Force printing thumbnail URL.
|
||||||
forcedescription: Force printing description.
|
forcedescription: Force printing description.
|
||||||
forcefilename: Force printing final filename.
|
forcefilename: Force printing final filename.
|
||||||
|
forcejson: Force printing info_dict as JSON.
|
||||||
simulate: Do not download the video files.
|
simulate: Do not download the video files.
|
||||||
format: Video format code.
|
format: Video format code.
|
||||||
format_limit: Highest quality format to try.
|
format_limit: Highest quality format to try.
|
||||||
@ -68,6 +106,7 @@ class YoutubeDL(object):
|
|||||||
playlistend: Playlist item to end at.
|
playlistend: Playlist item to end at.
|
||||||
matchtitle: Download only matching titles.
|
matchtitle: Download only matching titles.
|
||||||
rejecttitle: Reject downloads for matching titles.
|
rejecttitle: Reject downloads for matching titles.
|
||||||
|
logger: Log messages to a logging.Logger instance.
|
||||||
logtostderr: Log messages to stderr instead of stdout.
|
logtostderr: Log messages to stderr instead of stdout.
|
||||||
writedescription: Write the video description to a .description file
|
writedescription: Write the video description to a .description file
|
||||||
writeinfojson: Write the video description to a .info.json file
|
writeinfojson: Write the video description to a .info.json file
|
||||||
@ -88,9 +127,15 @@ class YoutubeDL(object):
|
|||||||
noplaylist: Download single video instead of a playlist if in doubt.
|
noplaylist: Download single video instead of a playlist if in doubt.
|
||||||
age_limit: An integer representing the user's age in years.
|
age_limit: An integer representing the user's age in years.
|
||||||
Unsuitable videos for the given age are skipped.
|
Unsuitable videos for the given age are skipped.
|
||||||
downloadarchive: File name of a file where all downloads are recorded.
|
download_archive: File name of a file where all downloads are recorded.
|
||||||
Videos already present in the file are not downloaded
|
Videos already present in the file are not downloaded
|
||||||
again.
|
again.
|
||||||
|
cookiefile: File name where cookies should be read from and dumped to.
|
||||||
|
nocheckcertificate:Do not verify SSL certificates
|
||||||
|
proxy: URL of the proxy server to use
|
||||||
|
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||||
|
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||||
|
support, using fridibi
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
@ -105,7 +150,7 @@ class YoutubeDL(object):
|
|||||||
_num_downloads = None
|
_num_downloads = None
|
||||||
_screen_file = None
|
_screen_file = None
|
||||||
|
|
||||||
def __init__(self, params):
|
def __init__(self, params=None):
|
||||||
"""Create a FileDownloader object with the given options."""
|
"""Create a FileDownloader object with the given options."""
|
||||||
self._ies = []
|
self._ies = []
|
||||||
self._ies_instances = {}
|
self._ies_instances = {}
|
||||||
@ -114,6 +159,29 @@ class YoutubeDL(object):
|
|||||||
self._download_retcode = 0
|
self._download_retcode = 0
|
||||||
self._num_downloads = 0
|
self._num_downloads = 0
|
||||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||||
|
self._err_file = sys.stderr
|
||||||
|
self.params = {} if params is None else params
|
||||||
|
|
||||||
|
if params.get('bidi_workaround', False):
|
||||||
|
try:
|
||||||
|
import pty
|
||||||
|
master, slave = pty.openpty()
|
||||||
|
width = get_term_width()
|
||||||
|
if width is None:
|
||||||
|
width_args = []
|
||||||
|
else:
|
||||||
|
width_args = ['-w', str(width)]
|
||||||
|
self._fribidi = subprocess.Popen(
|
||||||
|
['fribidi', '-c', 'UTF-8'] + width_args,
|
||||||
|
stdin=subprocess.PIPE,
|
||||||
|
stdout=slave,
|
||||||
|
stderr=self._err_file)
|
||||||
|
self._fribidi_channel = os.fdopen(master, 'rb')
|
||||||
|
except OSError as ose:
|
||||||
|
if ose.errno == 2:
|
||||||
|
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
||||||
@ -123,14 +191,15 @@ class YoutubeDL(object):
|
|||||||
u'Assuming --restrict-filenames since file system encoding '
|
u'Assuming --restrict-filenames since file system encoding '
|
||||||
u'cannot encode all charactes. '
|
u'cannot encode all charactes. '
|
||||||
u'Set the LC_ALL environment variable to fix this.')
|
u'Set the LC_ALL environment variable to fix this.')
|
||||||
params['restrictfilenames'] = True
|
self.params['restrictfilenames'] = True
|
||||||
|
|
||||||
self.params = params
|
|
||||||
self.fd = FileDownloader(self, self.params)
|
self.fd = FileDownloader(self, self.params)
|
||||||
|
|
||||||
if '%(stitle)s' in self.params['outtmpl']:
|
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||||
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||||
|
|
||||||
|
self._setup_opener()
|
||||||
|
|
||||||
def add_info_extractor(self, ie):
|
def add_info_extractor(self, ie):
|
||||||
"""Add an InfoExtractor object to the end of the list."""
|
"""Add an InfoExtractor object to the end of the list."""
|
||||||
self._ies.append(ie)
|
self._ies.append(ie)
|
||||||
@ -161,24 +230,76 @@ class YoutubeDL(object):
|
|||||||
self._pps.append(pp)
|
self._pps.append(pp)
|
||||||
pp.set_downloader(self)
|
pp.set_downloader(self)
|
||||||
|
|
||||||
|
def _bidi_workaround(self, message):
|
||||||
|
if not hasattr(self, '_fribidi_channel'):
|
||||||
|
return message
|
||||||
|
|
||||||
|
assert type(message) == type(u'')
|
||||||
|
line_count = message.count(u'\n') + 1
|
||||||
|
self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
|
||||||
|
self._fribidi.stdin.flush()
|
||||||
|
res = u''.join(self._fribidi_channel.readline().decode('utf-8')
|
||||||
|
for _ in range(line_count))
|
||||||
|
return res[:-len(u'\n')]
|
||||||
|
|
||||||
def to_screen(self, message, skip_eol=False):
|
def to_screen(self, message, skip_eol=False):
|
||||||
"""Print message to stdout if not in quiet mode."""
|
"""Print message to stdout if not in quiet mode."""
|
||||||
if not self.params.get('quiet', False):
|
return self.to_stdout(message, skip_eol, check_quiet=True)
|
||||||
|
|
||||||
|
def to_stdout(self, message, skip_eol=False, check_quiet=False):
|
||||||
|
"""Print message to stdout if not in quiet mode."""
|
||||||
|
if self.params.get('logger'):
|
||||||
|
self.params['logger'].debug(message)
|
||||||
|
elif not check_quiet or not self.params.get('quiet', False):
|
||||||
|
message = self._bidi_workaround(message)
|
||||||
terminator = [u'\n', u''][skip_eol]
|
terminator = [u'\n', u''][skip_eol]
|
||||||
output = message + terminator
|
output = message + terminator
|
||||||
|
|
||||||
write_string(output, self._screen_file)
|
write_string(output, self._screen_file)
|
||||||
|
|
||||||
def to_stderr(self, message):
|
def to_stderr(self, message):
|
||||||
"""Print message to stderr."""
|
"""Print message to stderr."""
|
||||||
assert type(message) == type(u'')
|
assert type(message) == type(u'')
|
||||||
output = message + u'\n'
|
if self.params.get('logger'):
|
||||||
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
self.params['logger'].error(message)
|
||||||
output = output.encode(preferredencoding())
|
else:
|
||||||
sys.stderr.write(output)
|
message = self._bidi_workaround(message)
|
||||||
|
output = message + u'\n'
|
||||||
|
write_string(output, self._err_file)
|
||||||
|
|
||||||
def fixed_template(self):
|
def to_console_title(self, message):
|
||||||
"""Checks if the output template is fixed."""
|
if not self.params.get('consoletitle', False):
|
||||||
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
|
return
|
||||||
|
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
||||||
|
# c_wchar_p() might not be necessary if `message` is
|
||||||
|
# already of type unicode()
|
||||||
|
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||||
|
elif 'TERM' in os.environ:
|
||||||
|
write_string(u'\033]0;%s\007' % message, self._screen_file)
|
||||||
|
|
||||||
|
def save_console_title(self):
|
||||||
|
if not self.params.get('consoletitle', False):
|
||||||
|
return
|
||||||
|
if 'TERM' in os.environ:
|
||||||
|
# Save the title on stack
|
||||||
|
write_string(u'\033[22;0t', self._screen_file)
|
||||||
|
|
||||||
|
def restore_console_title(self):
|
||||||
|
if not self.params.get('consoletitle', False):
|
||||||
|
return
|
||||||
|
if 'TERM' in os.environ:
|
||||||
|
# Restore the title from stack
|
||||||
|
write_string(u'\033[23;0t', self._screen_file)
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
self.save_console_title()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *args):
|
||||||
|
self.restore_console_title()
|
||||||
|
|
||||||
|
if self.params.get('cookiefile') is not None:
|
||||||
|
self.cookiejar.save()
|
||||||
|
|
||||||
def trouble(self, message=None, tb=None):
|
def trouble(self, message=None, tb=None):
|
||||||
"""Determine action to take when a download problem appears.
|
"""Determine action to take when a download problem appears.
|
||||||
@ -215,7 +336,7 @@ class YoutubeDL(object):
|
|||||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||||
If stderr is a tty file the 'WARNING:' will be colored
|
If stderr is a tty file the 'WARNING:' will be colored
|
||||||
'''
|
'''
|
||||||
if sys.stderr.isatty() and os.name != 'nt':
|
if self._err_file.isatty() and os.name != 'nt':
|
||||||
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header = u'WARNING:'
|
_msg_header = u'WARNING:'
|
||||||
@ -227,7 +348,7 @@ class YoutubeDL(object):
|
|||||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
||||||
in red if stderr is a tty file.
|
in red if stderr is a tty file.
|
||||||
'''
|
'''
|
||||||
if sys.stderr.isatty() and os.name != 'nt':
|
if self._err_file.isatty() and os.name != 'nt':
|
||||||
_msg_header = u'\033[0;31mERROR:\033[0m'
|
_msg_header = u'\033[0;31mERROR:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header = u'ERROR:'
|
_msg_header = u'ERROR:'
|
||||||
@ -254,7 +375,7 @@ class YoutubeDL(object):
|
|||||||
"""Report file has already been fully downloaded."""
|
"""Report file has already been fully downloaded."""
|
||||||
try:
|
try:
|
||||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||||
except (UnicodeEncodeError) as err:
|
except UnicodeEncodeError:
|
||||||
self.to_screen(u'[download] The file has already been downloaded')
|
self.to_screen(u'[download] The file has already been downloaded')
|
||||||
|
|
||||||
def increment_downloads(self):
|
def increment_downloads(self):
|
||||||
@ -272,22 +393,21 @@ class YoutubeDL(object):
|
|||||||
autonumber_size = 5
|
autonumber_size = 5
|
||||||
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
|
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
|
||||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||||
if template_dict['playlist_index'] is not None:
|
if template_dict.get('playlist_index') is not None:
|
||||||
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
|
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
|
||||||
|
|
||||||
sanitize = lambda k, v: sanitize_filename(
|
sanitize = lambda k, v: sanitize_filename(
|
||||||
u'NA' if v is None else compat_str(v),
|
compat_str(v),
|
||||||
restricted=self.params.get('restrictfilenames'),
|
restricted=self.params.get('restrictfilenames'),
|
||||||
is_id=(k == u'id'))
|
is_id=(k == u'id'))
|
||||||
template_dict = dict((k, sanitize(k, v))
|
template_dict = dict((k, sanitize(k, v))
|
||||||
for k, v in template_dict.items())
|
for k, v in template_dict.items()
|
||||||
|
if v is not None)
|
||||||
|
template_dict = collections.defaultdict(lambda: u'NA', template_dict)
|
||||||
|
|
||||||
tmpl = os.path.expanduser(self.params['outtmpl'])
|
tmpl = os.path.expanduser(self.params['outtmpl'])
|
||||||
filename = tmpl % template_dict
|
filename = tmpl % template_dict
|
||||||
return filename
|
return filename
|
||||||
except KeyError as err:
|
|
||||||
self.report_error(u'Erroneous output template')
|
|
||||||
return None
|
|
||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
|
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
|
||||||
return None
|
return None
|
||||||
@ -295,15 +415,17 @@ class YoutubeDL(object):
|
|||||||
def _match_entry(self, info_dict):
|
def _match_entry(self, info_dict):
|
||||||
""" Returns None iff the file should be downloaded """
|
""" Returns None iff the file should be downloaded """
|
||||||
|
|
||||||
title = info_dict['title']
|
if 'title' in info_dict:
|
||||||
matchtitle = self.params.get('matchtitle', False)
|
# This can happen when we're just evaluating the playlist
|
||||||
if matchtitle:
|
title = info_dict['title']
|
||||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
matchtitle = self.params.get('matchtitle', False)
|
||||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
if matchtitle:
|
||||||
rejecttitle = self.params.get('rejecttitle', False)
|
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||||
if rejecttitle:
|
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
rejecttitle = self.params.get('rejecttitle', False)
|
||||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
if rejecttitle:
|
||||||
|
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||||
|
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||||
date = info_dict.get('upload_date', None)
|
date = info_dict.get('upload_date', None)
|
||||||
if date is not None:
|
if date is not None:
|
||||||
dateRange = self.params.get('daterange', DateRange())
|
dateRange = self.params.get('daterange', DateRange())
|
||||||
@ -314,11 +436,18 @@ class YoutubeDL(object):
|
|||||||
if age_limit < info_dict.get('age_limit', 0):
|
if age_limit < info_dict.get('age_limit', 0):
|
||||||
return u'Skipping "' + title + '" because it is age restricted'
|
return u'Skipping "' + title + '" because it is age restricted'
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return (u'%(title)s has already been recorded in archive'
|
return (u'%s has already been recorded in archive'
|
||||||
% info_dict)
|
% info_dict.get('title', info_dict.get('id', u'video')))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
@staticmethod
|
||||||
|
def add_extra_info(info_dict, extra_info):
|
||||||
|
'''Set the keys from extra_info in info dict if they are missing'''
|
||||||
|
for key, value in extra_info.items():
|
||||||
|
info_dict.setdefault(key, value)
|
||||||
|
|
||||||
|
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||||
|
process=True):
|
||||||
'''
|
'''
|
||||||
Returns a list with a dictionary for each video we find.
|
Returns a list with a dictionary for each video we find.
|
||||||
If 'download', also downloads the videos.
|
If 'download', also downloads the videos.
|
||||||
@ -344,17 +473,20 @@ class YoutubeDL(object):
|
|||||||
break
|
break
|
||||||
if isinstance(ie_result, list):
|
if isinstance(ie_result, list):
|
||||||
# Backwards compatibility: old IE result format
|
# Backwards compatibility: old IE result format
|
||||||
for result in ie_result:
|
|
||||||
result.update(extra_info)
|
|
||||||
ie_result = {
|
ie_result = {
|
||||||
'_type': 'compat_list',
|
'_type': 'compat_list',
|
||||||
'entries': ie_result,
|
'entries': ie_result,
|
||||||
}
|
}
|
||||||
|
self.add_extra_info(ie_result,
|
||||||
|
{
|
||||||
|
'extractor': ie.IE_NAME,
|
||||||
|
'webpage_url': url,
|
||||||
|
'extractor_key': ie.ie_key(),
|
||||||
|
})
|
||||||
|
if process:
|
||||||
|
return self.process_ie_result(ie_result, download, extra_info)
|
||||||
else:
|
else:
|
||||||
ie_result.update(extra_info)
|
return ie_result
|
||||||
if 'extractor' not in ie_result:
|
|
||||||
ie_result['extractor'] = ie.IE_NAME
|
|
||||||
return self.process_ie_result(ie_result, download=download)
|
|
||||||
except ExtractorError as de: # An error we somewhat expected
|
except ExtractorError as de: # An error we somewhat expected
|
||||||
self.report_error(compat_str(de), de.format_traceback())
|
self.report_error(compat_str(de), de.format_traceback())
|
||||||
break
|
break
|
||||||
@ -378,8 +510,8 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
||||||
if result_type == 'video':
|
if result_type == 'video':
|
||||||
ie_result.update(extra_info)
|
self.add_extra_info(ie_result, extra_info)
|
||||||
return self.process_video_result(ie_result)
|
return self.process_video_result(ie_result, download=download)
|
||||||
elif result_type == 'url':
|
elif result_type == 'url':
|
||||||
# We have to add extra_info to the results because it may be
|
# We have to add extra_info to the results because it may be
|
||||||
# contained in a playlist
|
# contained in a playlist
|
||||||
@ -387,6 +519,32 @@ class YoutubeDL(object):
|
|||||||
download,
|
download,
|
||||||
ie_key=ie_result.get('ie_key'),
|
ie_key=ie_result.get('ie_key'),
|
||||||
extra_info=extra_info)
|
extra_info=extra_info)
|
||||||
|
elif result_type == 'url_transparent':
|
||||||
|
# Use the information from the embedding page
|
||||||
|
info = self.extract_info(
|
||||||
|
ie_result['url'], ie_key=ie_result.get('ie_key'),
|
||||||
|
extra_info=extra_info, download=False, process=False)
|
||||||
|
|
||||||
|
def make_result(embedded_info):
|
||||||
|
new_result = ie_result.copy()
|
||||||
|
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
|
||||||
|
'entries', 'urlhandle', 'ie_key', 'duration',
|
||||||
|
'subtitles', 'annotations', 'format',
|
||||||
|
'thumbnail', 'thumbnails'):
|
||||||
|
if f in new_result:
|
||||||
|
del new_result[f]
|
||||||
|
if f in embedded_info:
|
||||||
|
new_result[f] = embedded_info[f]
|
||||||
|
return new_result
|
||||||
|
new_result = make_result(info)
|
||||||
|
|
||||||
|
assert new_result.get('_type') != 'url_transparent'
|
||||||
|
if new_result.get('_type') == 'compat_list':
|
||||||
|
new_result['entries'] = [
|
||||||
|
make_result(e) for e in new_result['entries']]
|
||||||
|
|
||||||
|
return self.process_ie_result(
|
||||||
|
new_result, download=download, extra_info=extra_info)
|
||||||
elif result_type == 'playlist':
|
elif result_type == 'playlist':
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||||
@ -413,12 +571,16 @@ class YoutubeDL(object):
|
|||||||
extra = {
|
extra = {
|
||||||
'playlist': playlist,
|
'playlist': playlist,
|
||||||
'playlist_index': i + playliststart,
|
'playlist_index': i + playliststart,
|
||||||
|
'extractor': ie_result['extractor'],
|
||||||
|
'webpage_url': ie_result['webpage_url'],
|
||||||
|
'extractor_key': ie_result['extractor_key'],
|
||||||
}
|
}
|
||||||
if not 'extractor' in entry:
|
|
||||||
# We set the extractor, if it's an url it will be set then to
|
reason = self._match_entry(entry)
|
||||||
# the new extractor, but if it's already a video we must make
|
if reason is not None:
|
||||||
# sure it's present: see issue #877
|
self.to_screen(u'[download] ' + reason)
|
||||||
entry['extractor'] = ie_result['extractor']
|
continue
|
||||||
|
|
||||||
entry_result = self.process_ie_result(entry,
|
entry_result = self.process_ie_result(entry,
|
||||||
download=download,
|
download=download,
|
||||||
extra_info=extra)
|
extra_info=extra)
|
||||||
@ -427,10 +589,15 @@ class YoutubeDL(object):
|
|||||||
return ie_result
|
return ie_result
|
||||||
elif result_type == 'compat_list':
|
elif result_type == 'compat_list':
|
||||||
def _fixup(r):
|
def _fixup(r):
|
||||||
r.setdefault('extractor', ie_result['extractor'])
|
self.add_extra_info(r,
|
||||||
|
{
|
||||||
|
'extractor': ie_result['extractor'],
|
||||||
|
'webpage_url': ie_result['webpage_url'],
|
||||||
|
'extractor_key': ie_result['extractor_key'],
|
||||||
|
})
|
||||||
return r
|
return r
|
||||||
ie_result['entries'] = [
|
ie_result['entries'] = [
|
||||||
self.process_ie_result(_fixup(r), download=download)
|
self.process_ie_result(_fixup(r), download, extra_info)
|
||||||
for r in ie_result['entries']
|
for r in ie_result['entries']
|
||||||
]
|
]
|
||||||
return ie_result
|
return ie_result
|
||||||
@ -462,7 +629,7 @@ class YoutubeDL(object):
|
|||||||
info_dict['playlist_index'] = None
|
info_dict['playlist_index'] = None
|
||||||
|
|
||||||
# This extractors handle format selection themselves
|
# This extractors handle format selection themselves
|
||||||
if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
|
if info_dict['extractor'] in [u'youtube', u'Youku']:
|
||||||
if download:
|
if download:
|
||||||
self.process_info(info_dict)
|
self.process_info(info_dict)
|
||||||
return info_dict
|
return info_dict
|
||||||
@ -482,8 +649,11 @@ class YoutubeDL(object):
|
|||||||
format['format'] = u'{id} - {res}{note}'.format(
|
format['format'] = u'{id} - {res}{note}'.format(
|
||||||
id=format['format_id'],
|
id=format['format_id'],
|
||||||
res=self.format_resolution(format),
|
res=self.format_resolution(format),
|
||||||
note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
|
||||||
)
|
)
|
||||||
|
# Automatically determine file extension if missing
|
||||||
|
if 'ext' not in format:
|
||||||
|
format['ext'] = determine_ext(format['url'])
|
||||||
|
|
||||||
if self.params.get('listformats', None):
|
if self.params.get('listformats', None):
|
||||||
self.list_formats(info_dict)
|
self.list_formats(info_dict)
|
||||||
@ -521,7 +691,8 @@ class YoutubeDL(object):
|
|||||||
formats_to_download = [selected_format]
|
formats_to_download = [selected_format]
|
||||||
break
|
break
|
||||||
if not formats_to_download:
|
if not formats_to_download:
|
||||||
raise ExtractorError(u'requested format not available')
|
raise ExtractorError(u'requested format not available',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
if download:
|
if download:
|
||||||
if len(formats_to_download) > 1:
|
if len(formats_to_download) > 1:
|
||||||
@ -565,20 +736,23 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
# Forced printings
|
# Forced printings
|
||||||
if self.params.get('forcetitle', False):
|
if self.params.get('forcetitle', False):
|
||||||
compat_print(info_dict['title'])
|
self.to_stdout(info_dict['fulltitle'])
|
||||||
if self.params.get('forceid', False):
|
if self.params.get('forceid', False):
|
||||||
compat_print(info_dict['id'])
|
self.to_stdout(info_dict['id'])
|
||||||
if self.params.get('forceurl', False):
|
if self.params.get('forceurl', False):
|
||||||
# For RTMP URLs, also include the playpath
|
# For RTMP URLs, also include the playpath
|
||||||
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
|
self.to_stdout(info_dict['url'] + info_dict.get('play_path', u''))
|
||||||
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
|
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
||||||
compat_print(info_dict['thumbnail'])
|
self.to_stdout(info_dict['thumbnail'])
|
||||||
if self.params.get('forcedescription', False) and 'description' in info_dict:
|
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
||||||
compat_print(info_dict['description'])
|
self.to_stdout(info_dict['description'])
|
||||||
if self.params.get('forcefilename', False) and filename is not None:
|
if self.params.get('forcefilename', False) and filename is not None:
|
||||||
compat_print(filename)
|
self.to_stdout(filename)
|
||||||
if self.params.get('forceformat', False):
|
if self.params.get('forceformat', False):
|
||||||
compat_print(info_dict['format'])
|
self.to_stdout(info_dict['format'])
|
||||||
|
if self.params.get('forcejson', False):
|
||||||
|
info_dict['_filename'] = filename
|
||||||
|
self.to_stdout(json.dumps(info_dict))
|
||||||
|
|
||||||
# Do nothing else if in simulate mode
|
# Do nothing else if in simulate mode
|
||||||
if self.params.get('simulate', False):
|
if self.params.get('simulate', False):
|
||||||
@ -626,7 +800,7 @@ class YoutubeDL(object):
|
|||||||
# subtitles download errors are already managed as troubles in relevant IE
|
# subtitles download errors are already managed as troubles in relevant IE
|
||||||
# that way it will silently go on when used with unsupporting IE
|
# that way it will silently go on when used with unsupporting IE
|
||||||
subtitles = info_dict['subtitles']
|
subtitles = info_dict['subtitles']
|
||||||
sub_format = self.params.get('subtitlesformat')
|
sub_format = self.params.get('subtitlesformat', 'srt')
|
||||||
for sub_lang in subtitles.keys():
|
for sub_lang in subtitles.keys():
|
||||||
sub = subtitles[sub_lang]
|
sub = subtitles[sub_lang]
|
||||||
if sub is None:
|
if sub is None:
|
||||||
@ -641,7 +815,7 @@ class YoutubeDL(object):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writeinfojson', False):
|
if self.params.get('writeinfojson', False):
|
||||||
infofn = filename + u'.info.json'
|
infofn = os.path.splitext(filename)[0] + u'.info.json'
|
||||||
self.report_writeinfojson(infofn)
|
self.report_writeinfojson(infofn)
|
||||||
try:
|
try:
|
||||||
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
||||||
@ -692,13 +866,15 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def download(self, url_list):
|
def download(self, url_list):
|
||||||
"""Download a given list of URLs."""
|
"""Download a given list of URLs."""
|
||||||
if len(url_list) > 1 and self.fixed_template():
|
if (len(url_list) > 1 and
|
||||||
|
'%' not in self.params['outtmpl']
|
||||||
|
and self.params.get('max_downloads') != 1):
|
||||||
raise SameFileError(self.params['outtmpl'])
|
raise SameFileError(self.params['outtmpl'])
|
||||||
|
|
||||||
for url in url_list:
|
for url in url_list:
|
||||||
try:
|
try:
|
||||||
#It also downloads the videos
|
#It also downloads the videos
|
||||||
videos = self.extract_info(url)
|
self.extract_info(url)
|
||||||
except UnavailableVideoError:
|
except UnavailableVideoError:
|
||||||
self.report_error(u'unable to download video')
|
self.report_error(u'unable to download video')
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
@ -707,6 +883,20 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
return self._download_retcode
|
return self._download_retcode
|
||||||
|
|
||||||
|
def download_with_info_file(self, info_filename):
|
||||||
|
with io.open(info_filename, 'r', encoding='utf-8') as f:
|
||||||
|
info = json.load(f)
|
||||||
|
try:
|
||||||
|
self.process_ie_result(info, download=True)
|
||||||
|
except DownloadError:
|
||||||
|
webpage_url = info.get('webpage_url')
|
||||||
|
if webpage_url is not None:
|
||||||
|
self.report_warning(u'The info failed to download, trying with "%s"' % webpage_url)
|
||||||
|
return self.download([webpage_url])
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
return self._download_retcode
|
||||||
|
|
||||||
def post_process(self, filename, ie_info):
|
def post_process(self, filename, ie_info):
|
||||||
"""Run all the postprocessors on the given file."""
|
"""Run all the postprocessors on the given file."""
|
||||||
info = dict(ie_info)
|
info = dict(ie_info)
|
||||||
@ -730,11 +920,26 @@ class YoutubeDL(object):
|
|||||||
except (IOError, OSError):
|
except (IOError, OSError):
|
||||||
self.report_warning(u'Unable to remove downloaded video file')
|
self.report_warning(u'Unable to remove downloaded video file')
|
||||||
|
|
||||||
|
def _make_archive_id(self, info_dict):
|
||||||
|
# Future-proof against any change in case
|
||||||
|
# and backwards compatibility with prior versions
|
||||||
|
extractor = info_dict.get('extractor_key')
|
||||||
|
if extractor is None:
|
||||||
|
if 'id' in info_dict:
|
||||||
|
extractor = info_dict.get('ie_key') # key in a playlist
|
||||||
|
if extractor is None:
|
||||||
|
return None # Incomplete video information
|
||||||
|
return extractor.lower() + u' ' + info_dict['id']
|
||||||
|
|
||||||
def in_download_archive(self, info_dict):
|
def in_download_archive(self, info_dict):
|
||||||
fn = self.params.get('download_archive')
|
fn = self.params.get('download_archive')
|
||||||
if fn is None:
|
if fn is None:
|
||||||
return False
|
return False
|
||||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
|
||||||
|
vid_id = self._make_archive_id(info_dict)
|
||||||
|
if vid_id is None:
|
||||||
|
return False # Incomplete video information
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
||||||
for line in archive_file:
|
for line in archive_file:
|
||||||
@ -749,35 +954,145 @@ class YoutubeDL(object):
|
|||||||
fn = self.params.get('download_archive')
|
fn = self.params.get('download_archive')
|
||||||
if fn is None:
|
if fn is None:
|
||||||
return
|
return
|
||||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
vid_id = self._make_archive_id(info_dict)
|
||||||
|
assert vid_id
|
||||||
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'a', encoding='utf-8') as archive_file:
|
||||||
archive_file.write(vid_id + u'\n')
|
archive_file.write(vid_id + u'\n')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_resolution(format):
|
def format_resolution(format, default='unknown'):
|
||||||
|
if format.get('vcodec') == 'none':
|
||||||
|
return 'audio only'
|
||||||
|
if format.get('_resolution') is not None:
|
||||||
|
return format['_resolution']
|
||||||
if format.get('height') is not None:
|
if format.get('height') is not None:
|
||||||
if format.get('width') is not None:
|
if format.get('width') is not None:
|
||||||
res = u'%sx%s' % (format['width'], format['height'])
|
res = u'%sx%s' % (format['width'], format['height'])
|
||||||
else:
|
else:
|
||||||
res = u'%sp' % format['height']
|
res = u'%sp' % format['height']
|
||||||
else:
|
else:
|
||||||
res = '???'
|
res = default
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def list_formats(self, info_dict):
|
def list_formats(self, info_dict):
|
||||||
formats_s = []
|
def format_note(fdict):
|
||||||
for format in info_dict.get('formats', [info_dict]):
|
res = u''
|
||||||
formats_s.append(u'%-15s: %-5s %-15s[%s]' % (
|
if fdict.get('format_note') is not None:
|
||||||
|
res += fdict['format_note'] + u' '
|
||||||
|
if (fdict.get('vcodec') is not None and
|
||||||
|
fdict.get('vcodec') != 'none'):
|
||||||
|
res += u'%-5s' % fdict['vcodec']
|
||||||
|
elif fdict.get('vbr') is not None:
|
||||||
|
res += u'video'
|
||||||
|
if fdict.get('vbr') is not None:
|
||||||
|
res += u'@%4dk' % fdict['vbr']
|
||||||
|
if fdict.get('acodec') is not None:
|
||||||
|
if res:
|
||||||
|
res += u', '
|
||||||
|
res += u'%-5s' % fdict['acodec']
|
||||||
|
elif fdict.get('abr') is not None:
|
||||||
|
if res:
|
||||||
|
res += u', '
|
||||||
|
res += 'audio'
|
||||||
|
if fdict.get('abr') is not None:
|
||||||
|
res += u'@%3dk' % fdict['abr']
|
||||||
|
if fdict.get('filesize') is not None:
|
||||||
|
if res:
|
||||||
|
res += u', '
|
||||||
|
res += format_bytes(fdict['filesize'])
|
||||||
|
return res
|
||||||
|
|
||||||
|
def line(format, idlen=20):
|
||||||
|
return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
|
||||||
format['format_id'],
|
format['format_id'],
|
||||||
format['ext'],
|
format['ext'],
|
||||||
format.get('format_note') or '-',
|
|
||||||
self.format_resolution(format),
|
self.format_resolution(format),
|
||||||
)
|
format_note(format),
|
||||||
)
|
))
|
||||||
if len(formats_s) != 1:
|
|
||||||
formats_s[0] += ' (worst)'
|
formats = info_dict.get('formats', [info_dict])
|
||||||
formats_s[-1] += ' (best)'
|
idlen = max(len(u'format code'),
|
||||||
formats_s = "\n".join(formats_s)
|
max(len(f['format_id']) for f in formats))
|
||||||
self.to_screen(u'[info] Available formats for %s:\n'
|
formats_s = [line(f, idlen) for f in formats]
|
||||||
u'format code extension note resolution\n%s' % (
|
if len(formats) > 1:
|
||||||
info_dict['id'], formats_s))
|
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
|
||||||
|
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
|
||||||
|
|
||||||
|
header_line = line({
|
||||||
|
'format_id': u'format code', 'ext': u'extension',
|
||||||
|
'_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
|
||||||
|
self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
|
||||||
|
(info_dict['id'], header_line, u"\n".join(formats_s)))
|
||||||
|
|
||||||
|
def urlopen(self, req):
|
||||||
|
""" Start an HTTP download """
|
||||||
|
return self._opener.open(req)
|
||||||
|
|
||||||
|
def print_debug_header(self):
|
||||||
|
if not self.params.get('verbose'):
|
||||||
|
return
|
||||||
|
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||||
|
try:
|
||||||
|
sp = subprocess.Popen(
|
||||||
|
['git', 'rev-parse', '--short', 'HEAD'],
|
||||||
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||||
|
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
out, err = sp.communicate()
|
||||||
|
out = out.decode().strip()
|
||||||
|
if re.match('[0-9a-f]+', out):
|
||||||
|
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
||||||
|
except:
|
||||||
|
try:
|
||||||
|
sys.exc_clear()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
write_string(u'[debug] Python version %s - %s' %
|
||||||
|
(platform.python_version(), platform_name()) + u'\n')
|
||||||
|
|
||||||
|
proxy_map = {}
|
||||||
|
for handler in self._opener.handlers:
|
||||||
|
if hasattr(handler, 'proxies'):
|
||||||
|
proxy_map.update(handler.proxies)
|
||||||
|
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
||||||
|
|
||||||
|
def _setup_opener(self):
|
||||||
|
timeout_val = self.params.get('socket_timeout')
|
||||||
|
timeout = 600 if timeout_val is None else float(timeout_val)
|
||||||
|
|
||||||
|
opts_cookiefile = self.params.get('cookiefile')
|
||||||
|
opts_proxy = self.params.get('proxy')
|
||||||
|
|
||||||
|
if opts_cookiefile is None:
|
||||||
|
self.cookiejar = compat_cookiejar.CookieJar()
|
||||||
|
else:
|
||||||
|
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||||
|
opts_cookiefile)
|
||||||
|
if os.access(opts_cookiefile, os.R_OK):
|
||||||
|
self.cookiejar.load()
|
||||||
|
|
||||||
|
cookie_processor = compat_urllib_request.HTTPCookieProcessor(
|
||||||
|
self.cookiejar)
|
||||||
|
if opts_proxy is not None:
|
||||||
|
if opts_proxy == '':
|
||||||
|
proxies = {}
|
||||||
|
else:
|
||||||
|
proxies = {'http': opts_proxy, 'https': opts_proxy}
|
||||||
|
else:
|
||||||
|
proxies = compat_urllib_request.getproxies()
|
||||||
|
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||||
|
if 'http' in proxies and 'https' not in proxies:
|
||||||
|
proxies['https'] = proxies['http']
|
||||||
|
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||||
|
https_handler = make_HTTPS_handler(
|
||||||
|
self.params.get('nocheckcertificate', False))
|
||||||
|
opener = compat_urllib_request.build_opener(
|
||||||
|
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||||
|
# Delete the default user-agent header, which would otherwise apply in
|
||||||
|
# cases where our custom HTTP handler doesn't come into play
|
||||||
|
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||||
|
opener.addheaders = []
|
||||||
|
self._opener = opener
|
||||||
|
|
||||||
|
# TODO remove this global modification
|
||||||
|
compat_urllib_request.install_opener(opener)
|
||||||
|
socket.setdefaulttimeout(timeout)
|
||||||
|
@ -32,50 +32,45 @@ __authors__ = (
|
|||||||
'Ismael Mejía',
|
'Ismael Mejía',
|
||||||
'Steffan \'Ruirize\' James',
|
'Steffan \'Ruirize\' James',
|
||||||
'Andras Elso',
|
'Andras Elso',
|
||||||
|
'Jelle van der Waa',
|
||||||
|
'Marcin Cieślak',
|
||||||
|
'Anton Larionov',
|
||||||
|
'Takuya Tsuchida',
|
||||||
|
'Sergey M.',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import collections
|
|
||||||
import getpass
|
import getpass
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import socket
|
|
||||||
import subprocess
|
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
|
||||||
import platform
|
|
||||||
|
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
compat_cookiejar,
|
|
||||||
compat_print,
|
compat_print,
|
||||||
compat_str,
|
|
||||||
compat_urllib_request,
|
|
||||||
DateRange,
|
DateRange,
|
||||||
decodeOption,
|
decodeOption,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
get_term_width,
|
||||||
DownloadError,
|
DownloadError,
|
||||||
get_cachedir,
|
get_cachedir,
|
||||||
make_HTTPS_handler,
|
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
platform_name,
|
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
std_headers,
|
std_headers,
|
||||||
write_string,
|
write_string,
|
||||||
YoutubeDLHandler,
|
|
||||||
)
|
)
|
||||||
from .update import update_self
|
from .update import update_self
|
||||||
from .version import __version__
|
|
||||||
from .FileDownloader import (
|
from .FileDownloader import (
|
||||||
FileDownloader,
|
FileDownloader,
|
||||||
)
|
)
|
||||||
from .extractor import gen_extractors
|
from .extractor import gen_extractors
|
||||||
|
from .version import __version__
|
||||||
from .YoutubeDL import YoutubeDL
|
from .YoutubeDL import YoutubeDL
|
||||||
from .PostProcessor import (
|
from .PostProcessor import (
|
||||||
FFmpegMetadataPP,
|
FFmpegMetadataPP,
|
||||||
@ -86,11 +81,11 @@ from .PostProcessor import (
|
|||||||
|
|
||||||
|
|
||||||
def parseOpts(overrideArguments=None):
|
def parseOpts(overrideArguments=None):
|
||||||
def _readOptions(filename_bytes):
|
def _readOptions(filename_bytes, default=[]):
|
||||||
try:
|
try:
|
||||||
optionf = open(filename_bytes)
|
optionf = open(filename_bytes)
|
||||||
except IOError:
|
except IOError:
|
||||||
return [] # silently skip if file is not present
|
return default # silently skip if file is not present
|
||||||
try:
|
try:
|
||||||
res = []
|
res = []
|
||||||
for l in optionf:
|
for l in optionf:
|
||||||
@ -118,19 +113,6 @@ def parseOpts(overrideArguments=None):
|
|||||||
def _comma_separated_values_options_callback(option, opt_str, value, parser):
|
def _comma_separated_values_options_callback(option, opt_str, value, parser):
|
||||||
setattr(parser.values, option.dest, value.split(','))
|
setattr(parser.values, option.dest, value.split(','))
|
||||||
|
|
||||||
def _find_term_columns():
|
|
||||||
columns = os.environ.get('COLUMNS', None)
|
|
||||||
if columns:
|
|
||||||
return int(columns)
|
|
||||||
|
|
||||||
try:
|
|
||||||
sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
||||||
out,err = sp.communicate()
|
|
||||||
return int(out.split()[1])
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _hide_login_info(opts):
|
def _hide_login_info(opts):
|
||||||
opts = list(opts)
|
opts = list(opts)
|
||||||
for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
|
for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
|
||||||
@ -145,7 +127,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
max_help_position = 80
|
max_help_position = 80
|
||||||
|
|
||||||
# No need to wrap help messages if we're on a wide console
|
# No need to wrap help messages if we're on a wide console
|
||||||
columns = _find_term_columns()
|
columns = get_term_width()
|
||||||
if columns: max_width = columns
|
if columns: max_width = columns
|
||||||
|
|
||||||
fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
|
fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
|
||||||
@ -196,7 +178,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option('--extractor-descriptions',
|
general.add_option('--extractor-descriptions',
|
||||||
action='store_true', dest='list_extractor_descriptions',
|
action='store_true', dest='list_extractor_descriptions',
|
||||||
help='Output descriptions of all supported extractors', default=False)
|
help='Output descriptions of all supported extractors', default=False)
|
||||||
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
general.add_option(
|
||||||
|
'--proxy', dest='proxy', default=None, metavar='URL',
|
||||||
|
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
||||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||||
@ -204,6 +188,12 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option(
|
general.add_option(
|
||||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||||
help='Disable filesystem caching')
|
help='Disable filesystem caching')
|
||||||
|
general.add_option(
|
||||||
|
'--socket-timeout', dest='socket_timeout',
|
||||||
|
type=float, default=None, help=optparse.SUPPRESS_HELP)
|
||||||
|
general.add_option(
|
||||||
|
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||||
|
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
|
||||||
|
|
||||||
|
|
||||||
selection.add_option('--playlist-start',
|
selection.add_option('--playlist-start',
|
||||||
@ -212,7 +202,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
||||||
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
||||||
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
|
selection.add_option('--max-downloads', metavar='NUMBER',
|
||||||
|
dest='max_downloads', type=int, default=None,
|
||||||
|
help='Abort after downloading NUMBER files')
|
||||||
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||||
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||||
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
||||||
@ -224,7 +216,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
default=None, type=int)
|
default=None, type=int)
|
||||||
selection.add_option('--download-archive', metavar='FILE',
|
selection.add_option('--download-archive', metavar='FILE',
|
||||||
dest='download_archive',
|
dest='download_archive',
|
||||||
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
|
help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
|
||||||
|
|
||||||
|
|
||||||
authentication.add_option('-u', '--username',
|
authentication.add_option('-u', '--username',
|
||||||
@ -239,7 +231,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
|
|
||||||
video_format.add_option('-f', '--format',
|
video_format.add_option('-f', '--format',
|
||||||
action='store', dest='format', metavar='FORMAT', default='best',
|
action='store', dest='format', metavar='FORMAT', default='best',
|
||||||
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
||||||
video_format.add_option('--all-formats',
|
video_format.add_option('--all-formats',
|
||||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||||
video_format.add_option('--prefer-free-formats',
|
video_format.add_option('--prefer-free-formats',
|
||||||
@ -304,6 +296,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
verbosity.add_option('--get-format',
|
verbosity.add_option('--get-format',
|
||||||
action='store_true', dest='getformat',
|
action='store_true', dest='getformat',
|
||||||
help='simulate, quiet but print output format', default=False)
|
help='simulate, quiet but print output format', default=False)
|
||||||
|
verbosity.add_option('-j', '--dump-json',
|
||||||
|
action='store_true', dest='dumpjson',
|
||||||
|
help='simulate, quiet but print JSON information', default=False)
|
||||||
verbosity.add_option('--newline',
|
verbosity.add_option('--newline',
|
||||||
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
||||||
verbosity.add_option('--no-progress',
|
verbosity.add_option('--no-progress',
|
||||||
@ -316,6 +311,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
verbosity.add_option('--dump-intermediate-pages',
|
verbosity.add_option('--dump-intermediate-pages',
|
||||||
action='store_true', dest='dump_intermediate_pages', default=False,
|
action='store_true', dest='dump_intermediate_pages', default=False,
|
||||||
help='print downloaded pages to debug problems(very verbose)')
|
help='print downloaded pages to debug problems(very verbose)')
|
||||||
|
verbosity.add_option('--write-pages',
|
||||||
|
action='store_true', dest='write_pages', default=False,
|
||||||
|
help='Write downloaded intermediary pages to files in the current directory to debug problems')
|
||||||
verbosity.add_option('--youtube-print-sig-code',
|
verbosity.add_option('--youtube-print-sig-code',
|
||||||
action='store_true', dest='youtube_print_sig_code', default=False,
|
action='store_true', dest='youtube_print_sig_code', default=False,
|
||||||
help=optparse.SUPPRESS_HELP)
|
help=optparse.SUPPRESS_HELP)
|
||||||
@ -336,7 +334,8 @@ def parseOpts(overrideArguments=None):
|
|||||||
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
|
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
|
||||||
'%(autonumber)s to get an automatically incremented number, '
|
'%(autonumber)s to get an automatically incremented number, '
|
||||||
'%(ext)s for the filename extension, '
|
'%(ext)s for the filename extension, '
|
||||||
'%(format)s for the format description (like "22 - 1280x720" or "HD")'
|
'%(format)s for the format description (like "22 - 1280x720" or "HD"),'
|
||||||
|
'%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"),'
|
||||||
'%(upload_date)s for the upload date (YYYYMMDD), '
|
'%(upload_date)s for the upload date (YYYYMMDD), '
|
||||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||||
'%(id)s for the video id , %(playlist)s for the playlist the video is in, '
|
'%(id)s for the video id , %(playlist)s for the playlist the video is in, '
|
||||||
@ -345,16 +344,19 @@ def parseOpts(overrideArguments=None):
|
|||||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||||
filesystem.add_option('--autonumber-size',
|
filesystem.add_option('--autonumber-size',
|
||||||
dest='autonumber_size', metavar='NUMBER',
|
dest='autonumber_size', metavar='NUMBER',
|
||||||
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
|
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
|
||||||
filesystem.add_option('--restrict-filenames',
|
filesystem.add_option('--restrict-filenames',
|
||||||
action='store_true', dest='restrictfilenames',
|
action='store_true', dest='restrictfilenames',
|
||||||
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
||||||
filesystem.add_option('-a', '--batch-file',
|
filesystem.add_option('-a', '--batch-file',
|
||||||
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
||||||
|
filesystem.add_option('--load-info',
|
||||||
|
dest='load_info_filename', metavar='FILE',
|
||||||
|
help='json file containing the video information (created with the "--write-json" option')
|
||||||
filesystem.add_option('-w', '--no-overwrites',
|
filesystem.add_option('-w', '--no-overwrites',
|
||||||
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
||||||
filesystem.add_option('-c', '--continue',
|
filesystem.add_option('-c', '--continue',
|
||||||
action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
|
action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
|
||||||
filesystem.add_option('--no-continue',
|
filesystem.add_option('--no-continue',
|
||||||
action='store_false', dest='continue_dl',
|
action='store_false', dest='continue_dl',
|
||||||
help='do not resume partially downloaded files (restart from beginning)')
|
help='do not resume partially downloaded files (restart from beginning)')
|
||||||
@ -412,6 +414,8 @@ def parseOpts(overrideArguments=None):
|
|||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
||||||
else:
|
else:
|
||||||
|
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||||
|
|
||||||
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||||
if xdg_config_home:
|
if xdg_config_home:
|
||||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
||||||
@ -421,8 +425,31 @@ def parseOpts(overrideArguments=None):
|
|||||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
||||||
if not os.path.isfile(userConfFile):
|
if not os.path.isfile(userConfFile):
|
||||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
userConf = _readOptions(userConfFile, None)
|
||||||
userConf = _readOptions(userConfFile)
|
|
||||||
|
if userConf is None:
|
||||||
|
appdata_dir = os.environ.get('appdata')
|
||||||
|
if appdata_dir:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
||||||
|
default=None)
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
||||||
|
default=None)
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
userConf = []
|
||||||
|
|
||||||
commandLineConf = sys.argv[1:]
|
commandLineConf = sys.argv[1:]
|
||||||
argv = systemConf + userConf + commandLineConf
|
argv = systemConf + userConf + commandLineConf
|
||||||
opts, args = parser.parse_args(argv)
|
opts, args = parser.parse_args(argv)
|
||||||
@ -441,19 +468,6 @@ def _real_main(argv=None):
|
|||||||
|
|
||||||
parser, opts, args = parseOpts(argv)
|
parser, opts, args = parseOpts(argv)
|
||||||
|
|
||||||
# Open appropriate CookieJar
|
|
||||||
if opts.cookiefile is None:
|
|
||||||
jar = compat_cookiejar.CookieJar()
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
|
|
||||||
if os.access(opts.cookiefile, os.R_OK):
|
|
||||||
jar.load()
|
|
||||||
except (IOError, OSError) as err:
|
|
||||||
if opts.verbose:
|
|
||||||
traceback.print_exc()
|
|
||||||
write_string(u'ERROR: unable to open cookie file\n')
|
|
||||||
sys.exit(101)
|
|
||||||
# Set user agent
|
# Set user agent
|
||||||
if opts.user_agent is not None:
|
if opts.user_agent is not None:
|
||||||
std_headers['User-Agent'] = opts.user_agent
|
std_headers['User-Agent'] = opts.user_agent
|
||||||
@ -485,8 +499,6 @@ def _real_main(argv=None):
|
|||||||
all_urls = batchurls + args
|
all_urls = batchurls + args
|
||||||
all_urls = [url.strip() for url in all_urls]
|
all_urls = [url.strip() for url in all_urls]
|
||||||
|
|
||||||
opener = _setup_opener(jar=jar, opts=opts)
|
|
||||||
|
|
||||||
extractors = gen_extractors()
|
extractors = gen_extractors()
|
||||||
|
|
||||||
if opts.list_extractors:
|
if opts.list_extractors:
|
||||||
@ -541,7 +553,7 @@ def _real_main(argv=None):
|
|||||||
if opts.retries is not None:
|
if opts.retries is not None:
|
||||||
try:
|
try:
|
||||||
opts.retries = int(opts.retries)
|
opts.retries = int(opts.retries)
|
||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError):
|
||||||
parser.error(u'invalid retry count specified')
|
parser.error(u'invalid retry count specified')
|
||||||
if opts.buffersize is not None:
|
if opts.buffersize is not None:
|
||||||
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
||||||
@ -552,13 +564,13 @@ def _real_main(argv=None):
|
|||||||
opts.playliststart = int(opts.playliststart)
|
opts.playliststart = int(opts.playliststart)
|
||||||
if opts.playliststart <= 0:
|
if opts.playliststart <= 0:
|
||||||
raise ValueError(u'Playlist start must be positive')
|
raise ValueError(u'Playlist start must be positive')
|
||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError):
|
||||||
parser.error(u'invalid playlist start number specified')
|
parser.error(u'invalid playlist start number specified')
|
||||||
try:
|
try:
|
||||||
opts.playlistend = int(opts.playlistend)
|
opts.playlistend = int(opts.playlistend)
|
||||||
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
||||||
raise ValueError(u'Playlist end must be greater than playlist start')
|
raise ValueError(u'Playlist end must be greater than playlist start')
|
||||||
except (TypeError, ValueError) as err:
|
except (TypeError, ValueError):
|
||||||
parser.error(u'invalid playlist end number specified')
|
parser.error(u'invalid playlist end number specified')
|
||||||
if opts.extractaudio:
|
if opts.extractaudio:
|
||||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||||
@ -597,13 +609,12 @@ def _real_main(argv=None):
|
|||||||
u' file! Use "%%(ext)s" instead of %r' %
|
u' file! Use "%%(ext)s" instead of %r' %
|
||||||
determine_ext(outtmpl, u''))
|
determine_ext(outtmpl, u''))
|
||||||
|
|
||||||
# YoutubeDL
|
ydl_opts = {
|
||||||
ydl = YoutubeDL({
|
|
||||||
'usenetrc': opts.usenetrc,
|
'usenetrc': opts.usenetrc,
|
||||||
'username': opts.username,
|
'username': opts.username,
|
||||||
'password': opts.password,
|
'password': opts.password,
|
||||||
'videopassword': opts.videopassword,
|
'videopassword': opts.videopassword,
|
||||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
||||||
'forceurl': opts.geturl,
|
'forceurl': opts.geturl,
|
||||||
'forcetitle': opts.gettitle,
|
'forcetitle': opts.gettitle,
|
||||||
'forceid': opts.getid,
|
'forceid': opts.getid,
|
||||||
@ -611,8 +622,9 @@ def _real_main(argv=None):
|
|||||||
'forcedescription': opts.getdescription,
|
'forcedescription': opts.getdescription,
|
||||||
'forcefilename': opts.getfilename,
|
'forcefilename': opts.getfilename,
|
||||||
'forceformat': opts.getformat,
|
'forceformat': opts.getformat,
|
||||||
|
'forcejson': opts.dumpjson,
|
||||||
'simulate': opts.simulate,
|
'simulate': opts.simulate,
|
||||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
||||||
'format': opts.format,
|
'format': opts.format,
|
||||||
'format_limit': opts.format_limit,
|
'format_limit': opts.format_limit,
|
||||||
'listformats': opts.listformats,
|
'listformats': opts.listformats,
|
||||||
@ -651,6 +663,7 @@ def _real_main(argv=None):
|
|||||||
'prefer_free_formats': opts.prefer_free_formats,
|
'prefer_free_formats': opts.prefer_free_formats,
|
||||||
'verbose': opts.verbose,
|
'verbose': opts.verbose,
|
||||||
'dump_intermediate_pages': opts.dump_intermediate_pages,
|
'dump_intermediate_pages': opts.dump_intermediate_pages,
|
||||||
|
'write_pages': opts.write_pages,
|
||||||
'test': opts.test,
|
'test': opts.test,
|
||||||
'keepvideo': opts.keepvideo,
|
'keepvideo': opts.keepvideo,
|
||||||
'min_filesize': opts.min_filesize,
|
'min_filesize': opts.min_filesize,
|
||||||
@ -660,102 +673,51 @@ def _real_main(argv=None):
|
|||||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||||
'age_limit': opts.age_limit,
|
'age_limit': opts.age_limit,
|
||||||
'download_archive': opts.download_archive,
|
'download_archive': opts.download_archive,
|
||||||
})
|
'cookiefile': opts.cookiefile,
|
||||||
|
'nocheckcertificate': opts.no_check_certificate,
|
||||||
|
'proxy': opts.proxy,
|
||||||
|
'socket_timeout': opts.socket_timeout,
|
||||||
|
'bidi_workaround': opts.bidi_workaround,
|
||||||
|
}
|
||||||
|
|
||||||
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
|
ydl.print_debug_header()
|
||||||
|
ydl.add_default_info_extractors()
|
||||||
|
|
||||||
|
# PostProcessors
|
||||||
|
# Add the metadata pp first, the other pps will copy it
|
||||||
|
if opts.addmetadata:
|
||||||
|
ydl.add_post_processor(FFmpegMetadataPP())
|
||||||
|
if opts.extractaudio:
|
||||||
|
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||||
|
if opts.recodevideo:
|
||||||
|
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||||
|
if opts.embedsubtitles:
|
||||||
|
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
||||||
|
|
||||||
|
# Update version
|
||||||
|
if opts.update_self:
|
||||||
|
update_self(ydl.to_screen, opts.verbose)
|
||||||
|
|
||||||
|
# Maybe do nothing
|
||||||
|
if (len(all_urls) < 1) and (opts.load_info_filename is None):
|
||||||
|
if not opts.update_self:
|
||||||
|
parser.error(u'you must provide at least one URL')
|
||||||
|
else:
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
if opts.verbose:
|
|
||||||
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
|
||||||
try:
|
try:
|
||||||
sp = subprocess.Popen(
|
if opts.load_info_filename is not None:
|
||||||
['git', 'rev-parse', '--short', 'HEAD'],
|
retcode = ydl.download_with_info_file(opts.load_info_filename)
|
||||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
else:
|
||||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
retcode = ydl.download(all_urls)
|
||||||
out, err = sp.communicate()
|
except MaxDownloadsReached:
|
||||||
out = out.decode().strip()
|
ydl.to_screen(u'--max-download limit reached, aborting.')
|
||||||
if re.match('[0-9a-f]+', out):
|
retcode = 101
|
||||||
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
sys.exc_clear()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
|
|
||||||
|
|
||||||
proxy_map = {}
|
|
||||||
for handler in opener.handlers:
|
|
||||||
if hasattr(handler, 'proxies'):
|
|
||||||
proxy_map.update(handler.proxies)
|
|
||||||
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
|
||||||
|
|
||||||
ydl.add_default_info_extractors()
|
|
||||||
|
|
||||||
# PostProcessors
|
|
||||||
# Add the metadata pp first, the other pps will copy it
|
|
||||||
if opts.addmetadata:
|
|
||||||
ydl.add_post_processor(FFmpegMetadataPP())
|
|
||||||
if opts.extractaudio:
|
|
||||||
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
|
||||||
if opts.recodevideo:
|
|
||||||
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
|
||||||
if opts.embedsubtitles:
|
|
||||||
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
|
||||||
|
|
||||||
# Update version
|
|
||||||
if opts.update_self:
|
|
||||||
update_self(ydl.to_screen, opts.verbose)
|
|
||||||
|
|
||||||
# Maybe do nothing
|
|
||||||
if len(all_urls) < 1:
|
|
||||||
if not opts.update_self:
|
|
||||||
parser.error(u'you must provide at least one URL')
|
|
||||||
else:
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
try:
|
|
||||||
retcode = ydl.download(all_urls)
|
|
||||||
except MaxDownloadsReached:
|
|
||||||
ydl.to_screen(u'--max-download limit reached, aborting.')
|
|
||||||
retcode = 101
|
|
||||||
|
|
||||||
# Dump cookie jar if requested
|
|
||||||
if opts.cookiefile is not None:
|
|
||||||
try:
|
|
||||||
jar.save()
|
|
||||||
except (IOError, OSError):
|
|
||||||
sys.exit(u'ERROR: unable to save cookie jar')
|
|
||||||
|
|
||||||
sys.exit(retcode)
|
sys.exit(retcode)
|
||||||
|
|
||||||
|
|
||||||
def _setup_opener(jar=None, opts=None, timeout=300):
|
|
||||||
if opts is None:
|
|
||||||
FakeOptions = collections.namedtuple(
|
|
||||||
'FakeOptions', ['proxy', 'no_check_certificate'])
|
|
||||||
opts = FakeOptions(proxy=None, no_check_certificate=False)
|
|
||||||
|
|
||||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
|
||||||
if opts.proxy is not None:
|
|
||||||
if opts.proxy == '':
|
|
||||||
proxies = {}
|
|
||||||
else:
|
|
||||||
proxies = {'http': opts.proxy, 'https': opts.proxy}
|
|
||||||
else:
|
|
||||||
proxies = compat_urllib_request.getproxies()
|
|
||||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
|
||||||
if 'http' in proxies and 'https' not in proxies:
|
|
||||||
proxies['https'] = proxies['http']
|
|
||||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
|
||||||
https_handler = make_HTTPS_handler(opts)
|
|
||||||
opener = compat_urllib_request.build_opener(
|
|
||||||
https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
|
||||||
# Delete the default user-agent header, which would otherwise apply in
|
|
||||||
# cases where our custom HTTP handler doesn't come into play
|
|
||||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
|
||||||
opener.addheaders = []
|
|
||||||
compat_urllib_request.install_opener(opener)
|
|
||||||
socket.setdefaulttimeout(timeout)
|
|
||||||
return opener
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv=None):
|
def main(argv=None):
|
||||||
try:
|
try:
|
||||||
_real_main(argv)
|
_real_main(argv)
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from .appletrailers import AppleTrailersIE
|
from .appletrailers import AppleTrailersIE
|
||||||
from .addanime import AddAnimeIE
|
from .addanime import AddAnimeIE
|
||||||
|
from .anitube import AnitubeIE
|
||||||
from .archiveorg import ArchiveOrgIE
|
from .archiveorg import ArchiveOrgIE
|
||||||
from .ard import ARDIE
|
from .ard import ARDIE
|
||||||
from .arte import (
|
from .arte import (
|
||||||
@ -7,9 +8,11 @@ from .arte import (
|
|||||||
ArteTVPlus7IE,
|
ArteTVPlus7IE,
|
||||||
ArteTVCreativeIE,
|
ArteTVCreativeIE,
|
||||||
ArteTVFutureIE,
|
ArteTVFutureIE,
|
||||||
|
ArteTVDDCIE,
|
||||||
)
|
)
|
||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .bandcamp import BandcampIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
@ -18,12 +21,15 @@ from .c56 import C56IE
|
|||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
|
from .clipfish import ClipfishIE
|
||||||
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
from .cnn import CNNIE
|
from .cnn import CNNIE
|
||||||
from .collegehumor import CollegeHumorIE
|
from .collegehumor import CollegeHumorIE
|
||||||
from .comedycentral import ComedyCentralIE
|
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
from .cspan import CSpanIE
|
from .cspan import CSpanIE
|
||||||
|
from .d8 import D8IE
|
||||||
from .dailymotion import (
|
from .dailymotion import (
|
||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
DailymotionPlaylistIE,
|
DailymotionPlaylistIE,
|
||||||
@ -37,8 +43,10 @@ from .defense import DefenseGouvFrIE
|
|||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
|
from .eitb import EitbIE
|
||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
from .exfm import ExfmIE
|
from .exfm import ExfmIE
|
||||||
|
from .extremetube import ExtremeTubeIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
from .faz import FazIE
|
from .faz import FazIE
|
||||||
from .fktv import (
|
from .fktv import (
|
||||||
@ -49,11 +57,12 @@ from .flickr import FlickrIE
|
|||||||
from .francetv import (
|
from .francetv import (
|
||||||
PluzzIE,
|
PluzzIE,
|
||||||
FranceTvInfoIE,
|
FranceTvInfoIE,
|
||||||
France2IE,
|
FranceTVIE,
|
||||||
GenerationQuoiIE
|
GenerationQuoiIE
|
||||||
)
|
)
|
||||||
from .freesound import FreesoundIE
|
from .freesound import FreesoundIE
|
||||||
from .funnyordie import FunnyOrDieIE
|
from .funnyordie import FunnyOrDieIE
|
||||||
|
from .gamekings import GamekingsIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gametrailers import GametrailersIE
|
from .gametrailers import GametrailersIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
@ -64,6 +73,7 @@ from .hotnewhiphop import HotNewHipHopIE
|
|||||||
from .howcast import HowcastIE
|
from .howcast import HowcastIE
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
from .ign import IGNIE, OneUPIE
|
from .ign import IGNIE, OneUPIE
|
||||||
|
from .imdb import ImdbIE
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
@ -72,29 +82,38 @@ from .jeuxvideo import JeuxVideoIE
|
|||||||
from .jukebox import JukeboxIE
|
from .jukebox import JukeboxIE
|
||||||
from .justintv import JustinTVIE
|
from .justintv import JustinTVIE
|
||||||
from .kankan import KankanIE
|
from .kankan import KankanIE
|
||||||
|
from .keezmovies import KeezMoviesIE
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import LivestreamIE
|
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
from .mit import TechTVMITIE, MITIE
|
from .mit import TechTVMITIE, MITIE
|
||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
|
from .mofosex import MofosexIE
|
||||||
from .mtv import MTVIE
|
from .mtv import MTVIE
|
||||||
from .muzu import MuzuTVIE
|
from .muzu import MuzuTVIE
|
||||||
|
from .myspace import MySpaceIE
|
||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import NBCNewsIE
|
from .nbc import NBCNewsIE
|
||||||
|
from .ndtv import NDTVIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
|
from .niconico import NiconicoIE
|
||||||
|
from .ninegag import NineGagIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .orf import ORFIE
|
from .orf import ORFIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .podomatic import PodomaticIE
|
||||||
|
from .pornhub import PornHubIE
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
|
from .pyvideo import PyvideoIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .ringtv import RingTVIE
|
from .ringtv import RingTVIE
|
||||||
@ -106,24 +125,40 @@ from .rutube import RutubeIE
|
|||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .slashdot import SlashdotIE
|
from .slashdot import SlashdotIE
|
||||||
from .slideshare import SlideshareIE
|
from .slideshare import SlideshareIE
|
||||||
|
from .smotri import (
|
||||||
|
SmotriIE,
|
||||||
|
SmotriCommunityIE,
|
||||||
|
SmotriUserIE,
|
||||||
|
SmotriBroadcastIE,
|
||||||
|
)
|
||||||
from .sohu import SohuIE
|
from .sohu import SohuIE
|
||||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
|
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
|
||||||
from .southparkstudios import SouthParkStudiosIE
|
from .southparkstudios import (
|
||||||
|
SouthParkStudiosIE,
|
||||||
|
SouthparkDeIE,
|
||||||
|
)
|
||||||
|
from .space import SpaceIE
|
||||||
|
from .spankwire import SpankwireIE
|
||||||
from .spiegel import SpiegelIE
|
from .spiegel import SpiegelIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .statigram import StatigramIE
|
from .statigram import StatigramIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
|
from .streamcloud import StreamcloudIE
|
||||||
from .sztvhu import SztvHuIE
|
from .sztvhu import SztvHuIE
|
||||||
from .teamcoco import TeamcocoIE
|
from .teamcoco import TeamcocoIE
|
||||||
from .techtalks import TechTalksIE
|
from .techtalks import TechTalksIE
|
||||||
from .ted import TEDIE
|
from .ted import TEDIE
|
||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
|
from .theplatform import ThePlatformIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
|
from .toutv import TouTvIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
|
from .tube8 import Tube8IE
|
||||||
from .tudou import TudouIE
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
|
from .tvp import TvpIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .ustream import UstreamIE, UstreamChannelIE
|
from .ustream import UstreamIE, UstreamChannelIE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
@ -135,17 +170,31 @@ from .viddler import ViddlerIE
|
|||||||
from .videodetective import VideoDetectiveIE
|
from .videodetective import VideoDetectiveIE
|
||||||
from .videofyme import VideofyMeIE
|
from .videofyme import VideofyMeIE
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
from .vimeo import VimeoIE, VimeoChannelIE
|
from .vimeo import (
|
||||||
|
VimeoIE,
|
||||||
|
VimeoChannelIE,
|
||||||
|
VimeoUserIE,
|
||||||
|
VimeoAlbumIE,
|
||||||
|
VimeoGroupsIE,
|
||||||
|
)
|
||||||
from .vine import VineIE
|
from .vine import VineIE
|
||||||
|
from .viki import VikiIE
|
||||||
|
from .vk import VKIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .websurg import WeBSurgIE
|
from .websurg import WeBSurgIE
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
|
from .wistia import WistiaIE
|
||||||
from .worldstarhiphop import WorldStarHipHopIE
|
from .worldstarhiphop import WorldStarHipHopIE
|
||||||
from .xhamster import XHamsterIE
|
from .xhamster import XHamsterIE
|
||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
from .xvideos import XVideosIE
|
from .xvideos import XVideosIE
|
||||||
from .yahoo import YahooIE, YahooSearchIE
|
from .xtube import XTubeIE
|
||||||
|
from .yahoo import (
|
||||||
|
YahooIE,
|
||||||
|
YahooNewsIE,
|
||||||
|
YahooSearchIE,
|
||||||
|
)
|
||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
from .youku import YoukuIE
|
from .youku import YoukuIE
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
@ -153,6 +202,7 @@ from .youtube import (
|
|||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubePlaylistIE,
|
YoutubePlaylistIE,
|
||||||
YoutubeSearchIE,
|
YoutubeSearchIE,
|
||||||
|
YoutubeSearchDateIE,
|
||||||
YoutubeUserIE,
|
YoutubeUserIE,
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
YoutubeShowIE,
|
||||||
@ -161,6 +211,8 @@ from .youtube import (
|
|||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
|
YoutubeHistoryIE,
|
||||||
|
YoutubeTopListIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
@ -13,12 +13,12 @@ from ..utils import (
|
|||||||
|
|
||||||
class AddAnimeIE(InfoExtractor):
|
class AddAnimeIE(InfoExtractor):
|
||||||
|
|
||||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
|
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
|
||||||
IE_NAME = u'AddAnime'
|
IE_NAME = u'AddAnime'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||||
u'file': u'24MR3YO5SAS9.flv',
|
u'file': u'24MR3YO5SAS9.mp4',
|
||||||
u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
|
u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"description": u"One Piece 606",
|
u"description": u"One Piece 606",
|
||||||
u"title": u"One Piece 606"
|
u"title": u"One Piece 606"
|
||||||
@ -31,7 +31,8 @@ class AddAnimeIE(InfoExtractor):
|
|||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
except ExtractorError as ee:
|
except ExtractorError as ee:
|
||||||
if not isinstance(ee.cause, compat_HTTPError):
|
if not isinstance(ee.cause, compat_HTTPError) or \
|
||||||
|
ee.cause.code != 503:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
redir_webpage = ee.cause.read().decode('utf-8')
|
redir_webpage = ee.cause.read().decode('utf-8')
|
||||||
@ -60,16 +61,26 @@ class AddAnimeIE(InfoExtractor):
|
|||||||
note=u'Confirming after redirect')
|
note=u'Confirming after redirect')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
|
formats = []
|
||||||
webpage, u'video file URL')
|
for format_id in ('normal', 'hq'):
|
||||||
|
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
|
||||||
|
video_url = self._search_regex(rex, webpage, u'video file URLx',
|
||||||
|
fatal=False)
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': video_url,
|
||||||
|
})
|
||||||
|
if not formats:
|
||||||
|
raise ExtractorError(u'Cannot find any video format!')
|
||||||
video_title = self._og_search_title(webpage)
|
video_title = self._og_search_title(webpage)
|
||||||
video_description = self._og_search_description(webpage)
|
video_description = self._og_search_description(webpage)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'ext': 'flv',
|
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'description': video_description
|
'description': video_description
|
||||||
}
|
}
|
||||||
|
53
youtube_dl/extractor/anitube.py
Normal file
53
youtube_dl/extractor/anitube.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class AnitubeIE(InfoExtractor):
|
||||||
|
IE_NAME = u'anitube.se'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.anitube.se/video/36621',
|
||||||
|
u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
|
||||||
|
u'file': u'36621.mp4',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'36621',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Recorder to Randoseru 01',
|
||||||
|
},
|
||||||
|
u'skip': u'Blocked in the US',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
||||||
|
webpage, u'key')
|
||||||
|
|
||||||
|
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
||||||
|
key)
|
||||||
|
|
||||||
|
video_title = config_xml.find('title').text
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
video_url = config_xml.find('file')
|
||||||
|
if video_url is not None:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'sd',
|
||||||
|
'url': video_url.text,
|
||||||
|
})
|
||||||
|
video_url = config_xml.find('filehd')
|
||||||
|
if video_url is not None:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'hd',
|
||||||
|
'url': video_url.text,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_title,
|
||||||
|
'formats': formats
|
||||||
|
}
|
@ -10,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class AppleTrailersIE(InfoExtractor):
|
class AppleTrailersIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
|
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||||
u"playlist": [
|
u"playlist": [
|
||||||
@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
|
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
|
||||||
|
|
||||||
info = {
|
playlist.append({
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'user_agent': 'QuickTime compatible (youtube-dl)',
|
'user_agent': 'QuickTime compatible (youtube-dl)',
|
||||||
}
|
})
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info['url'] = formats[-1]['url']
|
|
||||||
info['ext'] = formats[-1]['ext']
|
|
||||||
|
|
||||||
playlist.append(info)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
|||||||
class ArchiveOrgIE(InfoExtractor):
|
class ArchiveOrgIE(InfoExtractor):
|
||||||
IE_NAME = 'archive.org'
|
IE_NAME = 'archive.org'
|
||||||
IE_DESC = 'archive.org videos'
|
IE_DESC = 'archive.org videos'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
_VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
|
u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
|
||||||
u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
|
u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
|
||||||
@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||||||
for f in formats:
|
for f in formats:
|
||||||
f['ext'] = determine_ext(f['url'])
|
f['ext'] = determine_ext(f['url'])
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
|
'thumbnail': data.get('misc', {}).get('image'),
|
||||||
}
|
}
|
||||||
thumbnail = data.get('misc', {}).get('image')
|
|
||||||
if thumbnail:
|
|
||||||
info['thumbnail'] = thumbnail
|
|
||||||
|
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -10,6 +9,8 @@ from ..utils import (
|
|||||||
unified_strdate,
|
unified_strdate,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
|
compat_str,
|
||||||
|
get_element_by_attribute,
|
||||||
)
|
)
|
||||||
|
|
||||||
# There are different sources of video in arte.tv, the extraction process
|
# There are different sources of video in arte.tv, the extraction process
|
||||||
@ -17,8 +18,8 @@ from ..utils import (
|
|||||||
# add tests.
|
# add tests.
|
||||||
|
|
||||||
class ArteTvIE(InfoExtractor):
|
class ArteTvIE(InfoExtractor):
|
||||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
|
_VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
|
||||||
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||||
|
|
||||||
IE_NAME = u'arte.tv'
|
IE_NAME = u'arte.tv'
|
||||||
@ -68,7 +69,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
lang = mobj.group('lang')
|
lang = mobj.group('lang')
|
||||||
return self._extract_liveweb(url, name, lang)
|
return self._extract_liveweb(url, name, lang)
|
||||||
|
|
||||||
if re.search(self._LIVE_URL, video_id) is not None:
|
if re.search(self._LIVE_URL, url) is not None:
|
||||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||||
# self.extractLiveStream(url)
|
# self.extractLiveStream(url)
|
||||||
# return
|
# return
|
||||||
@ -77,8 +78,7 @@ class ArteTvIE(InfoExtractor):
|
|||||||
"""Extract from videos.arte.tv"""
|
"""Extract from videos.arte.tv"""
|
||||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||||
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
|
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||||
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
|
|
||||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||||
config_xml_url = config_node.attrib['ref']
|
config_xml_url = config_node.attrib['ref']
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||||
@ -108,13 +108,12 @@ class ArteTvIE(InfoExtractor):
|
|||||||
"""Extract form http://liveweb.arte.tv/"""
|
"""Extract form http://liveweb.arte.tv/"""
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
|
||||||
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
||||||
video_id, u'Downloading information')
|
video_id, u'Downloading information')
|
||||||
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
event_doc = config_doc.find('event')
|
event_doc = config_doc.find('event')
|
||||||
url_node = event_doc.find('video').find('urlHd')
|
url_node = event_doc.find('video').find('urlHd')
|
||||||
if url_node is None:
|
if url_node is None:
|
||||||
url_node = video_doc.find('urlSd')
|
url_node = event_doc.find('urlSd')
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'title': event_doc.find('name%s' % lang.capitalize()).text,
|
'title': event_doc.find('name%s' % lang.capitalize()).text,
|
||||||
@ -144,7 +143,9 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||||
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
||||||
|
return self._extract_from_json_url(json_url, video_id, lang)
|
||||||
|
|
||||||
|
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||||
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
|
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
info = json.loads(json_info)
|
info = json.loads(json_info)
|
||||||
@ -158,7 +159,9 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||||
}
|
}
|
||||||
|
|
||||||
formats = player_info['VSR'].values()
|
all_formats = player_info['VSR'].values()
|
||||||
|
# Some formats use the m3u8 protocol
|
||||||
|
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
|
||||||
def _match_lang(f):
|
def _match_lang(f):
|
||||||
if f.get('versionCode') is None:
|
if f.get('versionCode') is None:
|
||||||
return True
|
return True
|
||||||
@ -170,24 +173,39 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||||
# Some formats may not be in the same language as the url
|
# Some formats may not be in the same language as the url
|
||||||
formats = filter(_match_lang, formats)
|
formats = filter(_match_lang, all_formats)
|
||||||
# Some formats use the m3u8 protocol
|
|
||||||
formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
|
|
||||||
# We order the formats by quality
|
|
||||||
formats = list(formats) # in python3 filter returns an iterator
|
formats = list(formats) # in python3 filter returns an iterator
|
||||||
|
if not formats:
|
||||||
|
# Some videos are only available in the 'Originalversion'
|
||||||
|
# they aren't tagged as being in French or German
|
||||||
|
if all(f['versionCode'] == 'VO' for f in all_formats):
|
||||||
|
formats = all_formats
|
||||||
|
else:
|
||||||
|
raise ExtractorError(u'The formats list is empty')
|
||||||
|
|
||||||
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
||||||
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
def sort_key(f):
|
||||||
|
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
||||||
else:
|
else:
|
||||||
sort_key = lambda f: int(f.get('height',-1))
|
def sort_key(f):
|
||||||
|
return (
|
||||||
|
# Sort first by quality
|
||||||
|
int(f.get('height',-1)),
|
||||||
|
int(f.get('bitrate',-1)),
|
||||||
|
# The original version with subtitles has lower relevance
|
||||||
|
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
|
||||||
|
# The version with sourds/mal subtitles has also lower relevance
|
||||||
|
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
|
||||||
|
)
|
||||||
formats = sorted(formats, key=sort_key)
|
formats = sorted(formats, key=sort_key)
|
||||||
# Prefer videos without subtitles in the same language
|
|
||||||
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
|
|
||||||
# Pick the best quality
|
|
||||||
def _format(format_info):
|
def _format(format_info):
|
||||||
quality = format_info['quality']
|
quality = ''
|
||||||
m_quality = re.match(r'\w*? - (\d*)p', quality)
|
height = format_info.get('height')
|
||||||
if m_quality is not None:
|
if height is not None:
|
||||||
quality = m_quality.group(1)
|
quality = compat_str(height)
|
||||||
|
bitrate = format_info.get('bitrate')
|
||||||
|
if bitrate is not None:
|
||||||
|
quality += '-%d' % bitrate
|
||||||
if format_info.get('versionCode') is not None:
|
if format_info.get('versionCode') is not None:
|
||||||
format_id = u'%s-%s' % (quality, format_info['versionCode'])
|
format_id = u'%s-%s' % (quality, format_info['versionCode'])
|
||||||
else:
|
else:
|
||||||
@ -196,7 +214,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'format_note': format_info.get('versionLibelle'),
|
'format_note': format_info.get('versionLibelle'),
|
||||||
'width': format_info.get('width'),
|
'width': format_info.get('width'),
|
||||||
'height': format_info.get('height'),
|
'height': height,
|
||||||
}
|
}
|
||||||
if format_info['mediaType'] == u'rtmp':
|
if format_info['mediaType'] == u'rtmp':
|
||||||
info['url'] = format_info['streamer']
|
info['url'] = format_info['streamer']
|
||||||
@ -242,3 +260,35 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
|||||||
webpage = self._download_webpage(url, anchor_id)
|
webpage = self._download_webpage(url, anchor_id)
|
||||||
row = get_element_by_id(anchor_id, webpage)
|
row = get_element_by_id(anchor_id, webpage)
|
||||||
return self._extract_from_webpage(row, anchor_id, lang)
|
return self._extract_from_webpage(row, anchor_id, lang)
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVDDCIE(ArteTVPlus7IE):
|
||||||
|
IE_NAME = u'arte.tv:ddc'
|
||||||
|
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://ddc.arte.tv/folge/neues-aus-mauretanien',
|
||||||
|
u'file': u'049881-009_PLUS7-D.flv',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Mit offenen Karten',
|
||||||
|
u'description': u'md5:57929b0eaeddeb8a0c983f58e9ebd3b6',
|
||||||
|
u'upload_date': u'20131207',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id, lang = self._extract_url_info(url)
|
||||||
|
if lang == 'folge':
|
||||||
|
lang = 'de'
|
||||||
|
elif lang == 'emission':
|
||||||
|
lang = 'fr'
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage)
|
||||||
|
script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url')
|
||||||
|
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
|
||||||
|
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
|
||||||
|
return self._extract_from_json_url(json_url, video_id, lang)
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
import os.path
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_urlparse,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
class AUEngineIE(InfoExtractor):
|
class AUEngineIE(InfoExtractor):
|
||||||
@ -16,7 +16,7 @@ class AUEngineIE(InfoExtractor):
|
|||||||
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
|
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?'
|
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -25,22 +25,25 @@ class AUEngineIE(InfoExtractor):
|
|||||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
|
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
|
||||||
webpage, u'title')
|
webpage, u'title')
|
||||||
title = title.strip()
|
title = title.strip()
|
||||||
links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
|
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
||||||
links = [compat_urllib_parse.unquote(l) for l in links]
|
links = map(compat_urllib_parse.unquote, links)
|
||||||
|
|
||||||
|
thumbnail = None
|
||||||
|
video_url = None
|
||||||
for link in links:
|
for link in links:
|
||||||
root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path)
|
if link.endswith('.png'):
|
||||||
if pathext == '.png':
|
|
||||||
thumbnail = link
|
thumbnail = link
|
||||||
elif pathext == '.mp4':
|
elif '/videos/' in link:
|
||||||
url = link
|
video_url = link
|
||||||
ext = pathext
|
if not video_url:
|
||||||
|
raise ExtractorError(u'Could not find video URL')
|
||||||
|
ext = u'.' + determine_ext(video_url)
|
||||||
if ext == title[-len(ext):]:
|
if ext == title[-len(ext):]:
|
||||||
title = title[:-len(ext)]
|
title = title[:-len(ext)]
|
||||||
ext = ext[1:]
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': url,
|
'url': video_url,
|
||||||
'ext': ext,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}]
|
}
|
||||||
|
86
youtube_dl/extractor/bambuser.py
Normal file
86
youtube_dl/extractor/bambuser.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BambuserIE(InfoExtractor):
|
||||||
|
IE_NAME = u'bambuser'
|
||||||
|
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
|
||||||
|
_API_KEY = '005f64509e19a868399060af746a00aa'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://bambuser.com/v/4050584',
|
||||||
|
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
|
||||||
|
#u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'4050584',
|
||||||
|
u'ext': u'flv',
|
||||||
|
u'title': u'Education engineering days - lightning talks',
|
||||||
|
u'duration': 3741,
|
||||||
|
u'uploader': u'pixelversity',
|
||||||
|
u'uploader_id': u'344706',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# It doesn't respect the 'Range' header, it would download the whole video
|
||||||
|
# caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
|
||||||
|
'&api_key=%s&vid=%s' % (self._API_KEY, video_id))
|
||||||
|
info_json = self._download_webpage(info_url, video_id)
|
||||||
|
info = json.loads(info_json)['result']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': info['title'],
|
||||||
|
'url': info['url'],
|
||||||
|
'thumbnail': info.get('preview'),
|
||||||
|
'duration': int(info['length']),
|
||||||
|
'view_count': int(info['views_total']),
|
||||||
|
'uploader': info['username'],
|
||||||
|
'uploader_id': info['uid'],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BambuserChannelIE(InfoExtractor):
|
||||||
|
IE_NAME = u'bambuser:channel'
|
||||||
|
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
|
||||||
|
# The maximum number we can get with each request
|
||||||
|
_STEP = 50
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
user = mobj.group('user')
|
||||||
|
urls = []
|
||||||
|
last_id = ''
|
||||||
|
for i in itertools.count(1):
|
||||||
|
req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
|
||||||
|
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
|
||||||
|
'&method=broadcast&format=json&vid_older_than={last}'
|
||||||
|
).format(user=user, count=self._STEP, last=last_id)
|
||||||
|
req = compat_urllib_request.Request(req_url)
|
||||||
|
# Without setting this header, we wouldn't get any result
|
||||||
|
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
|
||||||
|
info_json = self._download_webpage(req, user,
|
||||||
|
u'Downloading page %d' % i)
|
||||||
|
results = json.loads(info_json)['result']
|
||||||
|
if len(results) == 0:
|
||||||
|
break
|
||||||
|
last_id = results[-1]['vid']
|
||||||
|
urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'title': user,
|
||||||
|
'entries': urls,
|
||||||
|
}
|
@ -3,13 +3,16 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
|
IE_NAME = u'Bandcamp'
|
||||||
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||||
u'file': u'1812978515.mp3',
|
u'file': u'1812978515.mp3',
|
||||||
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
|
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
|
||||||
@ -17,7 +20,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
||||||
},
|
},
|
||||||
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -26,6 +29,23 @@ class BandcampIE(InfoExtractor):
|
|||||||
# We get the link to the free download page
|
# We get the link to the free download page
|
||||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||||
if m_download is None:
|
if m_download is None:
|
||||||
|
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||||
|
if m_trackinfo:
|
||||||
|
json_code = m_trackinfo.group(1)
|
||||||
|
data = json.loads(json_code)
|
||||||
|
|
||||||
|
for d in data:
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'format_id',
|
||||||
|
'url': format_url,
|
||||||
|
'ext': format_id.partition('-')[0]
|
||||||
|
} for format_id, format_url in sorted(d['file'].items())]
|
||||||
|
return {
|
||||||
|
'id': compat_str(d['id']),
|
||||||
|
'title': d['title'],
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
else:
|
||||||
raise ExtractorError(u'No free songs found')
|
raise ExtractorError(u'No free songs found')
|
||||||
|
|
||||||
download_link = m_download.group(1)
|
download_link = m_download.group(1)
|
||||||
@ -61,3 +81,49 @@ class BandcampIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
return [track_info]
|
return [track_info]
|
||||||
|
|
||||||
|
|
||||||
|
class BandcampAlbumIE(InfoExtractor):
|
||||||
|
IE_NAME = u'Bandcamp:album'
|
||||||
|
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
|
u'playlist': [
|
||||||
|
{
|
||||||
|
u'file': u'1353101989.mp3',
|
||||||
|
u'md5': u'39bc1eded3476e927c724321ddf116cf',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Intro',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'file': u'38097443.mp3',
|
||||||
|
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Kero One - Keep It Alive (Blazo remix)',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
],
|
||||||
|
u'params': {
|
||||||
|
u'playlistend': 2
|
||||||
|
},
|
||||||
|
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
title = mobj.group('title')
|
||||||
|
webpage = self._download_webpage(url, title)
|
||||||
|
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||||
|
if not tracks_paths:
|
||||||
|
raise ExtractorError(u'The page doesn\'t contain any track')
|
||||||
|
entries = [
|
||||||
|
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||||
|
for t_path in tracks_paths]
|
||||||
|
title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'title': title,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
@ -51,8 +51,7 @@ class BlipTVIE(InfoExtractor):
|
|||||||
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
|
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
|
||||||
urlp = compat_urllib_parse_urlparse(url)
|
urlp = compat_urllib_parse_urlparse(url)
|
||||||
if urlp.path.startswith('/play/'):
|
if urlp.path.startswith('/play/'):
|
||||||
request = compat_urllib_request.Request(url)
|
response = self._request_webpage(url, None, False)
|
||||||
response = compat_urllib_request.urlopen(request)
|
|
||||||
redirecturl = response.geturl()
|
redirecturl = response.geturl()
|
||||||
rurlp = compat_urllib_parse_urlparse(redirecturl)
|
rurlp = compat_urllib_parse_urlparse(redirecturl)
|
||||||
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
|
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
|
||||||
@ -69,25 +68,23 @@ class BlipTVIE(InfoExtractor):
|
|||||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||||
self.report_extraction(mobj.group(1))
|
self.report_extraction(mobj.group(1))
|
||||||
info = None
|
info = None
|
||||||
try:
|
urlh = self._request_webpage(request, None, False,
|
||||||
urlh = compat_urllib_request.urlopen(request)
|
u'unable to download video info webpage')
|
||||||
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
||||||
basename = url.split('/')[-1]
|
basename = url.split('/')[-1]
|
||||||
title,ext = os.path.splitext(basename)
|
title,ext = os.path.splitext(basename)
|
||||||
title = title.decode('UTF-8')
|
title = title.decode('UTF-8')
|
||||||
ext = ext.replace('.', '')
|
ext = ext.replace('.', '')
|
||||||
self.report_direct_download(title)
|
self.report_direct_download(title)
|
||||||
info = {
|
info = {
|
||||||
'id': title,
|
'id': title,
|
||||||
'url': url,
|
'url': url,
|
||||||
'uploader': None,
|
'uploader': None,
|
||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'urlhandle': urlh
|
'urlhandle': urlh
|
||||||
}
|
}
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
|
|
||||||
if info is None: # Regular URL
|
if info is None: # Regular URL
|
||||||
try:
|
try:
|
||||||
json_code_bytes = urlh.read()
|
json_code_bytes = urlh.read()
|
||||||
|
@ -4,7 +4,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class BloombergIE(InfoExtractor):
|
class BloombergIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'
|
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||||
|
@ -9,10 +9,13 @@ from ..utils import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_request,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BrightcoveIE(InfoExtractor):
|
class BrightcoveIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
|
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
|
||||||
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
||||||
@ -23,7 +26,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
|
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
|
||||||
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
|
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
|
||||||
u'file': u'2371591881001.mp4',
|
u'file': u'2371591881001.mp4',
|
||||||
u'md5': u'9e80619e0a94663f0bdc849b4566af19',
|
u'md5': u'8eccab865181d29ec2958f32a6a754f5',
|
||||||
u'note': u'Test Brightcove downloads and detection in GenericIE',
|
u'note': u'Test Brightcove downloads and detection in GenericIE',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
|
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
|
||||||
@ -41,6 +44,29 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
u'uploader': u'Oracle',
|
u'uploader': u'Oracle',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
||||||
|
u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'2750934548001',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'This Bracelet Acts as a Personal Thermostat',
|
||||||
|
u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0',
|
||||||
|
u'uploader': u'Mashable',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# test that the default referer works
|
||||||
|
# from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
|
||||||
|
u'url': u'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'2878862109001',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Lost in Motion II',
|
||||||
|
u'description': u'md5:363109c02998fee92ec02211bd8000df',
|
||||||
|
u'uploader': u'National Ballet of Canada',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -61,31 +87,65 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
params = {'flashID': object_doc.attrib['id'],
|
params = {'flashID': object_doc.attrib['id'],
|
||||||
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
||||||
}
|
}
|
||||||
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
|
def find_param(name):
|
||||||
|
node = find_xpath_attr(object_doc, './param', 'name', name)
|
||||||
|
if node is not None:
|
||||||
|
return node.attrib['value']
|
||||||
|
return None
|
||||||
|
playerKey = find_param('playerKey')
|
||||||
# Not all pages define this value
|
# Not all pages define this value
|
||||||
if playerKey is not None:
|
if playerKey is not None:
|
||||||
params['playerKey'] = playerKey.attrib['value']
|
params['playerKey'] = playerKey
|
||||||
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
|
# The three fields hold the id of the video
|
||||||
|
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
|
||||||
if videoPlayer is not None:
|
if videoPlayer is not None:
|
||||||
params['@videoPlayer'] = videoPlayer.attrib['value']
|
params['@videoPlayer'] = videoPlayer
|
||||||
|
linkBase = find_param('linkBaseURL')
|
||||||
|
if linkBase is not None:
|
||||||
|
params['linkBaseURL'] = linkBase
|
||||||
data = compat_urllib_parse.urlencode(params)
|
data = compat_urllib_parse.urlencode(params)
|
||||||
return cls._FEDERATED_URL_TEMPLATE % data
|
return cls._FEDERATED_URL_TEMPLATE % data
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_brightcove_url(cls, webpage):
|
||||||
|
"""Try to extract the brightcove url from the wepbage, returns None
|
||||||
|
if it can't be found
|
||||||
|
"""
|
||||||
|
m_brightcove = re.search(
|
||||||
|
r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
|
||||||
|
webpage, re.DOTALL)
|
||||||
|
if m_brightcove is not None:
|
||||||
|
return cls._build_brighcove_url(m_brightcove.group())
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
# Change the 'videoId' and others field to '@videoPlayer'
|
||||||
|
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
|
||||||
|
# Change bckey (used by bcove.me urls) to playerKey
|
||||||
|
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
query_str = mobj.group('query')
|
query_str = mobj.group('query')
|
||||||
query = compat_urlparse.parse_qs(query_str)
|
query = compat_urlparse.parse_qs(query_str)
|
||||||
|
|
||||||
videoPlayer = query.get('@videoPlayer')
|
videoPlayer = query.get('@videoPlayer')
|
||||||
if videoPlayer:
|
if videoPlayer:
|
||||||
return self._get_video_info(videoPlayer[0], query_str)
|
return self._get_video_info(videoPlayer[0], query_str, query,
|
||||||
|
# We set the original url as the default 'Referer' header
|
||||||
|
referer=url)
|
||||||
else:
|
else:
|
||||||
player_key = query['playerKey']
|
player_key = query['playerKey']
|
||||||
return self._get_playlist_info(player_key[0])
|
return self._get_playlist_info(player_key[0])
|
||||||
|
|
||||||
def _get_video_info(self, video_id, query):
|
def _get_video_info(self, video_id, query_str, query, referer=None):
|
||||||
request_url = self._FEDERATED_URL_TEMPLATE % query
|
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
||||||
webpage = self._download_webpage(request_url, video_id)
|
req = compat_urllib_request.Request(request_url)
|
||||||
|
linkBase = query.get('linkBaseURL')
|
||||||
|
if linkBase is not None:
|
||||||
|
referer = linkBase[0]
|
||||||
|
if referer is not None:
|
||||||
|
req.add_header('Referer', referer)
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
|
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
|
||||||
@ -109,7 +169,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_video_info(self, video_info):
|
def _extract_video_info(self, video_info):
|
||||||
info = {
|
info = {
|
||||||
'id': video_info['id'],
|
'id': compat_str(video_info['id']),
|
||||||
'title': video_info['displayName'],
|
'title': video_info['displayName'],
|
||||||
'description': video_info.get('shortDescription'),
|
'description': video_info.get('shortDescription'),
|
||||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
||||||
@ -119,15 +179,14 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
renditions = video_info.get('renditions')
|
renditions = video_info.get('renditions')
|
||||||
if renditions:
|
if renditions:
|
||||||
renditions = sorted(renditions, key=lambda r: r['size'])
|
renditions = sorted(renditions, key=lambda r: r['size'])
|
||||||
best_format = renditions[-1]
|
info['formats'] = [{
|
||||||
info.update({
|
'url': rend['defaultURL'],
|
||||||
'url': best_format['defaultURL'],
|
'height': rend.get('frameHeight'),
|
||||||
'ext': 'mp4',
|
'width': rend.get('frameWidth'),
|
||||||
})
|
} for rend in renditions]
|
||||||
elif video_info.get('FLVFullLengthURL') is not None:
|
elif video_info.get('FLVFullLengthURL') is not None:
|
||||||
info.update({
|
info.update({
|
||||||
'url': video_info['FLVFullLengthURL'],
|
'url': video_info['FLVFullLengthURL'],
|
||||||
'ext': 'flv',
|
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
|
raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
|
||||||
|
@ -6,7 +6,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
class Canalc2IE(InfoExtractor):
|
class Canalc2IE(InfoExtractor):
|
||||||
IE_NAME = 'canalc2.tv'
|
IE_NAME = 'canalc2.tv'
|
||||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
|
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
||||||
@ -18,7 +18,9 @@ class Canalc2IE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = re.match(self._VALID_URL, url).group(1)
|
video_id = re.match(self._VALID_URL, url).group('id')
|
||||||
|
# We need to set the voir field for getting the file name
|
||||||
|
url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
file_name = self._search_regex(
|
file_name = self._search_regex(
|
||||||
r"so\.addVariable\('file','(.*?)'\);",
|
r"so\.addVariable\('file','(.*?)'\);",
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
class CanalplusIE(InfoExtractor):
|
class CanalplusIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
||||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
||||||
@ -25,16 +25,15 @@ class CanalplusIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.groupdict().get('id')
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
webpage = self._download_webpage(url, mobj.group('path'))
|
webpage = self._download_webpage(url, mobj.group('path'))
|
||||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
||||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||||
info_page = self._download_webpage(info_url,video_id,
|
doc = self._download_xml(info_url,video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
|
|
||||||
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
video_info = [video for video in doc if video.find('ID').text == video_id][0]
|
||||||
infos = video_info.find('INFOS')
|
infos = video_info.find('INFOS')
|
||||||
media = video_info.find('MEDIA')
|
media = video_info.find('MEDIA')
|
||||||
|
@ -41,7 +41,7 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
webpage_url = u'http://' + mobj.group('url')
|
webpage_url = u'http://' + mobj.group('url')
|
||||||
webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
|
webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
|
||||||
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
||||||
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
raise ExtractorError(u'Can\'t extract embed url and video id')
|
raise ExtractorError(u'Can\'t extract embed url and video id')
|
||||||
playerdata_url = mobj.group(u'embed_url')
|
playerdata_url = mobj.group(u'embed_url')
|
||||||
@ -55,30 +55,32 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
video_description = None
|
video_description = None
|
||||||
|
|
||||||
playerdata = self._download_webpage(playerdata_url, video_id)
|
playerdata = self._download_webpage(playerdata_url, video_id)
|
||||||
base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://.*?)/(?:vod|Cinemassacre)\'',
|
url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url')
|
||||||
playerdata, u'base_url')
|
|
||||||
base_url += '/Cinemassacre/'
|
sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file')
|
||||||
# Important: The file names in playerdata are not used by the player and even wrong for some videos
|
hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file')
|
||||||
sd_file = 'Cinemassacre-%s_high.mp4' % video_id
|
video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False)
|
||||||
hd_file = 'Cinemassacre-%s.mp4' % video_id
|
|
||||||
video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id
|
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'url': base_url + sd_file,
|
'url': url,
|
||||||
|
'play_path': 'mp4:' + sd_file,
|
||||||
|
'rtmp_live': True, # workaround
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format': 'sd',
|
'format': 'sd',
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': base_url + hd_file,
|
'url': url,
|
||||||
|
'play_path': 'mp4:' + hd_file,
|
||||||
|
'rtmp_live': True, # workaround
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format': 'hd',
|
'format': 'hd',
|
||||||
'format_id': 'hd',
|
'format_id': 'hd',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@ -86,6 +88,3 @@ class CinemassacreIE(InfoExtractor):
|
|||||||
'upload_date': video_date,
|
'upload_date': video_date,
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
}
|
}
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
return info
|
|
||||||
|
58
youtube_dl/extractor/clipfish.py
Normal file
58
youtube_dl/extractor/clipfish.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
import re
|
||||||
|
import time
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class ClipfishIE(InfoExtractor):
|
||||||
|
IE_NAME = u'clipfish'
|
||||||
|
|
||||||
|
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||||
|
u'file': u'3966754.mp4',
|
||||||
|
u'md5': u'2521cd644e862936cf2e698206e47385',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'FIFA 14 - E3 2013 Trailer',
|
||||||
|
u'duration': 82,
|
||||||
|
},
|
||||||
|
u'skip': 'Blocked in the US'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group(1)
|
||||||
|
|
||||||
|
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
||||||
|
(video_id, int(time.time())))
|
||||||
|
doc = self._download_xml(
|
||||||
|
info_url, video_id, note=u'Downloading info page')
|
||||||
|
title = doc.find('title').text
|
||||||
|
video_url = doc.find('filename').text
|
||||||
|
if video_url is None:
|
||||||
|
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
||||||
|
raise ExtractorError(u'Cannot find video URL in document %r' %
|
||||||
|
xml_bytes)
|
||||||
|
thumbnail = doc.find('imageurl').text
|
||||||
|
duration_str = doc.find('duration').text
|
||||||
|
m = re.match(
|
||||||
|
r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
|
||||||
|
duration_str)
|
||||||
|
if m:
|
||||||
|
duration = (
|
||||||
|
(int(m.group('hours')) * 60 * 60) +
|
||||||
|
(int(m.group('minutes')) * 60) +
|
||||||
|
(int(m.group('seconds')))
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
duration = None
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
52
youtube_dl/extractor/clipsyndicate.py
Normal file
52
youtube_dl/extractor/clipsyndicate.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
find_xpath_attr,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ClipsyndicateIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||||
|
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'4629301',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Brick Briscoe',
|
||||||
|
u'duration': 612,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
js_player = self._download_webpage(
|
||||||
|
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||||
|
video_id, u'Downlaoding player')
|
||||||
|
# it includes a required token
|
||||||
|
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
|
||||||
|
|
||||||
|
playlist_page = self._download_webpage(
|
||||||
|
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
||||||
|
video_id, u'Downloading video info')
|
||||||
|
# Fix broken xml
|
||||||
|
playlist_page = re.sub('&', '&amp;', playlist_page)
|
||||||
|
pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))
|
||||||
|
|
||||||
|
track_doc = pdoc.find('trackList/track')
|
||||||
|
def find_param(name):
|
||||||
|
node = find_xpath_attr(track_doc, './/param', 'name', name)
|
||||||
|
if node is not None:
|
||||||
|
return node.attrib['value']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': find_param('title'),
|
||||||
|
'url': track_doc.find('location').text,
|
||||||
|
'thumbnail': find_param('thumbnail'),
|
||||||
|
'duration': int(find_param('duration')),
|
||||||
|
}
|
@ -1,12 +1,11 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
|
||||||
class CNNIE(InfoExtractor):
|
class CNNIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
|
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
|
||||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
|
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
|
|||||||
path = mobj.group('path')
|
path = mobj.group('path')
|
||||||
page_title = mobj.group('title')
|
page_title = mobj.group('title')
|
||||||
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
|
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
|
||||||
info_xml = self._download_webpage(info_url, page_title)
|
info = self._download_xml(info_url, page_title)
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for f in info.findall('files/file'):
|
for f in info.findall('files/file'):
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||||
metaXml = self._download_webpage(xmlUrl, video_id,
|
mdoc = self._download_xml(xmlUrl, video_id,
|
||||||
u'Downloading info XML',
|
u'Downloading info XML',
|
||||||
u'Unable to download video info XML')
|
u'Unable to download video info XML')
|
||||||
|
|
||||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
|
||||||
try:
|
try:
|
||||||
videoNode = mdoc.findall('./video')[0]
|
videoNode = mdoc.findall('./video')[0]
|
||||||
youtubeIdNode = videoNode.find('./youtubeID')
|
youtubeIdNode = videoNode.find('./youtubeID')
|
||||||
@ -65,16 +63,13 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
|
|
||||||
if next_url.endswith(u'manifest.f4m'):
|
if next_url.endswith(u'manifest.f4m'):
|
||||||
manifest_url = next_url + '?hdcore=2.10.3'
|
manifest_url = next_url + '?hdcore=2.10.3'
|
||||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
adoc = self._download_xml(manifest_url, video_id,
|
||||||
u'Downloading XML manifest',
|
u'Downloading XML manifest',
|
||||||
u'Unable to download video info XML')
|
u'Unable to download video info XML')
|
||||||
|
|
||||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
|
||||||
try:
|
try:
|
||||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
|
||||||
node_id = media_node.attrib['url']
|
|
||||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||||
except IndexError as err:
|
except IndexError:
|
||||||
raise ExtractorError(u'Invalid manifest file')
|
raise ExtractorError(u'Invalid manifest file')
|
||||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .mtv import MTVServicesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
@ -11,7 +11,31 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralIE(InfoExtractor):
|
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
|
||||||
|
_FEED_URL = u'http://comedycentral.com/feeds/mrss/'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
||||||
|
u'md5': u'4167875aae411f903b751a21f357f1ee',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
|
||||||
|
u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
title = mobj.group('title')
|
||||||
|
webpage = self._download_webpage(url, title)
|
||||||
|
mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
|
||||||
|
webpage, u'mgid')
|
||||||
|
return self._get_videos_info(mgid)
|
||||||
|
|
||||||
|
|
||||||
|
class ComedyCentralShowsIE(InfoExtractor):
|
||||||
IE_DESC = u'The Daily Show / Colbert Report'
|
IE_DESC = u'The Daily Show / Colbert Report'
|
||||||
# urls can be abbreviations like :thedailyshow or :colbert
|
# urls can be abbreviations like :thedailyshow or :colbert
|
||||||
# urls for episodes like:
|
# urls for episodes like:
|
||||||
@ -127,13 +151,12 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
|
|
||||||
uri = mMovieParams[0][1]
|
uri = mMovieParams[0][1]
|
||||||
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
||||||
indexXml = self._download_webpage(indexUrl, epTitle,
|
idoc = self._download_xml(indexUrl, epTitle,
|
||||||
u'Downloading show index',
|
u'Downloading show index',
|
||||||
u'unable to download episode index')
|
u'unable to download episode index')
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
idoc = xml.etree.ElementTree.fromstring(indexXml)
|
|
||||||
itemEls = idoc.findall('.//item')
|
itemEls = idoc.findall('.//item')
|
||||||
for partNum,itemEl in enumerate(itemEls):
|
for partNum,itemEl in enumerate(itemEls):
|
||||||
mediaId = itemEl.findall('./guid')[0].text
|
mediaId = itemEl.findall('./guid')[0].text
|
||||||
@ -144,10 +167,9 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
|
|
||||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||||
configXml = self._download_webpage(configUrl, epTitle,
|
cdoc = self._download_xml(configUrl, epTitle,
|
||||||
u'Downloading configuration for %s' % shortMediaId)
|
u'Downloading configuration for %s' % shortMediaId)
|
||||||
|
|
||||||
cdoc = xml.etree.ElementTree.fromstring(configXml)
|
|
||||||
turls = []
|
turls = []
|
||||||
for rendition in cdoc.findall('.//rendition'):
|
for rendition in cdoc.findall('.//rendition'):
|
||||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||||
@ -169,7 +191,7 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
|
|
||||||
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
|
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
|
||||||
info = {
|
results.append({
|
||||||
'id': shortMediaId,
|
'id': shortMediaId,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': showId,
|
'uploader': showId,
|
||||||
@ -177,11 +199,6 @@ class ComedyCentralIE(InfoExtractor):
|
|||||||
'title': effTitle,
|
'title': effTitle,
|
||||||
'thumbnail': None,
|
'thumbnail': None,
|
||||||
'description': compat_str(officialTitle),
|
'description': compat_str(officialTitle),
|
||||||
}
|
})
|
||||||
|
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(info['formats'][-1])
|
|
||||||
|
|
||||||
results.append(info)
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
@ -4,20 +4,22 @@ import re
|
|||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import netrc
|
import netrc
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_request,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
|
|
||||||
clean_html,
|
clean_html,
|
||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
|
sanitize_filename,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class InfoExtractor(object):
|
class InfoExtractor(object):
|
||||||
"""Information Extractor class.
|
"""Information Extractor class.
|
||||||
|
|
||||||
@ -53,6 +55,9 @@ class InfoExtractor(object):
|
|||||||
subtitles: The subtitle file contents as a dictionary in the format
|
subtitles: The subtitle file contents as a dictionary in the format
|
||||||
{language: subtitles}.
|
{language: subtitles}.
|
||||||
view_count: How many users have watched the video on the platform.
|
view_count: How many users have watched the video on the platform.
|
||||||
|
like_count: Number of positive ratings of the video
|
||||||
|
dislike_count: Number of negative ratings of the video
|
||||||
|
comment_count: Number of comments on the video
|
||||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||||
like returned by urllib.request.urlopen
|
like returned by urllib.request.urlopen
|
||||||
age_limit: Age restriction for the video, as an integer (years)
|
age_limit: Age restriction for the video, as an integer (years)
|
||||||
@ -62,7 +67,7 @@ class InfoExtractor(object):
|
|||||||
* ext Will be calculated from url if missing
|
* ext Will be calculated from url if missing
|
||||||
* format A human-readable description of the format
|
* format A human-readable description of the format
|
||||||
("mp4 container with h264/opus").
|
("mp4 container with h264/opus").
|
||||||
Calculated from the format_id, width, height
|
Calculated from the format_id, width, height.
|
||||||
and format_note fields if missing.
|
and format_note fields if missing.
|
||||||
* format_id A short description of the format
|
* format_id A short description of the format
|
||||||
("mp4_h264_opus" or "19")
|
("mp4_h264_opus" or "19")
|
||||||
@ -70,6 +75,14 @@ class InfoExtractor(object):
|
|||||||
("3D" or "DASH video")
|
("3D" or "DASH video")
|
||||||
* width Width of the video, if known
|
* width Width of the video, if known
|
||||||
* height Height of the video, if known
|
* height Height of the video, if known
|
||||||
|
* abr Average audio bitrate in KBit/s
|
||||||
|
* acodec Name of the audio codec in use
|
||||||
|
* vbr Average video bitrate in KBit/s
|
||||||
|
* vcodec Name of the video codec in use
|
||||||
|
* filesize The number of bytes, if known in advance
|
||||||
|
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||||
|
should allow to get the same result again. (It will be set
|
||||||
|
by YoutubeDL if it's missing)
|
||||||
|
|
||||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||||
|
|
||||||
@ -141,27 +154,38 @@ class InfoExtractor(object):
|
|||||||
def IE_NAME(self):
|
def IE_NAME(self):
|
||||||
return type(self).__name__[:-2]
|
return type(self).__name__[:-2]
|
||||||
|
|
||||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
""" Returns the response handle """
|
""" Returns the response handle """
|
||||||
if note is None:
|
if note is None:
|
||||||
self.report_download_webpage(video_id)
|
self.report_download_webpage(video_id)
|
||||||
elif note is not False:
|
elif note is not False:
|
||||||
self.to_screen(u'%s: %s' % (video_id, note))
|
if video_id is None:
|
||||||
|
self.to_screen(u'%s' % (note,))
|
||||||
|
else:
|
||||||
|
self.to_screen(u'%s: %s' % (video_id, note))
|
||||||
try:
|
try:
|
||||||
return compat_urllib_request.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
if errnote is None:
|
if errnote is None:
|
||||||
errnote = u'Unable to download webpage'
|
errnote = u'Unable to download webpage'
|
||||||
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
|
errmsg = u'%s: %s' % (errnote, compat_str(err))
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning(errmsg)
|
||||||
|
return False
|
||||||
|
|
||||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
|
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
""" Returns a tuple (page content as string, URL handle) """
|
""" Returns a tuple (page content as string, URL handle) """
|
||||||
|
|
||||||
# Strip hashes from the URL (#1038)
|
# Strip hashes from the URL (#1038)
|
||||||
if isinstance(url_or_request, (compat_str, str)):
|
if isinstance(url_or_request, (compat_str, str)):
|
||||||
url_or_request = url_or_request.partition('#')[0]
|
url_or_request = url_or_request.partition('#')[0]
|
||||||
|
|
||||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
|
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
|
||||||
|
if urlh is False:
|
||||||
|
assert not fatal
|
||||||
|
return False
|
||||||
content_type = urlh.headers.get('Content-Type', '')
|
content_type = urlh.headers.get('Content-Type', '')
|
||||||
webpage_bytes = urlh.read()
|
webpage_bytes = urlh.read()
|
||||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||||
@ -182,12 +206,34 @@ class InfoExtractor(object):
|
|||||||
self.to_screen(u'Dumping request to ' + url)
|
self.to_screen(u'Dumping request to ' + url)
|
||||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||||
self._downloader.to_screen(dump)
|
self._downloader.to_screen(dump)
|
||||||
|
if self._downloader.params.get('write_pages', False):
|
||||||
|
try:
|
||||||
|
url = url_or_request.get_full_url()
|
||||||
|
except AttributeError:
|
||||||
|
url = url_or_request
|
||||||
|
raw_filename = ('%s_%s.dump' % (video_id, url))
|
||||||
|
filename = sanitize_filename(raw_filename, restricted=True)
|
||||||
|
self.to_screen(u'Saving request to ' + filename)
|
||||||
|
with open(filename, 'wb') as outf:
|
||||||
|
outf.write(webpage_bytes)
|
||||||
|
|
||||||
content = webpage_bytes.decode(encoding, 'replace')
|
content = webpage_bytes.decode(encoding, 'replace')
|
||||||
return (content, urlh)
|
return (content, urlh)
|
||||||
|
|
||||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
""" Returns the data of the page as a string """
|
""" Returns the data of the page as a string """
|
||||||
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
|
||||||
|
if res is False:
|
||||||
|
return res
|
||||||
|
else:
|
||||||
|
content, _ = res
|
||||||
|
return content
|
||||||
|
|
||||||
|
def _download_xml(self, url_or_request, video_id,
|
||||||
|
note=u'Downloading XML', errnote=u'Unable to download XML'):
|
||||||
|
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||||
|
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||||
|
|
||||||
def to_screen(self, msg):
|
def to_screen(self, msg):
|
||||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||||
@ -210,12 +256,14 @@ class InfoExtractor(object):
|
|||||||
self.to_screen(u'Logging in')
|
self.to_screen(u'Logging in')
|
||||||
|
|
||||||
#Methods for following #608
|
#Methods for following #608
|
||||||
def url_result(self, url, ie=None):
|
def url_result(self, url, ie=None, video_id=None):
|
||||||
"""Returns a url that points to a page that should be processed"""
|
"""Returns a url that points to a page that should be processed"""
|
||||||
#TODO: ie should be the class used for getting the info
|
#TODO: ie should be the class used for getting the info
|
||||||
video_info = {'_type': 'url',
|
video_info = {'_type': 'url',
|
||||||
'url': url,
|
'url': url,
|
||||||
'ie_key': ie}
|
'ie_key': ie}
|
||||||
|
if video_id is not None:
|
||||||
|
video_info['id'] = video_id
|
||||||
return video_info
|
return video_info
|
||||||
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
||||||
"""Returns a playlist"""
|
"""Returns a playlist"""
|
||||||
@ -300,13 +348,21 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
# Helper functions for extracting OpenGraph info
|
# Helper functions for extracting OpenGraph info
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regex(prop):
|
def _og_regexes(prop):
|
||||||
return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
|
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
|
||||||
|
property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
|
||||||
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
|
return [
|
||||||
|
template % (property_re, content_re),
|
||||||
|
template % (content_re, property_re),
|
||||||
|
]
|
||||||
|
|
||||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||||
if name is None:
|
if name is None:
|
||||||
name = 'OpenGraph %s' % prop
|
name = 'OpenGraph %s' % prop
|
||||||
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
|
escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
|
||||||
|
if escaped is None:
|
||||||
|
return None
|
||||||
return unescapeHTML(escaped)
|
return unescapeHTML(escaped)
|
||||||
|
|
||||||
def _og_search_thumbnail(self, html, **kargs):
|
def _og_search_thumbnail(self, html, **kargs):
|
||||||
@ -318,10 +374,22 @@ class InfoExtractor(object):
|
|||||||
def _og_search_title(self, html, **kargs):
|
def _og_search_title(self, html, **kargs):
|
||||||
return self._og_search_property('title', html, **kargs)
|
return self._og_search_property('title', html, **kargs)
|
||||||
|
|
||||||
def _og_search_video_url(self, html, name='video url', **kargs):
|
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
||||||
return self._html_search_regex([self._og_regex('video:secure_url'),
|
regexes = self._og_regexes('video')
|
||||||
self._og_regex('video')],
|
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
||||||
html, name, **kargs)
|
return self._html_search_regex(regexes, html, name, **kargs)
|
||||||
|
|
||||||
|
def _html_search_meta(self, name, html, display_name=None):
|
||||||
|
if display_name is None:
|
||||||
|
display_name = name
|
||||||
|
return self._html_search_regex(
|
||||||
|
r'''(?ix)<meta
|
||||||
|
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
|
||||||
|
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
||||||
|
html, display_name, fatal=False)
|
||||||
|
|
||||||
|
def _dc_search_uploader(self, html):
|
||||||
|
return self._html_search_meta('dc.creator', html, 'uploader')
|
||||||
|
|
||||||
def _rta_search(self, html):
|
def _rta_search(self, html):
|
||||||
# See http://www.rtalabel.org/index.php?content=howtofaq#single
|
# See http://www.rtalabel.org/index.php?content=howtofaq#single
|
||||||
@ -331,6 +399,23 @@ class InfoExtractor(object):
|
|||||||
return 18
|
return 18
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
def _media_rating_search(self, html):
|
||||||
|
# See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
|
||||||
|
rating = self._html_search_meta('rating', html)
|
||||||
|
|
||||||
|
if not rating:
|
||||||
|
return None
|
||||||
|
|
||||||
|
RATING_TABLE = {
|
||||||
|
'safe for kids': 0,
|
||||||
|
'general': 8,
|
||||||
|
'14 years': 14,
|
||||||
|
'mature': 17,
|
||||||
|
'restricted': 19,
|
||||||
|
}
|
||||||
|
return RATING_TABLE.get(rating.lower(), None)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
@ -6,7 +6,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class CSpanIE(InfoExtractor):
|
class CSpanIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www.c-spanvideo.org/program/(.*)'
|
_VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.c-spanvideo.org/program/HolderonV',
|
u'url': u'http://www.c-spanvideo.org/program/HolderonV',
|
||||||
u'file': u'315139.flv',
|
u'file': u'315139.flv',
|
||||||
|
22
youtube_dl/extractor/d8.py
Normal file
22
youtube_dl/extractor/d8.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from .canalplus import CanalplusIE
|
||||||
|
|
||||||
|
|
||||||
|
class D8IE(CanalplusIE):
|
||||||
|
_VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'
|
||||||
|
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'
|
||||||
|
IE_NAME = u'd8.tv'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
|
||||||
|
u'file': u'966289.flv',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Campagne intime - Documentaire exceptionnel',
|
||||||
|
u'description': u'md5:d2643b799fb190846ae09c61e59a859f',
|
||||||
|
u'upload_date': u'20131108',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
@ -11,6 +11,7 @@ from ..utils import (
|
|||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
str_to_int,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
@ -21,6 +22,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||||||
"""Build a request with the family filter disabled"""
|
"""Build a request with the family filter disabled"""
|
||||||
request = compat_urllib_request.Request(url)
|
request = compat_urllib_request.Request(url)
|
||||||
request.add_header('Cookie', 'family_filter=off')
|
request.add_header('Cookie', 'family_filter=off')
|
||||||
|
request.add_header('Cookie', 'ff=off')
|
||||||
return request
|
return request
|
||||||
|
|
||||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||||
@ -61,6 +63,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
},
|
},
|
||||||
u'skip': u'VEVO is only available in some countries',
|
u'skip': u'VEVO is only available in some countries',
|
||||||
},
|
},
|
||||||
|
# age-restricted video
|
||||||
|
{
|
||||||
|
u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
|
||||||
|
u'file': u'xyh2zz.mp4',
|
||||||
|
u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
|
||||||
|
u'uploader': 'HotWaves1012',
|
||||||
|
u'age_limit': 18,
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -90,7 +104,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
||||||
# Looking for official user
|
# Looking for official user
|
||||||
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
||||||
webpage, 'video uploader')
|
webpage, 'video uploader', fatal=False)
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
video_upload_date = None
|
video_upload_date = None
|
||||||
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
|
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
|
||||||
@ -127,22 +142,27 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
raise ExtractorError(u'Unable to extract video URL')
|
raise ExtractorError(u'Unable to extract video URL')
|
||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id)
|
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
self._list_available_subtitles(video_id)
|
self._list_available_subtitles(video_id, webpage)
|
||||||
return
|
return
|
||||||
|
|
||||||
return [{
|
view_count = str_to_int(self._search_regex(
|
||||||
|
r'video_views_value[^>]+>([\d\.,]+)<', webpage, u'view count'))
|
||||||
|
|
||||||
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'subtitles': video_subtitles,
|
'subtitles': video_subtitles,
|
||||||
'thumbnail': info['thumbnail_url']
|
'thumbnail': info['thumbnail_url'],
|
||||||
}]
|
'age_limit': age_limit,
|
||||||
|
'view_count': view_count,
|
||||||
|
}
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
sub_list = self._download_webpage(
|
sub_list = self._download_webpage(
|
||||||
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
||||||
@ -171,7 +191,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
webpage = self._download_webpage(request,
|
webpage = self._download_webpage(request,
|
||||||
id, u'Downloading page %s' % pagenum)
|
id, u'Downloading page %s' % pagenum)
|
||||||
|
|
||||||
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
|
playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
|
||||||
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
|
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -29,17 +28,16 @@ class DaumIE(InfoExtractor):
|
|||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
||||||
webpage = self._download_webpage(canonical_url, video_id)
|
webpage = self._download_webpage(canonical_url, video_id)
|
||||||
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
|
full_id = self._search_regex(
|
||||||
|
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
|
||||||
webpage, u'full id')
|
webpage, u'full id')
|
||||||
query = compat_urllib_parse.urlencode({'vid': full_id})
|
query = compat_urllib_parse.urlencode({'vid': full_id})
|
||||||
info_xml = self._download_webpage(
|
info = self._download_xml(
|
||||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
urls_xml = self._download_webpage(
|
urls = self._download_xml(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||||
video_id, u'Downloading video formats info')
|
video_id, u'Downloading video formats info')
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
self.to_screen(u'%s: Getting video urls' % video_id)
|
||||||
formats = []
|
formats = []
|
||||||
@ -49,10 +47,9 @@ class DaumIE(InfoExtractor):
|
|||||||
'vid': full_id,
|
'vid': full_id,
|
||||||
'profile': profile,
|
'profile': profile,
|
||||||
})
|
})
|
||||||
url_xml = self._download_webpage(
|
url_doc = self._download_xml(
|
||||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||||
video_id, note=False)
|
video_id, note=False)
|
||||||
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
|
|
||||||
format_url = url_doc.find('result/url').text
|
format_url = url_doc.find('result/url').text
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
@ -60,7 +57,7 @@ class DaumIE(InfoExtractor):
|
|||||||
'format_id': profile,
|
'format_id': profile,
|
||||||
})
|
})
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info.find('TITLE').text,
|
'title': info.find('TITLE').text,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@ -69,6 +66,3 @@ class DaumIE(InfoExtractor):
|
|||||||
'duration': int(info.find('DURATION').text),
|
'duration': int(info.find('DURATION').text),
|
||||||
'upload_date': info.find('REGDTTM').text[:8],
|
'upload_date': info.find('REGDTTM').text[:8],
|
||||||
}
|
}
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
return info
|
|
||||||
|
@ -25,7 +25,7 @@ class DepositFilesIE(InfoExtractor):
|
|||||||
url = 'http://depositfiles.com/en/files/' + file_id
|
url = 'http://depositfiles.com/en/files/' + file_id
|
||||||
|
|
||||||
# Retrieve file webpage with 'Free download' button pressed
|
# Retrieve file webpage with 'Free download' button pressed
|
||||||
free_download_indication = { 'gateway_result' : '1' }
|
free_download_indication = {'gateway_result' : '1'}
|
||||||
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
|
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
|
||||||
try:
|
try:
|
||||||
self.report_download_webpage(file_id)
|
self.report_download_webpage(file_id)
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -12,7 +11,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class DreiSatIE(InfoExtractor):
|
class DreiSatIE(InfoExtractor):
|
||||||
IE_NAME = '3sat'
|
IE_NAME = '3sat'
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/index\.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
|
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
|
||||||
u'file': u'36983.webm',
|
u'file': u'36983.webm',
|
||||||
@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||||
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
|
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
|
||||||
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
@ -67,7 +65,7 @@ class DreiSatIE(InfoExtractor):
|
|||||||
return (qidx, prefer_http, format['video_bitrate'])
|
return (qidx, prefer_http, format['video_bitrate'])
|
||||||
formats.sort(key=_sortkey)
|
formats.sort(key=_sortkey)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
@ -78,8 +76,3 @@ class DreiSatIE(InfoExtractor):
|
|||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
config_xml = self._download_webpage(
|
config = self._download_xml(
|
||||||
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
|
||||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
video_url = config.find('file').text
|
video_url = config.find('file').text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import itertools
|
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
@ -11,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class EightTracksIE(InfoExtractor):
|
class EightTracksIE(InfoExtractor):
|
||||||
IE_NAME = '8tracks'
|
IE_NAME = '8tracks'
|
||||||
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
_VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"name": u"EightTracks",
|
u"name": u"EightTracks",
|
||||||
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
||||||
@ -101,7 +100,7 @@ class EightTracksIE(InfoExtractor):
|
|||||||
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||||
next_url = first_url
|
next_url = first_url
|
||||||
res = []
|
res = []
|
||||||
for i in itertools.count():
|
for i in range(track_count):
|
||||||
api_json = self._download_webpage(next_url, playlist_id,
|
api_json = self._download_webpage(next_url, playlist_id,
|
||||||
note=u'Downloading song information %s/%s' % (str(i+1), track_count),
|
note=u'Downloading song information %s/%s' % (str(i+1), track_count),
|
||||||
errnote=u'Failed to download song information')
|
errnote=u'Failed to download song information')
|
||||||
@ -116,7 +115,5 @@ class EightTracksIE(InfoExtractor):
|
|||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
}
|
}
|
||||||
res.append(info)
|
res.append(info)
|
||||||
if api_data['set']['at_last_track']:
|
|
||||||
break
|
|
||||||
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
|
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
|
||||||
return res
|
return res
|
||||||
|
37
youtube_dl/extractor/eitb.py
Normal file
37
youtube_dl/extractor/eitb.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .brightcove import BrightcoveIE
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class EitbIE(InfoExtractor):
|
||||||
|
IE_NAME = u'eitb.tv'
|
||||||
|
_VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'add_ie': ['Brightcove'],
|
||||||
|
u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
|
||||||
|
u'md5': u'edf4436247185adee3ea18ce64c47998',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'2743577154001',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'60 minutos (Lasa y Zabala, 30 años)',
|
||||||
|
# All videos from eitb has this description in the brightcove info
|
||||||
|
u'description': u'.',
|
||||||
|
u'uploader': u'Euskal Telebista',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
chapter_id = mobj.group('chapter_id')
|
||||||
|
webpage = self._download_webpage(url, chapter_id)
|
||||||
|
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||||
|
if bc_url is None:
|
||||||
|
raise ExtractorError(u'Could not extract the Brightcove url')
|
||||||
|
# The BrightcoveExperience object doesn't contain the video id, we set
|
||||||
|
# it manually
|
||||||
|
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
|
||||||
|
return self.url_result(bc_url, BrightcoveIE.ie_key())
|
@ -11,11 +11,11 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class EscapistIE(InfoExtractor):
|
class EscapistIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||||
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
|
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
|
||||||
u'md5': u'c6793dbda81388f4264c1ba18684a74d',
|
u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||||
u"uploader": u"the-escapist-presents",
|
u"uploader": u"the-escapist-presents",
|
||||||
@ -25,50 +25,60 @@ class EscapistIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
showName = mobj.group('showname')
|
showName = mobj.group('showname')
|
||||||
videoId = mobj.group('episode')
|
videoId = mobj.group('episode')
|
||||||
|
|
||||||
self.report_extraction(videoId)
|
self.report_extraction(videoId)
|
||||||
webpage = self._download_webpage(url, videoId)
|
webpage = self._download_webpage(url, videoId)
|
||||||
|
|
||||||
videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
|
videoDesc = self._html_search_regex(
|
||||||
|
r'<meta name="description" content="([^"]*)"',
|
||||||
webpage, u'description', fatal=False)
|
webpage, u'description', fatal=False)
|
||||||
|
|
||||||
playerUrl = self._og_search_video_url(webpage, name='player url')
|
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
|
||||||
|
|
||||||
title = self._html_search_regex('<meta name="title" content="([^"]*)"',
|
title = self._html_search_regex(
|
||||||
webpage, u'player url').split(' : ')[-1]
|
r'<meta name="title" content="([^"]*)"',
|
||||||
|
webpage, u'title').split(' : ')[-1]
|
||||||
|
|
||||||
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
|
configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
|
||||||
configUrl = compat_urllib_parse.unquote(configUrl)
|
configUrl = compat_urllib_parse.unquote(configUrl)
|
||||||
|
|
||||||
configJSON = self._download_webpage(configUrl, videoId,
|
formats = []
|
||||||
u'Downloading configuration',
|
|
||||||
u'unable to download configuration')
|
|
||||||
|
|
||||||
# Technically, it's JavaScript, not JSON
|
def _add_format(name, cfgurl):
|
||||||
configJSON = configJSON.replace("'", '"')
|
configJSON = self._download_webpage(
|
||||||
|
cfgurl, videoId,
|
||||||
|
u'Downloading ' + name + ' configuration',
|
||||||
|
u'Unable to download ' + name + ' configuration')
|
||||||
|
|
||||||
|
# Technically, it's JavaScript, not JSON
|
||||||
|
configJSON = configJSON.replace("'", '"')
|
||||||
|
|
||||||
|
try:
|
||||||
|
config = json.loads(configJSON)
|
||||||
|
except (ValueError,) as err:
|
||||||
|
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
|
||||||
|
playlist = config['playlist']
|
||||||
|
formats.append({
|
||||||
|
'url': playlist[1]['url'],
|
||||||
|
'format_id': name,
|
||||||
|
})
|
||||||
|
|
||||||
|
_add_format(u'normal', configUrl)
|
||||||
|
hq_url = (configUrl +
|
||||||
|
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
|
||||||
try:
|
try:
|
||||||
config = json.loads(configJSON)
|
_add_format(u'hq', hq_url)
|
||||||
except (ValueError,) as err:
|
except ExtractorError:
|
||||||
raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
|
pass # That's fine, we'll just use normal quality
|
||||||
|
|
||||||
playlist = config['playlist']
|
return {
|
||||||
videoUrl = playlist[1]['url']
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': videoId,
|
'id': videoId,
|
||||||
'url': videoUrl,
|
'formats': formats,
|
||||||
'uploader': showName,
|
'uploader': showName,
|
||||||
'upload_date': None,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': 'mp4',
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': videoDesc,
|
'description': videoDesc,
|
||||||
'player_url': playerUrl,
|
'player_url': playerUrl,
|
||||||
}
|
}
|
||||||
|
|
||||||
return [info]
|
|
||||||
|
@ -8,19 +8,20 @@ class ExfmIE(InfoExtractor):
|
|||||||
IE_NAME = u'exfm'
|
IE_NAME = u'exfm'
|
||||||
IE_DESC = u'ex.fm'
|
IE_DESC = u'ex.fm'
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
|
_VALID_URL = r'(?:http://)?(?:www\.)?ex\.fm/song/([^/]+)'
|
||||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
u'url': u'http://ex.fm/song/1bgtzg',
|
u'url': u'http://ex.fm/song/eh359',
|
||||||
u'file': u'95223130.mp3',
|
u'file': u'44216187.mp3',
|
||||||
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
|
u'md5': u'e45513df5631e6d760970b14cc0c11e7',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"We Can't Stop - Miley Cyrus",
|
u"title": u"Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive",
|
||||||
u"uploader": u"Miley Cyrus",
|
u"uploader": u"deadjournalist",
|
||||||
u'upload_date': u'20130603',
|
u'upload_date': u'20120424',
|
||||||
u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
|
u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
|
||||||
},
|
},
|
||||||
u'note': u'Soundcloud song',
|
u'note': u'Soundcloud song',
|
||||||
|
u'skip': u'The site is down too often',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://ex.fm/song/wddt8',
|
u'url': u'http://ex.fm/song/wddt8',
|
||||||
@ -30,6 +31,7 @@ class ExfmIE(InfoExtractor):
|
|||||||
u'title': u'Safe and Sound',
|
u'title': u'Safe and Sound',
|
||||||
u'uploader': u'Capital Cities',
|
u'uploader': u'Capital Cities',
|
||||||
},
|
},
|
||||||
|
u'skip': u'The site is down too often',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
50
youtube_dl/extractor/extremetube.py
Normal file
50
youtube_dl/extractor/extremetube.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
class ExtremeTubeIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
|
||||||
|
u'file': u'652431.mp4',
|
||||||
|
u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"Music Video 14 british euro brit european cumshots swallow",
|
||||||
|
u"uploader": u"unknown",
|
||||||
|
u"age_limit": 18,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('videoid')
|
||||||
|
url = 'http://www.' + mobj.group('url')
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
|
||||||
|
uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False)
|
||||||
|
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url'))
|
||||||
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
|
extension = os.path.splitext(path)[1][1:]
|
||||||
|
format = path.split('/')[5].split('_')[:2]
|
||||||
|
format = "-".join(format)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_title,
|
||||||
|
'uploader': uploader,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': extension,
|
||||||
|
'format': format,
|
||||||
|
'format_id': format,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
@ -1,5 +1,4 @@
|
|||||||
import json
|
import json
|
||||||
import netrc
|
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
@ -19,7 +18,8 @@ class FacebookIE(InfoExtractor):
|
|||||||
"""Information Extractor for Facebook"""
|
"""Information Extractor for Facebook"""
|
||||||
|
|
||||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
||||||
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
|
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||||
|
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||||
_NETRC_MACHINE = 'facebook'
|
_NETRC_MACHINE = 'facebook'
|
||||||
IE_NAME = u'facebook'
|
IE_NAME = u'facebook'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@ -36,50 +36,56 @@ class FacebookIE(InfoExtractor):
|
|||||||
"""Report attempt to log in."""
|
"""Report attempt to log in."""
|
||||||
self.to_screen(u'Logging in')
|
self.to_screen(u'Logging in')
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _login(self):
|
||||||
if self._downloader is None:
|
(useremail, password) = self._get_login_info()
|
||||||
return
|
|
||||||
|
|
||||||
useremail = None
|
|
||||||
password = None
|
|
||||||
downloader_params = self._downloader.params
|
|
||||||
|
|
||||||
# Attempt to use provided username and password or .netrc data
|
|
||||||
if downloader_params.get('username', None) is not None:
|
|
||||||
useremail = downloader_params['username']
|
|
||||||
password = downloader_params['password']
|
|
||||||
elif downloader_params.get('usenetrc', False):
|
|
||||||
try:
|
|
||||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
|
||||||
if info is not None:
|
|
||||||
useremail = info[0]
|
|
||||||
password = info[2]
|
|
||||||
else:
|
|
||||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
|
||||||
except (IOError, netrc.NetrcParseError) as err:
|
|
||||||
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
|
|
||||||
return
|
|
||||||
|
|
||||||
if useremail is None:
|
if useremail is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Log in
|
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||||
|
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||||
|
self.report_login()
|
||||||
|
login_page = self._download_webpage(login_page_req, None, note=False,
|
||||||
|
errnote=u'Unable to download login page')
|
||||||
|
lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd')
|
||||||
|
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd')
|
||||||
|
|
||||||
login_form = {
|
login_form = {
|
||||||
'email': useremail,
|
'email': useremail,
|
||||||
'pass': password,
|
'pass': password,
|
||||||
'login': 'Log+In'
|
'lsd': lsd,
|
||||||
|
'lgnrnd': lgnrnd,
|
||||||
|
'next': 'http://facebook.com/home.php',
|
||||||
|
'default_persistent': '0',
|
||||||
|
'legacy_return': '1',
|
||||||
|
'timezone': '-60',
|
||||||
|
'trynum': '1',
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||||
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
try:
|
try:
|
||||||
self.report_login()
|
|
||||||
login_results = compat_urllib_request.urlopen(request).read()
|
login_results = compat_urllib_request.urlopen(request).read()
|
||||||
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
||||||
self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
check_form = {
|
||||||
|
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'),
|
||||||
|
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'),
|
||||||
|
'name_action_selected': 'dont_save',
|
||||||
|
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'),
|
||||||
|
}
|
||||||
|
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
|
||||||
|
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
|
check_response = compat_urllib_request.urlopen(check_req).read()
|
||||||
|
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||||
|
self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
@ -93,7 +99,13 @@ class FacebookIE(InfoExtractor):
|
|||||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||||
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
||||||
if not m:
|
if not m:
|
||||||
raise ExtractorError(u'Cannot parse data')
|
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||||
|
if m_msg is not None:
|
||||||
|
raise ExtractorError(
|
||||||
|
u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||||
|
expected=True)
|
||||||
|
else:
|
||||||
|
raise ExtractorError(u'Cannot parse data')
|
||||||
data = dict(json.loads(m.group(1)))
|
data = dict(json.loads(m.group(1)))
|
||||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||||
params = json.loads(params_raw)
|
params = json.loads(params_raw)
|
||||||
|
@ -1,18 +1,15 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
clean_html,
|
|
||||||
get_element_by_attribute,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FazIE(InfoExtractor):
|
class FazIE(InfoExtractor):
|
||||||
IE_NAME = u'faz.net'
|
IE_NAME = u'faz.net'
|
||||||
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+).html'
|
_VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
|
u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
|
||||||
@ -30,9 +27,8 @@ class FazIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
|
config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
|
||||||
u'config xml url')
|
u'config xml url')
|
||||||
config_xml = self._download_webpage(config_xml_url, video_id,
|
config = self._download_xml(config_xml_url, video_id,
|
||||||
u'Downloading config xml')
|
u'Downloading config xml')
|
||||||
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
encodings = config.find('ENCODINGS')
|
encodings = config.find('ENCODINGS')
|
||||||
formats = []
|
formats = []
|
||||||
@ -47,14 +43,11 @@ class FazIE(InfoExtractor):
|
|||||||
'format_id': code.lower(),
|
'format_id': code.lower(),
|
||||||
})
|
})
|
||||||
|
|
||||||
descr_html = get_element_by_attribute('class', 'Content Copy', webpage)
|
descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': clean_html(descr_html),
|
'description': descr,
|
||||||
'thumbnail': config.find('STILL/STILL_BIG').text,
|
'thumbnail': config.find('STILL/STILL_BIG').text,
|
||||||
}
|
}
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
return info
|
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class FKTVIE(InfoExtractor):
|
class FKTVIE(InfoExtractor):
|
||||||
IE_NAME = u'fernsehkritik.tv'
|
IE_NAME = u'fernsehkritik.tv'
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
|
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://fernsehkritik.tv/folge-1',
|
u'url': u'http://fernsehkritik.tv/folge-1',
|
||||||
@ -39,7 +39,6 @@ class FKTVIE(InfoExtractor):
|
|||||||
for i, _ in enumerate(files, 1):
|
for i, _ in enumerate(files, 1):
|
||||||
video_id = '%04d%d' % (episode, i)
|
video_id = '%04d%d' % (episode, i)
|
||||||
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
|
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
|
||||||
video_title = 'Fernsehkritik %d.%d' % (episode, i)
|
|
||||||
videos.append({
|
videos.append({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
@ -53,7 +52,7 @@ class FKTVIE(InfoExtractor):
|
|||||||
|
|
||||||
class FKTVPosteckeIE(InfoExtractor):
|
class FKTVPosteckeIE(InfoExtractor):
|
||||||
IE_NAME = u'fernsehkritik.tv:postecke'
|
IE_NAME = u'fernsehkritik.tv:postecke'
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
|
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
|
u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
|
||||||
u'file': u'0120.flv',
|
u'file': u'0120.flv',
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -11,11 +10,10 @@ from ..utils import (
|
|||||||
|
|
||||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id):
|
||||||
xml_desc = self._download_webpage(
|
info = self._download_xml(
|
||||||
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
||||||
'getInfosOeuvre.php?id-diffusion='
|
'getInfosOeuvre.php?id-diffusion='
|
||||||
+ video_id, video_id, 'Downloading XML config')
|
+ video_id, video_id, 'Downloading XML config')
|
||||||
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
|
|
||||||
|
|
||||||
manifest_url = info.find('videos/video/url').text
|
manifest_url = info.find('videos/video/url').text
|
||||||
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
||||||
@ -23,7 +21,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
|||||||
thumbnail_path = info.find('image').text
|
thumbnail_path = info.find('image').text
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'ext': 'mp4',
|
'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': info.find('titre').text,
|
'title': info.find('titre').text,
|
||||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||||
@ -47,7 +45,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
|||||||
|
|
||||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'francetvinfo.fr'
|
IE_NAME = u'francetvinfo.fr'
|
||||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
|
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||||
@ -68,35 +66,101 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
||||||
|
|
||||||
class France2IE(FranceTVBaseInfoExtractor):
|
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'france2.fr'
|
IE_NAME = u'francetv'
|
||||||
_VALID_URL = r'''(?x)https?://www\.france2\.fr/
|
IE_DESC = u'France 2, 3, 4, 5 and Ô'
|
||||||
|
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
||||||
(?:
|
(?:
|
||||||
emissions/.*?/videos/(?P<id>\d+)
|
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
||||||
| emission/(?P<key>[^/?]+)
|
| (emissions?|jt)/(?P<key>[^/?]+)
|
||||||
)'''
|
)'''
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
# france2
|
||||||
u'file': u'75540104.mp4',
|
{
|
||||||
u'info_dict': {
|
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||||
u'title': u'13h15, le samedi...',
|
u'file': u'75540104.mp4',
|
||||||
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
u'info_dict': {
|
||||||
|
u'title': u'13h15, le samedi...',
|
||||||
|
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# m3u8 download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
u'params': {
|
# france3
|
||||||
u'skip_download': True,
|
{
|
||||||
|
u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||||
|
u'ext': u'flv',
|
||||||
|
u'title': u'Le scandale du prix des médicaments',
|
||||||
|
u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
# france4
|
||||||
|
{
|
||||||
|
u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||||
|
u'ext': u'flv',
|
||||||
|
u'title': u'Hero Corp Making of - Extrait 1',
|
||||||
|
u'description': u'md5:c87d54871b1790679aec1197e73d650a',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# france5
|
||||||
|
{
|
||||||
|
u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'92837968',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'C à dire ?!',
|
||||||
|
u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# m3u8 download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# franceo
|
||||||
|
{
|
||||||
|
u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'92327925',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Infô-Afrique',
|
||||||
|
u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# m3u8 download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
u'skip': u'The id changes frequently',
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj.group('key'):
|
if mobj.group('key'):
|
||||||
webpage = self._download_webpage(url, mobj.group('key'))
|
webpage = self._download_webpage(url, mobj.group('key'))
|
||||||
video_id = self._html_search_regex(
|
id_res = [
|
||||||
r'''(?x)<div\s+class="video-player">\s*
|
(r'''(?x)<div\s+class="video-player">\s*
|
||||||
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
|
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
|
||||||
class="francetv-video-player">''',
|
class="francetv-video-player">'''),
|
||||||
webpage, u'video ID')
|
(r'<a id="player_direct" href="http://info\.francetelevisions'
|
||||||
|
'\.fr/\?id-video=([^"/&]+)'),
|
||||||
|
(r'<a class="video" id="ftv_player_(.+?)"'),
|
||||||
|
]
|
||||||
|
video_id = self._html_search_regex(id_res, webpage, u'video ID')
|
||||||
else:
|
else:
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
38
youtube_dl/extractor/gamekings.py
Normal file
38
youtube_dl/extractor/gamekings.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class GamekingsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||||
|
_TEST = {
|
||||||
|
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
|
||||||
|
u'file': u'20130811.mp4',
|
||||||
|
# MD5 is flaky, seems to change regularly
|
||||||
|
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
|
||||||
|
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
name = mobj.group('name')
|
||||||
|
webpage = self._download_webpage(url, name)
|
||||||
|
video_url = self._og_search_video_url(webpage)
|
||||||
|
|
||||||
|
video = re.search(r'[0-9]+', video_url)
|
||||||
|
video_id = video.group(0)
|
||||||
|
|
||||||
|
# Todo: add medium format
|
||||||
|
video_url = video_url.replace(video_id, 'large/' + video_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'url': video_url,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
}
|
@ -24,7 +24,7 @@ class GameSpotIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
page_id = video_id = mobj.group('page_id')
|
page_id = mobj.group('page_id')
|
||||||
webpage = self._download_webpage(url, page_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
|
data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
|
||||||
data_video = json.loads(unescapeHTML(data_video_json))
|
data_video = json.loads(unescapeHTML(data_video_json))
|
||||||
@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor):
|
|||||||
'format_id': q,
|
'format_id': q,
|
||||||
})
|
})
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': data_video['guid'],
|
'id': data_video['guid'],
|
||||||
'title': compat_urllib_parse.unquote(data_video['title']),
|
'title': compat_urllib_parse.unquote(data_video['title']),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': get_meta_content('description', webpage),
|
'description': get_meta_content('description', webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
}
|
}
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
return info
|
|
||||||
|
@ -1,13 +1,10 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .mtv import MTVIE, _media_xml_tag
|
from .mtv import MTVServicesInfoExtractor
|
||||||
|
|
||||||
class GametrailersIE(MTVIE):
|
|
||||||
"""
|
class GametrailersIE(MTVServicesInfoExtractor):
|
||||||
Gametrailers use the same videos system as MTVIE, it just changes the feed
|
_VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
||||||
url, where the uri is and the method to get the thumbnails.
|
|
||||||
"""
|
|
||||||
_VALID_URL = r'http://www.gametrailers.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
|
u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer',
|
||||||
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
|
u'file': u'70e9a5d7-cf25-4a10-9104-6f3e7342ae0d.mp4',
|
||||||
@ -17,15 +14,9 @@ class GametrailersIE(MTVIE):
|
|||||||
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
|
u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
# Overwrite MTVIE properties we don't want
|
|
||||||
_TESTS = []
|
|
||||||
|
|
||||||
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
|
_FEED_URL = 'http://www.gametrailers.com/feeds/mrss'
|
||||||
|
|
||||||
def _get_thumbnail_url(self, uri, itemdoc):
|
|
||||||
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
|
||||||
return itemdoc.find(search_path).attrib['url']
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
@ -25,7 +25,7 @@ class GenericIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
||||||
u'file': u'13601338388002.mp4',
|
u'file': u'13601338388002.mp4',
|
||||||
u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
|
u'md5': u'6e15c93721d7ec9e9ca3fdbf07982cfd',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"uploader": u"www.hodiho.fr",
|
u"uploader": u"www.hodiho.fr",
|
||||||
u"title": u"R\u00e9gis plante sa Jeep"
|
u"title": u"R\u00e9gis plante sa Jeep"
|
||||||
@ -33,6 +33,7 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
# embedded vimeo video
|
# embedded vimeo video
|
||||||
{
|
{
|
||||||
|
u'add_ie': ['Vimeo'],
|
||||||
u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
|
u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
|
||||||
u'file': u'22444065.mp4',
|
u'file': u'22444065.mp4',
|
||||||
u'md5': u'2903896e23df39722c33f015af0666e2',
|
u'md5': u'2903896e23df39722c33f015af0666e2',
|
||||||
@ -41,7 +42,35 @@ class GenericIE(InfoExtractor):
|
|||||||
u"uploader_id": u"skillsmatter",
|
u"uploader_id": u"skillsmatter",
|
||||||
u"uploader": u"Skills Matter",
|
u"uploader": u"Skills Matter",
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
# bandcamp page with custom domain
|
||||||
|
{
|
||||||
|
u'add_ie': ['Bandcamp'],
|
||||||
|
u'url': u'http://bronyrock.com/track/the-pony-mash',
|
||||||
|
u'file': u'3235767654.mp3',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'The Pony Mash',
|
||||||
|
u'uploader': u'M_Pallante',
|
||||||
|
},
|
||||||
|
u'skip': u'There is a limit of 200 free downloads / month for the test song',
|
||||||
|
},
|
||||||
|
# embedded brightcove video
|
||||||
|
# it also tests brightcove videos that need to set the 'Referer' in the
|
||||||
|
# http requests
|
||||||
|
{
|
||||||
|
u'add_ie': ['Brightcove'],
|
||||||
|
u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'2765128793001',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Le cours de bourse : l’analyse technique',
|
||||||
|
u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9',
|
||||||
|
u'uploader': u'BFM BUSINESS',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
def report_download_webpage(self, video_id):
|
||||||
@ -133,11 +162,25 @@ class GenericIE(InfoExtractor):
|
|||||||
raise ExtractorError(u'Failed to download URL: %s' % url)
|
raise ExtractorError(u'Failed to download URL: %s' % url)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
|
# it's tempting to parse this further, but you would
|
||||||
|
# have to take into account all the variations like
|
||||||
|
# Video Title - Site Name
|
||||||
|
# Site Name | Video Title
|
||||||
|
# Video Title - Tagline | Site Name
|
||||||
|
# and so on and so forth; it's just not practical
|
||||||
|
video_title = self._html_search_regex(
|
||||||
|
r'(?s)<title>(.*?)</title>', webpage, u'video title',
|
||||||
|
default=u'video')
|
||||||
|
|
||||||
|
# video uploader is domain name
|
||||||
|
video_uploader = self._search_regex(
|
||||||
|
r'^(?:https?://)?([^/]*)/.*', url, u'video uploader')
|
||||||
|
|
||||||
# Look for BrightCove:
|
# Look for BrightCove:
|
||||||
m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
|
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||||
if m_brightcove is not None:
|
if bc_url is not None:
|
||||||
self.to_screen(u'Brightcove video detected.')
|
self.to_screen(u'Brightcove video detected.')
|
||||||
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
|
|
||||||
return self.url_result(bc_url, 'Brightcove')
|
return self.url_result(bc_url, 'Brightcove')
|
||||||
|
|
||||||
# Look for embedded Vimeo player
|
# Look for embedded Vimeo player
|
||||||
@ -149,11 +192,42 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(surl, 'Vimeo')
|
return self.url_result(surl, 'Vimeo')
|
||||||
|
|
||||||
# Look for embedded YouTube player
|
# Look for embedded YouTube player
|
||||||
mobj = re.search(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src="(https?://(?:www\.)?youtube.com/embed/.+?)"', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
|
||||||
if mobj:
|
if matches:
|
||||||
surl = unescapeHTML(mobj.group(1))
|
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
||||||
return self.url_result(surl, 'Youtube')
|
for tuppl in matches]
|
||||||
|
return self.playlist_result(
|
||||||
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
|
# Look for embedded Dailymotion player
|
||||||
|
matches = re.findall(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||||
|
if matches:
|
||||||
|
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
|
||||||
|
for tuppl in matches]
|
||||||
|
return self.playlist_result(
|
||||||
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
|
# Look for embedded Wistia player
|
||||||
|
match = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
||||||
|
if match:
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': unescapeHTML(match.group('url')),
|
||||||
|
'ie_key': 'Wistia',
|
||||||
|
'uploader': video_uploader,
|
||||||
|
'title': video_title,
|
||||||
|
'id': video_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Look for Bandcamp pages with custom domain
|
||||||
|
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
burl = unescapeHTML(mobj.group(1))
|
||||||
|
# Don't set the extractor because it can be a track url or an album
|
||||||
|
return self.url_result(burl)
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
@ -162,7 +236,7 @@ class GenericIE(InfoExtractor):
|
|||||||
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Broaden the search a little bit: JWPlayer JS loader
|
# Broaden the search a little bit: JWPlayer JS loader
|
||||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
|
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Try to find twitter cards info
|
# Try to find twitter cards info
|
||||||
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||||
@ -189,27 +263,11 @@ class GenericIE(InfoExtractor):
|
|||||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
||||||
|
|
||||||
# here's a fun little line of code for you:
|
# here's a fun little line of code for you:
|
||||||
video_extension = os.path.splitext(video_id)[1][1:]
|
|
||||||
video_id = os.path.splitext(video_id)[0]
|
video_id = os.path.splitext(video_id)[0]
|
||||||
|
|
||||||
# it's tempting to parse this further, but you would
|
return {
|
||||||
# have to take into account all the variations like
|
|
||||||
# Video Title - Site Name
|
|
||||||
# Site Name | Video Title
|
|
||||||
# Video Title - Tagline | Site Name
|
|
||||||
# and so on and so forth; it's just not practical
|
|
||||||
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
|
||||||
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
|
||||||
|
|
||||||
# video uploader is domain name
|
|
||||||
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
|
||||||
url, u'video uploader')
|
|
||||||
|
|
||||||
return [{
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': None,
|
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_extension,
|
}
|
||||||
}]
|
|
||||||
|
@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor):
|
|||||||
u'file': u'1435540.mp3',
|
u'file': u'1435540.mp3',
|
||||||
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
|
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Freddie Gibbs - Lay It Down"
|
u"title": u'Freddie Gibbs "Lay It Down"'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||||
u'file': u'390161.mp4',
|
u'file': u'390161.mp4',
|
||||||
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
|
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
||||||
u"title": u"How to Tie a Square Knot Properly"
|
u"title": u"How to Tie a Square Knot Properly"
|
||||||
|
@ -30,7 +30,7 @@ class HypemIE(InfoExtractor):
|
|||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
track_id = mobj.group(1)
|
track_id = mobj.group(1)
|
||||||
|
|
||||||
data = { 'ax': 1, 'ts': time.time() }
|
data = {'ax': 1, 'ts': time.time()}
|
||||||
data_encoded = compat_urllib_parse.urlencode(data)
|
data_encoded = compat_urllib_parse.urlencode(data)
|
||||||
complete_url = url + "?" + data_encoded
|
complete_url = url + "?" + data_encoded
|
||||||
request = compat_urllib_request.Request(complete_url)
|
request = compat_urllib_request.Request(complete_url)
|
||||||
|
@ -103,7 +103,7 @@ class IGNIE(InfoExtractor):
|
|||||||
class OneUPIE(IGNIE):
|
class OneUPIE(IGNIE):
|
||||||
"""Extractor for 1up.com, it uses the ign videos system."""
|
"""Extractor for 1up.com, it uses the ign videos system."""
|
||||||
|
|
||||||
_VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||||
IE_NAME = '1up.com'
|
IE_NAME = '1up.com'
|
||||||
|
|
||||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||||
|
57
youtube_dl/extractor/imdb.py
Normal file
57
youtube_dl/extractor/imdb.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
get_element_by_attribute,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ImdbIE(InfoExtractor):
|
||||||
|
IE_NAME = u'imdb'
|
||||||
|
IE_DESC = u'Internet Movie Database trailers'
|
||||||
|
_VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
|
||||||
|
u'md5': u'9f34fa777ade3a6e57a054fdbcb3a068',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'2524815897',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'Ice Age: Continental Drift Trailer (No. 2) - IMDb',
|
||||||
|
u'description': u'md5:9061c2219254e5d14e03c25c98e96a81',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url,video_id)
|
||||||
|
descr = get_element_by_attribute('itemprop', 'description', webpage)
|
||||||
|
available_formats = re.findall(
|
||||||
|
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
|
||||||
|
flags=re.MULTILINE)
|
||||||
|
formats = []
|
||||||
|
for f_id, f_path in available_formats:
|
||||||
|
f_path = f_path.strip()
|
||||||
|
format_page = self._download_webpage(
|
||||||
|
compat_urlparse.urljoin(url, f_path),
|
||||||
|
u'Downloading info for %s format' % f_id)
|
||||||
|
json_data = self._search_regex(
|
||||||
|
r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>',
|
||||||
|
format_page, u'json data', flags=re.DOTALL)
|
||||||
|
info = json.loads(json_data)
|
||||||
|
format_info = info['videoPlayerObject']['video']
|
||||||
|
formats.append({
|
||||||
|
'format_id': f_id,
|
||||||
|
'url': format_info['url'],
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'formats': formats,
|
||||||
|
'description': descr,
|
||||||
|
'thumbnail': format_info['slate'],
|
||||||
|
}
|
@ -3,7 +3,7 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
class InstagramIE(InfoExtractor):
|
class InstagramIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?instagram.com/p/(.*?)/'
|
_VALID_URL = r'(?:http://)?instagram\.com/p/(.*?)/'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
u'url': u'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||||
u'file': u'aye83DjauH.mp4',
|
u'file': u'aye83DjauH.mp4',
|
||||||
@ -26,7 +26,7 @@ class InstagramIE(InfoExtractor):
|
|||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': self._og_search_video_url(webpage),
|
'url': self._og_search_video_url(webpage, secure=False),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': u'Video by %s' % uploader_id,
|
'title': u'Video by %s' % uploader_id,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
video_id = query_dic['publishedid'][0]
|
video_id = query_dic['publishedid'][0]
|
||||||
url = self._build_url(query)
|
url = self._build_url(query)
|
||||||
|
|
||||||
flashconfiguration_xml = self._download_webpage(url, video_id,
|
flashconfiguration = self._download_xml(url, video_id,
|
||||||
u'Downloading flash configuration')
|
u'Downloading flash configuration')
|
||||||
flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
|
|
||||||
file_url = flashconfiguration.find('file').text
|
file_url = flashconfiguration.find('file').text
|
||||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||||
# Replace some of the parameters in the query to get the best quality
|
# Replace some of the parameters in the query to get the best quality
|
||||||
@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
|
|||||||
file_url = re.sub(r'(?<=\?)(.+)$',
|
file_url = re.sub(r'(?<=\?)(.+)$',
|
||||||
lambda m: self._clean_query(m.group()),
|
lambda m: self._clean_query(m.group()),
|
||||||
file_url)
|
file_url)
|
||||||
info_xml = self._download_webpage(file_url, video_id,
|
info = self._download_xml(file_url, video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
item = info.find('channel/item')
|
item = info.find('channel/item')
|
||||||
|
|
||||||
def _bp(p):
|
def _bp(p):
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
@ -22,7 +21,7 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
title = re.match(self._VALID_URL, url).group(1)
|
title = mobj.group(1)
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
xml_link = self._html_search_regex(
|
xml_link = self._html_search_regex(
|
||||||
r'<param name="flashvars" value="config=(.*?)" />',
|
r'<param name="flashvars" value="config=(.*?)" />',
|
||||||
@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
|
|||||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||||
xml_link, u'video ID')
|
xml_link, u'video ID')
|
||||||
|
|
||||||
xml_config = self._download_webpage(
|
config = self._download_xml(
|
||||||
xml_link, title, u'Downloading XML config')
|
xml_link, title, u'Downloading XML config')
|
||||||
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
info_json = config.find('format.json').text
|
||||||
info_json = self._search_regex(
|
|
||||||
r'(?sm)<format\.json>(.*?)</format\.json>',
|
|
||||||
xml_config, u'JSON information')
|
|
||||||
info = json.loads(info_json)['versions'][0]
|
info = json.loads(info_json)['versions'][0]
|
||||||
|
|
||||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||||
|
@ -8,7 +8,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class JukeboxIE(InfoExtractor):
|
class JukeboxIE(InfoExtractor):
|
||||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html'
|
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
||||||
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
|
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
|
||||||
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
|
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
|
||||||
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
|
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
|
|||||||
archive_id = m.group(1)
|
archive_id = m.group(1)
|
||||||
|
|
||||||
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
||||||
chapter_info_xml = self._download_webpage(api, chapter_id,
|
doc = self._download_xml(api, chapter_id,
|
||||||
note=u'Downloading chapter information',
|
note=u'Downloading chapter information',
|
||||||
errnote=u'Chapter information download failed')
|
errnote=u'Chapter information download failed')
|
||||||
doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
|
|
||||||
for a in doc.findall('.//archive'):
|
for a in doc.findall('.//archive'):
|
||||||
if archive_id == a.find('./id').text:
|
if archive_id == a.find('./id').text:
|
||||||
break
|
break
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
import re
|
import re
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
class KankanIE(InfoExtractor):
|
class KankanIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
|
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
|
||||||
@ -30,7 +32,10 @@ class KankanIE(InfoExtractor):
|
|||||||
video_id, u'Downloading video url info')
|
video_id, u'Downloading video url info')
|
||||||
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
|
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
|
||||||
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
|
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
|
||||||
video_url = 'http://%s%s' % (ip, path)
|
param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
|
||||||
|
param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
|
||||||
|
key = _md5('xl_mp43651' + param1 + param2)
|
||||||
|
video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
61
youtube_dl/extractor/keezmovies.py
Normal file
61
youtube_dl/extractor/keezmovies.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
from ..aes import (
|
||||||
|
aes_decrypt_text
|
||||||
|
)
|
||||||
|
|
||||||
|
class KeezMoviesIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
|
||||||
|
u'file': u'1214711.mp4',
|
||||||
|
u'md5': u'6e297b7e789329923fcf83abb67c9289',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"Petite Asian Lady Mai Playing In Bathtub",
|
||||||
|
u"age_limit": 18,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('videoid')
|
||||||
|
url = 'http://www.' + mobj.group('url')
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
# embedded video
|
||||||
|
mobj = re.search(r'href="([^"]+)"></iframe>', webpage)
|
||||||
|
if mobj:
|
||||||
|
embedded_url = mobj.group(1)
|
||||||
|
return self.url_result(embedded_url)
|
||||||
|
|
||||||
|
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, u'title')
|
||||||
|
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url'))
|
||||||
|
if webpage.find('encrypted=true')!=-1:
|
||||||
|
password = self._html_search_regex(r'video_title=(.+?)&', webpage, u'password')
|
||||||
|
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
|
||||||
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
|
extension = os.path.splitext(path)[1][1:]
|
||||||
|
format = path.split('/')[4].split('_')[:2]
|
||||||
|
format = "-".join(format)
|
||||||
|
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_title,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': extension,
|
||||||
|
'format': format,
|
||||||
|
'format_id': format,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
}
|
@ -8,7 +8,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class LiveLeakIE(InfoExtractor):
|
class LiveLeakIE(InfoExtractor):
|
||||||
|
|
||||||
_VALID_URL = r'^(?:http?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||||
IE_NAME = u'liveleak'
|
IE_NAME = u'liveleak'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.liveleak.com/view?i=757_1364311680',
|
u'url': u'http://www.liveleak.com/view?i=757_1364311680',
|
||||||
|
@ -5,13 +5,13 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
get_meta_content,
|
xpath_with_ns,
|
||||||
ExtractorError,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LivestreamIE(InfoExtractor):
|
class LivestreamIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
IE_NAME = u'livestream'
|
||||||
|
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
||||||
u'file': u'4719370.mp4',
|
u'file': u'4719370.mp4',
|
||||||
@ -40,13 +40,9 @@ class LivestreamIE(InfoExtractor):
|
|||||||
|
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
# This is an event page:
|
# This is an event page:
|
||||||
player = get_meta_content('twitter:player', webpage)
|
config_json = self._search_regex(r'window.config = ({.*?});',
|
||||||
if player is None:
|
webpage, u'window config')
|
||||||
raise ExtractorError('Couldn\'t extract event api url')
|
info = json.loads(config_json)['event']
|
||||||
api_url = player.replace('/player', '')
|
|
||||||
api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
|
|
||||||
info = json.loads(self._download_webpage(api_url, event_name,
|
|
||||||
u'Downloading event info'))
|
|
||||||
videos = [self._extract_video_info(video_data['data'])
|
videos = [self._extract_video_info(video_data['data'])
|
||||||
for video_data in info['feed']['data'] if video_data['type'] == u'video']
|
for video_data in info['feed']['data'] if video_data['type'] == u'video']
|
||||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
return self.playlist_result(videos, info['id'], info['full_name'])
|
||||||
@ -58,3 +54,43 @@ class LivestreamIE(InfoExtractor):
|
|||||||
info = json.loads(self._download_webpage(api_url, video_id,
|
info = json.loads(self._download_webpage(api_url, video_id,
|
||||||
u'Downloading video info'))
|
u'Downloading video info'))
|
||||||
return self._extract_video_info(info)
|
return self._extract_video_info(info)
|
||||||
|
|
||||||
|
|
||||||
|
# The original version of Livestream uses a different system
|
||||||
|
class LivestreamOriginalIE(InfoExtractor):
|
||||||
|
IE_NAME = u'livestream:original'
|
||||||
|
_VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||||
|
u'ext': u'flv',
|
||||||
|
u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
user = mobj.group('user')
|
||||||
|
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
||||||
|
|
||||||
|
info = self._download_xml(api_url, video_id)
|
||||||
|
item = info.find('channel').find('item')
|
||||||
|
ns = {'media': 'http://search.yahoo.com/mrss'}
|
||||||
|
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
||||||
|
# Remove the extension and number from the path (like 1.jpg)
|
||||||
|
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': item.find('title').text,
|
||||||
|
'url': 'rtmp://extondemand.livestream.com/ondemand',
|
||||||
|
'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
|
||||||
|
'ext': 'flv',
|
||||||
|
'thumbnail': thumbnail_url,
|
||||||
|
}
|
||||||
|
@ -1,14 +1,10 @@
|
|||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_str,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
@ -20,10 +16,12 @@ class MetacafeIE(InfoExtractor):
|
|||||||
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
||||||
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
||||||
IE_NAME = u'metacafe'
|
IE_NAME = u'metacafe'
|
||||||
_TESTS = [{
|
_TESTS = [
|
||||||
|
# Youtube video
|
||||||
|
{
|
||||||
u"add_ie": ["Youtube"],
|
u"add_ie": ["Youtube"],
|
||||||
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
|
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
|
||||||
u"file": u"_aUehQsCQtM.flv",
|
u"file": u"_aUehQsCQtM.mp4",
|
||||||
u"info_dict": {
|
u"info_dict": {
|
||||||
u"upload_date": u"20090102",
|
u"upload_date": u"20090102",
|
||||||
u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
|
u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
|
||||||
@ -32,15 +30,57 @@ class MetacafeIE(InfoExtractor):
|
|||||||
u"uploader_id": u"PBS"
|
u"uploader_id": u"PBS"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
# Normal metacafe video
|
||||||
|
{
|
||||||
|
u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
|
||||||
|
u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'11121940',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
|
||||||
|
u'uploader': u'ign',
|
||||||
|
u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# AnyClip video
|
||||||
{
|
{
|
||||||
u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
|
u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
|
||||||
u"file": u"an-dVVXnuY7Jh77J.mp4",
|
u"file": u"an-dVVXnuY7Jh77J.mp4",
|
||||||
u"info_dict": {
|
u"info_dict": {
|
||||||
u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
|
u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
|
||||||
u"uploader": u"anyclip",
|
u"uploader": u"anyclip",
|
||||||
u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
|
u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
|
||||||
}
|
},
|
||||||
}]
|
},
|
||||||
|
# age-restricted video
|
||||||
|
{
|
||||||
|
u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
|
||||||
|
u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'5186653',
|
||||||
|
u'ext': u'mp4',
|
||||||
|
u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
|
||||||
|
u'uploader': u'Dwayne Pipe',
|
||||||
|
u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
|
||||||
|
u'age_limit': 18,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# cbs video
|
||||||
|
{
|
||||||
|
u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'0rOxMBabDXN6',
|
||||||
|
u'ext': u'flv',
|
||||||
|
u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
|
||||||
|
u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
|
||||||
|
u'duration': 129,
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def report_disclaimer(self):
|
def report_disclaimer(self):
|
||||||
@ -49,12 +89,8 @@ class MetacafeIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
# Retrieve disclaimer
|
# Retrieve disclaimer
|
||||||
request = compat_urllib_request.Request(self._DISCLAIMER)
|
self.report_disclaimer()
|
||||||
try:
|
self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
|
||||||
self.report_disclaimer()
|
|
||||||
compat_urllib_request.urlopen(request).read()
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))
|
|
||||||
|
|
||||||
# Confirm age
|
# Confirm age
|
||||||
disclaimer_form = {
|
disclaimer_form = {
|
||||||
@ -62,11 +98,9 @@ class MetacafeIE(InfoExtractor):
|
|||||||
'submit': "Continue - I'm over 18",
|
'submit': "Continue - I'm over 18",
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||||
try:
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
self.report_age_confirmation()
|
self.report_age_confirmation()
|
||||||
compat_urllib_request.urlopen(request).read()
|
self._download_webpage(request, None, False, u'Unable to confirm age')
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract id and simplified title from URL
|
# Extract id and simplified title from URL
|
||||||
@ -76,14 +110,25 @@ class MetacafeIE(InfoExtractor):
|
|||||||
|
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
|
|
||||||
# Check if video comes from YouTube
|
# the video may come from an external site
|
||||||
mobj2 = re.match(r'^yt-(.*)$', video_id)
|
m_external = re.match('^(\w{2})-(.*)$', video_id)
|
||||||
if mobj2 is not None:
|
if m_external is not None:
|
||||||
return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
|
prefix, ext_id = m_external.groups()
|
||||||
|
# Check if video comes from YouTube
|
||||||
|
if prefix == 'yt':
|
||||||
|
return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
|
||||||
|
# CBS videos use theplatform.com
|
||||||
|
if prefix == 'cb':
|
||||||
|
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||||
req.headers['Cookie'] = 'flashVersion=0;'
|
|
||||||
|
# AnyClip videos require the flashversion cookie so that we get the link
|
||||||
|
# to the mp4 file
|
||||||
|
mobj_an = re.match(r'^an-(.*?)$', video_id)
|
||||||
|
if mobj_an:
|
||||||
|
req.headers['Cookie'] = 'flashVersion=0;'
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
# Extract URL, uploader and title from webpage
|
# Extract URL, uploader and title from webpage
|
||||||
@ -125,6 +170,11 @@ class MetacafeIE(InfoExtractor):
|
|||||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||||
webpage, u'uploader nickname', fatal=False)
|
webpage, u'uploader nickname', fatal=False)
|
||||||
|
|
||||||
|
if re.search(r'"contentRating":"restricted"', webpage) is not None:
|
||||||
|
age_limit = 18
|
||||||
|
else:
|
||||||
|
age_limit = 0
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -134,4 +184,5 @@ class MetacafeIE(InfoExtractor):
|
|||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_ext,
|
'ext': video_ext,
|
||||||
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor):
|
|||||||
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
|
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
|
||||||
webpage, u'description', flags=re.DOTALL)
|
webpage, u'description', flags=re.DOTALL)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': clip.find('title').text,
|
'title': clip.find('title').text,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': int(clip.find('duration').text),
|
'duration': int(clip.find('duration').text),
|
||||||
}
|
}
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
return info
|
|
||||||
|
@ -1,13 +1,10 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_request,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -31,9 +28,11 @@ class MixcloudIE(InfoExtractor):
|
|||||||
"""Returns 1st active url from list"""
|
"""Returns 1st active url from list"""
|
||||||
for url in url_list:
|
for url in url_list:
|
||||||
try:
|
try:
|
||||||
compat_urllib_request.urlopen(url)
|
# We only want to know if the request succeed
|
||||||
|
# don't download the whole file
|
||||||
|
self._request_webpage(url, None, False)
|
||||||
return url
|
return url
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
|
except ExtractorError:
|
||||||
url = None
|
url = None
|
||||||
|
|
||||||
return None
|
return None
|
||||||
@ -60,7 +59,7 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'title': info['name'],
|
'title': info['name'],
|
||||||
'url': final_song_url,
|
'url': final_song_url,
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'description': info['description'],
|
'description': info.get('description'),
|
||||||
'thumbnail': info['pictures'].get('extra_large'),
|
'thumbnail': info['pictures'].get('extra_large'),
|
||||||
'uploader': info['user']['name'],
|
'uploader': info['user']['name'],
|
||||||
'uploader_id': info['user']['username'],
|
'uploader_id': info['user']['username'],
|
||||||
|
49
youtube_dl/extractor/mofosex.py
Normal file
49
youtube_dl/extractor/mofosex.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
class MofosexIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
|
||||||
|
u'file': u'5018.mp4',
|
||||||
|
u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"Japanese Teen Music Video",
|
||||||
|
u"age_limit": 18,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('videoid')
|
||||||
|
url = 'http://www.' + mobj.group('url')
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
|
||||||
|
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
|
||||||
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
|
extension = os.path.splitext(path)[1][1:]
|
||||||
|
format = path.split('/')[5].split('_')[:2]
|
||||||
|
format = "-".join(format)
|
||||||
|
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_title,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': extension,
|
||||||
|
'format': format,
|
||||||
|
'format_id': format,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
}
|
@ -10,34 +10,8 @@ from ..utils import (
|
|||||||
def _media_xml_tag(tag):
|
def _media_xml_tag(tag):
|
||||||
return '{http://search.yahoo.com/mrss/}%s' % tag
|
return '{http://search.yahoo.com/mrss/}%s' % tag
|
||||||
|
|
||||||
class MTVIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
|
|
||||||
|
|
||||||
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
|
|
||||||
|
|
||||||
_TESTS = [
|
|
||||||
{
|
|
||||||
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
|
|
||||||
u'file': u'853555.mp4',
|
|
||||||
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
|
|
||||||
u'info_dict': {
|
|
||||||
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
|
|
||||||
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
|
|
||||||
u'file': u'USCJY1331283.mp4',
|
|
||||||
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
|
|
||||||
u'info_dict': {
|
|
||||||
u'title': u'Everything Has Changed',
|
|
||||||
u'upload_date': u'20130606',
|
|
||||||
u'uploader': u'Taylor Swift',
|
|
||||||
},
|
|
||||||
u'skip': u'VEVO is only available in some countries',
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
|
class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _id_from_uri(uri):
|
def _id_from_uri(uri):
|
||||||
return uri.split(':')[-1]
|
return uri.split(':')[-1]
|
||||||
@ -47,18 +21,22 @@ class MTVIE(InfoExtractor):
|
|||||||
def _transform_rtmp_url(rtmp_video_url):
|
def _transform_rtmp_url(rtmp_video_url):
|
||||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
||||||
if not m:
|
if not m:
|
||||||
raise ExtractorError(u'Cannot transform RTMP url')
|
return rtmp_video_url
|
||||||
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
||||||
return base + m.group('finalid')
|
return base + m.group('finalid')
|
||||||
|
|
||||||
def _get_thumbnail_url(self, uri, itemdoc):
|
def _get_thumbnail_url(self, uri, itemdoc):
|
||||||
return 'http://mtv.mtvnimages.com/uri/' + uri
|
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||||
|
thumb_node = itemdoc.find(search_path)
|
||||||
|
if thumb_node is None:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return thumb_node.attrib['url']
|
||||||
|
|
||||||
def _extract_video_formats(self, metadataXml):
|
def _extract_video_formats(self, metadataXml):
|
||||||
if '/error_country_block.swf' in metadataXml:
|
if '/error_country_block.swf' in metadataXml:
|
||||||
raise ExtractorError(u'This video is not available from your country.', expected=True)
|
raise ExtractorError(u'This video is not available from your country.', expected=True)
|
||||||
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
|
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
|
||||||
renditions = mdoc.findall('.//rendition')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for rendition in mdoc.findall('.//rendition'):
|
for rendition in mdoc.findall('.//rendition'):
|
||||||
@ -80,6 +58,8 @@ class MTVIE(InfoExtractor):
|
|||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
|
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
|
||||||
|
# Remove the templates, like &device={device}
|
||||||
|
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
|
||||||
if 'acceptMethods' not in mediagen_url:
|
if 'acceptMethods' not in mediagen_url:
|
||||||
mediagen_url += '&acceptMethods=fms'
|
mediagen_url += '&acceptMethods=fms'
|
||||||
mediagen_page = self._download_webpage(mediagen_url, video_id,
|
mediagen_page = self._download_webpage(mediagen_url, video_id,
|
||||||
@ -91,7 +71,7 @@ class MTVIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
description = None
|
description = None
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'title': itemdoc.find('title').text,
|
'title': itemdoc.find('title').text,
|
||||||
'formats': self._extract_video_formats(mediagen_page),
|
'formats': self._extract_video_formats(mediagen_page),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -99,19 +79,46 @@ class MTVIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(info['formats'][-1])
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
|
||||||
def _get_videos_info(self, uri):
|
def _get_videos_info(self, uri):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
data = compat_urllib_parse.urlencode({'uri': uri})
|
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||||
infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
|
idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
|
||||||
u'Downloading info')
|
u'Downloading info')
|
||||||
idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
|
|
||||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
||||||
|
|
||||||
|
|
||||||
|
class MTVIE(MTVServicesInfoExtractor):
|
||||||
|
_VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$'
|
||||||
|
|
||||||
|
_FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
|
||||||
|
u'file': u'853555.mp4',
|
||||||
|
u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"',
|
||||||
|
u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'add_ie': ['Vevo'],
|
||||||
|
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
|
||||||
|
u'file': u'USCJY1331283.mp4',
|
||||||
|
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Everything Has Changed',
|
||||||
|
u'upload_date': u'20130606',
|
||||||
|
u'uploader': u'Taylor Swift',
|
||||||
|
},
|
||||||
|
u'skip': u'VEVO is only available in some countries',
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _get_thumbnail_url(self, uri, itemdoc):
|
||||||
|
return 'http://mtv.mtvnimages.com/uri/' + uri
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('videoid')
|
||||||
|
@ -9,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MuzuTVIE(InfoExtractor):
|
class MuzuTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
_VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||||
IE_NAME = u'muzu.tv'
|
IE_NAME = u'muzu.tv'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
|
48
youtube_dl/extractor/myspace.py
Normal file
48
youtube_dl/extractor/myspace.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MySpaceIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'100008689',
|
||||||
|
u'ext': u'flv',
|
||||||
|
u'title': u'Viva La Vida',
|
||||||
|
u'description': u'The official Viva La Vida video, directed by Hype Williams',
|
||||||
|
u'uploader': u'Coldplay',
|
||||||
|
u'uploader_id': u'coldplay',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
# rtmp download
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
|
||||||
|
u'context'))
|
||||||
|
video = context['video']
|
||||||
|
rtmp_url, play_path = video['streamUrl'].split(';', 1)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': compat_str(video['mediaId']),
|
||||||
|
'title': video['title'],
|
||||||
|
'url': rtmp_url,
|
||||||
|
'play_path': play_path,
|
||||||
|
'ext': 'flv',
|
||||||
|
'description': video['description'],
|
||||||
|
'thumbnail': video['imageUrl'],
|
||||||
|
'uploader': video['artistName'],
|
||||||
|
'uploader_id': video['artistUsername'],
|
||||||
|
}
|
@ -1,5 +1,4 @@
|
|||||||
import os.path
|
import os.path
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -10,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MySpassIE(InfoExtractor):
|
class MySpassIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www.myspass.de/.*'
|
_VALID_URL = r'http://www\.myspass\.de/.*'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
u'url': u'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
|
||||||
u'file': u'11741.mp4',
|
u'file': u'11741.mp4',
|
||||||
@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):
|
|||||||
|
|
||||||
# get metadata
|
# get metadata
|
||||||
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
metadata_url = META_DATA_URL_TEMPLATE % video_id
|
||||||
metadata_text = self._download_webpage(metadata_url, video_id)
|
metadata = self._download_xml(metadata_url, video_id)
|
||||||
metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
|
|
||||||
|
|
||||||
# extract values from metadata
|
# extract values from metadata
|
||||||
url_flv_el = metadata.find('url_flv')
|
url_flv_el = metadata.find('url_flv')
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
|
|||||||
'protocol': 'p2p',
|
'protocol': 'p2p',
|
||||||
'inKey': key,
|
'inKey': key,
|
||||||
})
|
})
|
||||||
info_xml = self._download_webpage(
|
info = self._download_xml(
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
||||||
video_id, u'Downloading video info')
|
video_id, u'Downloading video info')
|
||||||
urls_xml = self._download_webpage(
|
urls = self._download_xml(
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
||||||
video_id, u'Downloading video formats info')
|
video_id, u'Downloading video formats info')
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
|
|
||||||
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
||||||
@ -59,7 +56,7 @@ class NaverIE(InfoExtractor):
|
|||||||
'height': int(format_el.find('height').text),
|
'height': int(format_el.find('height').text),
|
||||||
})
|
})
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info.find('Subject').text,
|
'title': info.find('Subject').text,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@ -68,6 +65,3 @@ class NaverIE(InfoExtractor):
|
|||||||
'upload_date': info.find('WriteDate').text.replace('.', ''),
|
'upload_date': info.find('WriteDate').text.replace('.', ''),
|
||||||
'view_count': int(info.find('PlayCount').text),
|
'view_count': int(info.find('PlayCount').text),
|
||||||
}
|
}
|
||||||
# TODO: Remove when #980 has been merged
|
|
||||||
info.update(formats[-1])
|
|
||||||
return info
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import find_xpath_attr, compat_str
|
from ..utils import find_xpath_attr, compat_str
|
||||||
@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||||
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
|
info = all_info.find('video')
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
'title': info.find('headline').text,
|
'title': info.find('headline').text,
|
||||||
|
66
youtube_dl/extractor/ndtv.py
Normal file
66
youtube_dl/extractor/ndtv.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import month_by_name
|
||||||
|
|
||||||
|
|
||||||
|
class NDTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710",
|
||||||
|
u"file": u"300710.mp4",
|
||||||
|
u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88",
|
||||||
|
u"info_dict": {
|
||||||
|
u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
|
||||||
|
u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.",
|
||||||
|
u"upload_date": u"20131208",
|
||||||
|
u"duration": 1327,
|
||||||
|
u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
filename = self._search_regex(
|
||||||
|
r"__filename='([^']+)'", webpage, u'video filename')
|
||||||
|
video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
|
||||||
|
filename)
|
||||||
|
|
||||||
|
duration_str = filename = self._search_regex(
|
||||||
|
r"__duration='([^']+)'", webpage, u'duration', fatal=False)
|
||||||
|
duration = None if duration_str is None else int(duration_str)
|
||||||
|
|
||||||
|
date_m = re.search(r'''(?x)
|
||||||
|
<p\s+class="vod_dateline">\s*
|
||||||
|
Published\s+On:\s*
|
||||||
|
(?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
|
||||||
|
''', webpage)
|
||||||
|
upload_date = None
|
||||||
|
assert date_m
|
||||||
|
if date_m is not None:
|
||||||
|
month = month_by_name(date_m.group('monthname'))
|
||||||
|
if month is not None:
|
||||||
|
upload_date = '%s%02d%02d' % (
|
||||||
|
date_m.group('year'), month, int(date_m.group('day')))
|
||||||
|
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
READ_MORE = u' (Read more)'
|
||||||
|
if description.endswith(READ_MORE):
|
||||||
|
description = description[:-len(READ_MORE)]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'duration': duration,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
}
|
@ -1,6 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
|||||||
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
||||||
})
|
})
|
||||||
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
||||||
path_response = self._download_webpage(path_url, video_id,
|
path_doc = self._download_xml(path_url, video_id,
|
||||||
u'Downloading final video url')
|
u'Downloading final video url')
|
||||||
path_doc = xml.etree.ElementTree.fromstring(path_response)
|
|
||||||
video_url = path_doc.find('path').text
|
video_url = path_doc.find('path').text
|
||||||
|
|
||||||
join = compat_urlparse.urljoin
|
join = compat_urlparse.urljoin
|
||||||
@ -72,7 +70,7 @@ class NHLIE(NHLBaseInfoExtractor):
|
|||||||
|
|
||||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||||
IE_NAME = u'nhl.com:videocenter'
|
IE_NAME = u'nhl.com:videocenter'
|
||||||
IE_DESC = u'Download the first 12 videos from a videocenter category'
|
IE_DESC = u'NHL videocenter category'
|
||||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
|
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
127
youtube_dl/extractor/niconico.py
Normal file
127
youtube_dl/extractor/niconico.py
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_http_client,
|
||||||
|
compat_urllib_error,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
|
compat_str,
|
||||||
|
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoIE(InfoExtractor):
|
||||||
|
IE_NAME = u'niconico'
|
||||||
|
IE_DESC = u'ニコニコ動画'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
|
u'file': u'sm22312215.mp4',
|
||||||
|
u'md5': u'd1a75c0823e2f629128c43e1212760f9',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Big Buck Bunny',
|
||||||
|
u'uploader': u'takuya0301',
|
||||||
|
u'uploader_id': u'2698420',
|
||||||
|
u'upload_date': u'20131123',
|
||||||
|
u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
u'username': u'ydl.niconico@gmail.com',
|
||||||
|
u'password': u'youtube-dl',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
|
||||||
|
_NETRC_MACHINE = 'niconico'
|
||||||
|
# If True it will raise an error if no login info is provided
|
||||||
|
_LOGIN_REQUIRED = True
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
# No authentication to be performed
|
||||||
|
if username is None:
|
||||||
|
if self._LOGIN_REQUIRED:
|
||||||
|
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Log in
|
||||||
|
login_form_strs = {
|
||||||
|
u'mail': username,
|
||||||
|
u'password': password,
|
||||||
|
}
|
||||||
|
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
||||||
|
# chokes on unicode
|
||||||
|
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
||||||
|
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
u'https://secure.nicovideo.jp/secure/login', login_data)
|
||||||
|
login_results = self._download_webpage(
|
||||||
|
request, u'', note=u'Logging in', errnote=u'Unable to log in')
|
||||||
|
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
||||||
|
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group(1)
|
||||||
|
|
||||||
|
# Get video webpage. We are not actually interested in it, but need
|
||||||
|
# the cookies in order to be able to download the info webpage
|
||||||
|
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||||
|
|
||||||
|
video_info = self._download_xml(
|
||||||
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||||
|
note=u'Downloading video info page')
|
||||||
|
|
||||||
|
# Get flv info
|
||||||
|
flv_info_webpage = self._download_webpage(
|
||||||
|
u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
||||||
|
video_id, u'Downloading flv info')
|
||||||
|
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||||
|
|
||||||
|
# Start extracting information
|
||||||
|
video_title = video_info.find('.//title').text
|
||||||
|
video_extension = video_info.find('.//movie_type').text
|
||||||
|
video_format = video_extension.upper()
|
||||||
|
video_thumbnail = video_info.find('.//thumbnail_url').text
|
||||||
|
video_description = video_info.find('.//description').text
|
||||||
|
video_uploader_id = video_info.find('.//user_id').text
|
||||||
|
video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
|
||||||
|
video_view_count = video_info.find('.//view_counter').text
|
||||||
|
video_webpage_url = video_info.find('.//watch_url').text
|
||||||
|
|
||||||
|
# uploader
|
||||||
|
video_uploader = video_uploader_id
|
||||||
|
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
||||||
|
try:
|
||||||
|
user_info = self._download_xml(
|
||||||
|
url, video_id, note=u'Downloading user information')
|
||||||
|
video_uploader = user_info.find('.//nickname').text
|
||||||
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
|
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_real_url,
|
||||||
|
'title': video_title,
|
||||||
|
'ext': video_extension,
|
||||||
|
'format': video_format,
|
||||||
|
'thumbnail': video_thumbnail,
|
||||||
|
'description': video_description,
|
||||||
|
'uploader': video_uploader,
|
||||||
|
'upload_date': video_upload_date,
|
||||||
|
'uploader_id': video_uploader_id,
|
||||||
|
'view_count': video_view_count,
|
||||||
|
'webpage_url': video_webpage_url,
|
||||||
|
}
|
43
youtube_dl/extractor/ninegag.py
Normal file
43
youtube_dl/extractor/ninegag.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class NineGagIE(InfoExtractor):
|
||||||
|
IE_NAME = '9gag'
|
||||||
|
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u"url": u"http://9gag.tv/v/1912",
|
||||||
|
u"file": u"1912.mp4",
|
||||||
|
u"info_dict": {
|
||||||
|
u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
||||||
|
u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
|
||||||
|
},
|
||||||
|
u'add_ie': [u'Youtube']
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
data_json = self._html_search_regex(r'''(?x)
|
||||||
|
<div\s*id="tv-video"\s*data-video-source="youtube"\s*
|
||||||
|
data-video-meta="([^"]+)"''', webpage, u'video metadata')
|
||||||
|
|
||||||
|
data = json.loads(data_json)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': data['youtubeVideoId'],
|
||||||
|
'ie_key': 'Youtube',
|
||||||
|
'id': video_id,
|
||||||
|
'title': data['title'],
|
||||||
|
'description': data['description'],
|
||||||
|
'view_count': int(data['view_count']),
|
||||||
|
'like_count': int(data['statistic']['like']),
|
||||||
|
'dislike_count': int(data['statistic']['dislike']),
|
||||||
|
'thumbnail': data['thumbnail_url'],
|
||||||
|
}
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class ORFIE(InfoExtractor):
|
class ORFIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
|
_VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class PBSIE(InfoExtractor):
|
class PBSIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
|
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
u'url': u'http://video.pbs.org/video/2365006249/',
|
||||||
|
49
youtube_dl/extractor/podomatic.py
Normal file
49
youtube_dl/extractor/podomatic.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class PodomaticIE(InfoExtractor):
|
||||||
|
IE_NAME = 'podomatic'
|
||||||
|
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
|
||||||
|
u"file": u"2009-01-02T16_03_35-08_00.mp3",
|
||||||
|
u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
|
||||||
|
u"info_dict": {
|
||||||
|
u"uploader": u"Science Teaching Tips",
|
||||||
|
u"uploader_id": u"scienceteachingtips",
|
||||||
|
u"title": u"64. When the Moon Hits Your Eye",
|
||||||
|
u"duration": 446,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
channel = mobj.group('channel')
|
||||||
|
|
||||||
|
json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
|
||||||
|
'?permalink=true&rtmp=0') %
|
||||||
|
(mobj.group('proto'), channel, video_id))
|
||||||
|
data_json = self._download_webpage(
|
||||||
|
json_url, video_id, note=u'Downloading video info')
|
||||||
|
data = json.loads(data_json)
|
||||||
|
|
||||||
|
video_url = data['downloadLink']
|
||||||
|
uploader = data['podcast']
|
||||||
|
title = data['title']
|
||||||
|
thumbnail = data['imageLocation']
|
||||||
|
duration = int(data['length'] / 1000.0)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': channel,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
68
youtube_dl/extractor/pornhub.py
Normal file
68
youtube_dl/extractor/pornhub.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
from ..aes import (
|
||||||
|
aes_decrypt_text
|
||||||
|
)
|
||||||
|
|
||||||
|
class PornHubIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||||
|
u'file': u'648719015.mp4',
|
||||||
|
u'md5': u'882f488fa1f0026f023f33576004a2ed',
|
||||||
|
u'info_dict': {
|
||||||
|
u"uploader": u"BABES-COM",
|
||||||
|
u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
|
||||||
|
u"age_limit": 18
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('videoid')
|
||||||
|
url = 'http://www.' + mobj.group('url')
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, u'title')
|
||||||
|
video_uploader = self._html_search_regex(r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
|
||||||
|
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
|
||||||
|
if thumbnail:
|
||||||
|
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
||||||
|
|
||||||
|
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||||
|
if webpage.find('"encrypted":true') != -1:
|
||||||
|
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password').replace('+', ' ')
|
||||||
|
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for video_url in video_urls:
|
||||||
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
|
extension = os.path.splitext(path)[1][1:]
|
||||||
|
format = path.split('/')[5].split('_')[:2]
|
||||||
|
format = "-".join(format)
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'ext': extension,
|
||||||
|
'format': format,
|
||||||
|
'format_id': format,
|
||||||
|
})
|
||||||
|
formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'uploader': video_uploader,
|
||||||
|
'title': video_title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'formats': formats,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
|
|||||||
u'md5': u'374dd6dcedd24234453b295209aa69b6',
|
u'md5': u'374dd6dcedd24234453b295209aa69b6',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"upload_date": u"20090708",
|
u"upload_date": u"20090708",
|
||||||
u"title": u"Marilyn-Monroe-Bathing"
|
u"title": u"Marilyn-Monroe-Bathing",
|
||||||
|
u"age_limit": 18
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
51
youtube_dl/extractor/pyvideo.py
Normal file
51
youtube_dl/extractor/pyvideo.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
import re
|
||||||
|
import os
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class PyvideoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
|
||||||
|
_TESTS = [{
|
||||||
|
u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
|
||||||
|
u'file': u'24_4WWkSmNo.mp4',
|
||||||
|
u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
|
||||||
|
u'info_dict': {
|
||||||
|
u"title": u"Become a logging expert in 30 minutes",
|
||||||
|
u"description": u"md5:9665350d466c67fb5b1598de379021f7",
|
||||||
|
u"upload_date": u"20130320",
|
||||||
|
u"uploader": u"NextDayVideo",
|
||||||
|
u"uploader_id": u"NextDayVideo",
|
||||||
|
},
|
||||||
|
u'add_ie': ['Youtube'],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
|
||||||
|
u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'2542',
|
||||||
|
u'ext': u'm4v',
|
||||||
|
u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
|
||||||
|
|
||||||
|
if m_youtube is not None:
|
||||||
|
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||||
|
|
||||||
|
title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
|
||||||
|
webpage, u'title', flags=re.DOTALL)
|
||||||
|
video_url = self._search_regex([r'<source src="(.*?)"',
|
||||||
|
r'<dt>Download</dt>.*?<a href="(.+?)"'],
|
||||||
|
webpage, u'video url', flags=re.DOTALL)
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': os.path.splitext(title)[0],
|
||||||
|
'url': video_url,
|
||||||
|
}
|
@ -8,7 +8,9 @@ class RedTubeIE(InfoExtractor):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.redtube.com/66418',
|
u'url': u'http://www.redtube.com/66418',
|
||||||
u'file': u'66418.mp4',
|
u'file': u'66418.mp4',
|
||||||
u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
# md5 varies from time to time, as in
|
||||||
|
# https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
|
||||||
|
#u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"title": u"Sucked on a toilet",
|
u"title": u"Sucked on a toilet",
|
||||||
u"age_limit": 18,
|
u"age_limit": 18,
|
||||||
@ -28,7 +30,7 @@ class RedTubeIE(InfoExtractor):
|
|||||||
r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
|
r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
|
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
|
||||||
webpage, u'title')
|
webpage, u'title')
|
||||||
|
|
||||||
# No self-labeling, but they describe themselves as
|
# No self-labeling, but they describe themselves as
|
||||||
|
@ -7,9 +7,10 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RTLnowIE(InfoExtractor):
|
class RTLnowIE(InfoExtractor):
|
||||||
"""Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
|
"""Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
|
||||||
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/|(?:www\.)?n-tvnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
||||||
u'file': u'90419.flv',
|
u'file': u'90419.flv',
|
||||||
@ -62,19 +63,6 @@ class RTLnowIE(InfoExtractor):
|
|||||||
u'skip_download': True,
|
u'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1',
|
|
||||||
u'file': u'127367.flv',
|
|
||||||
u'info_dict': {
|
|
||||||
u'upload_date': u'20130926',
|
|
||||||
u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...',
|
|
||||||
u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin',
|
|
||||||
u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg',
|
|
||||||
},
|
|
||||||
u'params': {
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
|
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
|
||||||
u'file': u'124903.flv',
|
u'file': u'124903.flv',
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user