Compare commits
475 Commits
2013.10.06
...
rtmp_test
Author | SHA1 | Date | |
---|---|---|---|
00b350d209 | |||
d8ec4959c8 | |||
d31209a144 | |||
529a2e2cc3 | |||
781a7d0546 | |||
fb04e40396 | |||
d9b011f201 | |||
b0b9eaa196 | |||
8b134b1062 | |||
0c75c3fa7a | |||
a3927cf7ee | |||
1a62c18f65 | |||
2a15e7063b | |||
d46cc192d7 | |||
bb2bebdbe1 | |||
5db07df634 | |||
ea36cbac5e | |||
d0d2b49ab7 | |||
31cb6d8fef | |||
daa0dd2973 | |||
de79c46c8f | |||
94ccb6fa2e | |||
07e4035879 | |||
d0efb9ec9a | |||
ac05067d3d | |||
113577e155 | |||
79d09f47c2 | |||
c059bdd432 | |||
02dbf93f0e | |||
1fb2bcbbf7 | |||
16e055849e | |||
66cfab4226 | |||
6d88bc37a3 | |||
b7553b2554 | |||
e03db0a077 | |||
a1ee09e815 | |||
267ed0c5d3 | |||
f459d17018 | |||
dc65dcbb6d | |||
d214fdb8fe | |||
138df537ff | |||
0c7c19d6bc | |||
eaaafc59c2 | |||
382ed50e0e | |||
66ec019240 | |||
bd49928f7a | |||
23e6d50d73 | |||
2e767313e4 | |||
38b2db6a66 | |||
13ebea791f | |||
4c9c57428f | |||
8bf9319e9c | |||
4914120727 | |||
36de0a0e1a | |||
e5c146d586 | |||
52ad14aeb0 | |||
43afe28588 | |||
a87b0615aa | |||
d7386f6276 | |||
081640940e | |||
7012b23c94 | |||
d3b30148ed | |||
9f79463803 | |||
d35dc6d3b5 | |||
50123be421 | |||
3f8ced5144 | |||
00ea0f11eb | |||
dca0872056 | |||
0b63aed8df | |||
15c3adbb16 | |||
f143a42fe6 | |||
241650c7ff | |||
bfe7439a20 | |||
cffa6aa107 | |||
02e4ebbbad | |||
ab009f59ef | |||
0980426559 | |||
b1c9c66936 | |||
a6a173c2fd | |||
2bb683c201 | |||
64bb5187f5 | |||
9e4f50a8ae | |||
0190eecc00 | |||
ca872a4c0b | |||
f2e87ef4fa | |||
0ad97bbc05 | |||
c4864091a1 | |||
9a98a466b3 | |||
f99e0f1ed6 | |||
d323bcb152 | |||
da6a795fdb | |||
c5edcde21f | |||
15ff3c831e | |||
100959a6d9 | |||
0a120f74b2 | |||
8f05351984 | |||
4eb92208a3 | |||
71791f414c | |||
f3682997d7 | |||
cc13cc0251 | |||
86bd5f2ca9 | |||
8694c60000 | |||
9d1538182f | |||
5904088811 | |||
69545c2aff | |||
495da337ae | |||
34b3afc7be | |||
00373a4c5d | |||
cb7dfeeac4 | |||
efd6c574a2 | |||
4113e6ab56 | |||
9a942a4671 | |||
9906d397a0 | |||
ae8f787141 | |||
a81b4d5c8f | |||
887c6acdf2 | |||
83aa529330 | |||
96b31b6533 | |||
fccd377198 | |||
2b35c9ef74 | |||
73c566695f | |||
63b7b7224a | |||
ce80c8b8ee | |||
749febf4d1 | |||
bdde425cbe | |||
746f491f82 | |||
1672647ade | |||
90b6bbc38c | |||
ce02ed60f2 | |||
1e5b9a95fd | |||
1d699755e0 | |||
ddf49c6344 | |||
ba3881dffd | |||
d1c252048b | |||
eab2724138 | |||
21ea3e06c9 | |||
52d703d3d1 | |||
ce152341a1 | |||
f058e34011 | |||
b5349e8721 | |||
7150858d49 | |||
91c7271aab | |||
aa13b2dffd | |||
fc2ef392be | |||
463a908705 | |||
d24ffe1cfa | |||
78fb87b283 | |||
ab2d524780 | |||
85d61685f1 | |||
b9643eed7c | |||
feee2ecfa9 | |||
a25a5cfeec | |||
0e145dd541 | |||
9f9be844fc | |||
e3b9ab5e18 | |||
c66d2baa9c | |||
08bc37cdd0 | |||
9771cceb2c | |||
ca715127a2 | |||
ea7a7af1d4 | |||
880e1c529d | |||
dcbb45803f | |||
80b9bbce86 | |||
d37936386f | |||
c3a3028f9f | |||
6c5ad80cdc | |||
b5bdc2699a | |||
384b98cd8f | |||
eb9b5bffef | |||
0bd59f3723 | |||
8b8cbd8f6d | |||
72b18c5d34 | |||
eb0a839866 | |||
1777d5a952 | |||
d4b7da84c3 | |||
801dbbdffd | |||
0ed05a1d2d | |||
1008bebade | |||
ae84f879d7 | |||
be6dfd1b49 | |||
231516b6c9 | |||
fb53d58dcf | |||
2a9e9b210b | |||
897d6cc43a | |||
f470c6c812 | |||
566d4e0425 | |||
81be02d2f9 | |||
c2b6a482d5 | |||
12c167c881 | |||
20aafee7fa | |||
be07375b66 | |||
4894fe8c5b | |||
dd5bcdc4c9 | |||
6161d17579 | |||
4ac5306ae7 | |||
b1a80ec1a9 | |||
672fe94dcb | |||
51040b72ed | |||
4f045eef8f | |||
5d7b253ea0 | |||
b0759f0c19 | |||
065472936a | |||
fc4a0c2aec | |||
eeb165e674 | |||
9ee2b5f6f2 | |||
da54be877a | |||
50a886b7ab | |||
76e67c2cb6 | |||
5137ebac0b | |||
a8eeb0597b | |||
4ed3e51080 | |||
7f34001d57 | |||
2dcf7d8f99 | |||
19b0668251 | |||
e7e6b54d8a | |||
2a1a8ffe41 | |||
08fb86c49b | |||
3633d77c0f | |||
165e179764 | |||
12ebdd1506 | |||
1baf9a5938 | |||
a56f9de156 | |||
fa5d47af4b | |||
d607038753 | |||
9ac6a01aaf | |||
be97abc247 | |||
9103bbc5cd | |||
b6c45014ae | |||
a3dd924871 | |||
137bbb3e37 | |||
86ad94bb2e | |||
3e56add7c9 | |||
f52f01b5d2 | |||
98d7efb537 | |||
cf51923545 | |||
38fcd4597a | |||
165e3bb67a | |||
38db46794f | |||
a9a3876d55 | |||
1f343eaabb | |||
72a5b4f702 | |||
0a43ddf320 | |||
31366066bd | |||
aa2484e390 | |||
8eddf3e91d | |||
60d142aa8d | |||
66cf3ac342 | |||
ab4e151347 | |||
ac2547f5ff | |||
5f1ea943ab | |||
0ef7ad5cd4 | |||
9f1109a564 | |||
33b1d9595d | |||
7193498811 | |||
72321ead7b | |||
b5d0d817bc | |||
94badb2599 | |||
b9a836515f | |||
21c924f406 | |||
e54fd4b23b | |||
57dd9a8f2f | |||
912cbf5d4e | |||
43d7895ea0 | |||
f7ff55aa78 | |||
795f28f871 | |||
f6cc16f5d8 | |||
321a01f971 | |||
646e17a53d | |||
dd508b7c4f | |||
2563bcc85c | |||
702665c085 | |||
dcc2a706ef | |||
2bc67c35ac | |||
77ae65877e | |||
32a35e4418 | |||
369a759acc | |||
79b3f61228 | |||
216d71d001 | |||
78a3a9f89e | |||
a7685f3bf4 | |||
f088ea5486 | |||
1003d108d5 | |||
8abeeb9449 | |||
c1002e96e9 | |||
77d0a82fef | |||
ebc14f251c | |||
d41e6efc85 | |||
8ffa13e03e | |||
db477d3a37 | |||
750e9833b8 | |||
82f0ac657c | |||
eb6a2277a2 | |||
f8778fb0fa | |||
e2f9de207c | |||
a93cc0d943 | |||
7d8c2e07f2 | |||
efb4c36b18 | |||
29526d0d2b | |||
198e370f23 | |||
c19f7764a5 | |||
bc63d9d329 | |||
aa929c37d5 | |||
af4d506eb3 | |||
5da0549581 | |||
749a4fd2fd | |||
6f71ef580c | |||
67874aeffa | |||
3e6a330d38 | |||
aee5e18c8f | |||
5b11143d05 | |||
7b2212e954 | |||
71865091ab | |||
125cfd78e8 | |||
8cb57d9b91 | |||
14e10b2b6e | |||
6e76104d66 | |||
1d45a23b74 | |||
7df286540f | |||
5d0c97541a | |||
49a25557b0 | |||
b5936c0059 | |||
600cc1a4f0 | |||
ea32fbacc8 | |||
00fe14fc75 | |||
fcc28edb2f | |||
fac6be2dd5 | |||
1cf64ee468 | |||
cdec0190c4 | |||
2450bcb28b | |||
3126050c0f | |||
93b22c7828 | |||
0a89b2852e | |||
55b3e45bba | |||
365bcf6d97 | |||
71907db3ba | |||
6803655ced | |||
df1c39ec5c | |||
80f55a9511 | |||
7853cc5ae1 | |||
586a91b67f | |||
b028e96144 | |||
ce68b5907c | |||
fe7e0c9825 | |||
12893efe01 | |||
a6387bfd3c | |||
f6a54188c2 | |||
cbbd9a9c69 | |||
685a9cd2f1 | |||
182a107877 | |||
8c51aa6506 | |||
3fd39e37f2 | |||
49e86983e7 | |||
a9c58ad945 | |||
f8b45beacc | |||
9d92015d43 | |||
50a6150ed9 | |||
d5a9bb4ea9 | |||
b0505eb611 | |||
284acd57d6 | |||
8ed6b34477 | |||
f6f1fc9286 | |||
8e590a117f | |||
d5594202aa | |||
b186d949cf | |||
3d2986063c | |||
41fd7c7e60 | |||
fdefe96bf2 | |||
16f36a6fc9 | |||
f44415360e | |||
cce722b79c | |||
82697fb2ab | |||
53c1d3ef49 | |||
8e55e9abfc | |||
7c58ef3275 | |||
416a5efce7 | |||
f4d96df0f1 | |||
5d254f776a | |||
1c1218fefc | |||
d21ab29200 | |||
54ed626cf8 | |||
a733eb6c53 | |||
591454798d | |||
38604f1a4f | |||
2d0efe70a6 | |||
bfd14b1b2f | |||
76965512da | |||
996d1c3242 | |||
8abbf43f21 | |||
10eaae48ff | |||
9d4660cab1 | |||
9d74e308f7 | |||
e772692ffd | |||
8381a92120 | |||
cd054fc491 | |||
f219743e33 | |||
4f41664de8 | |||
a4fd04158e | |||
44a5f1718a | |||
a623df4c7b | |||
7cf67fbe29 | |||
3ddf1a6d01 | |||
850555c484 | |||
9ed3bdc64d | |||
c45aa56080 | |||
7394b8db3b | |||
f9b3d7af47 | |||
ea62a2da46 | |||
7468b6b71d | |||
1fb07d10a3 | |||
9378ae6e1d | |||
06723d47c4 | |||
69a0c470b5 | |||
c40f5cf45c | |||
4b7b839f24 | |||
3d60d33773 | |||
d7e66d39a0 | |||
d3f46b9aa5 | |||
f5e54a1fda | |||
4eb7f1d12e | |||
0f6d12e43c | |||
b4cdc245cf | |||
3283533149 | |||
8032e31f2d | |||
d2f9cdb205 | |||
8016c92297 | |||
e028d0d1e3 | |||
79819f58f2 | |||
6ff000b888 | |||
99e206d508 | |||
dd82ffea0c | |||
3823342d9d | |||
91dbaef406 | |||
9026dd3858 | |||
81d7f1928c | |||
bc4f29170f | |||
cb354c8f62 | |||
1cbb27b151 | |||
0ab4ff6378 | |||
63da13e829 | |||
4193a453c2 | |||
2e1fa03bf5 | |||
8f1ae18a18 | |||
57da92b7df | |||
df4f632dbc | |||
a34c2faae4 | |||
1d368c7589 | |||
88bd97e34c | |||
2ae3edb1cf | |||
b2ad967e45 | |||
a27b9e8bd5 | |||
4481a754e4 | |||
faa6ef6bc8 | |||
15870e90b0 | |||
8e4f824365 | |||
387ae5f30b | |||
ad7a071ab6 | |||
1310bf2474 | |||
b24f347190 | |||
ee6c9f95e1 | |||
2a69c6b879 | |||
cfadd183c4 | |||
e484c81f0c | |||
8dbe9899a9 | |||
c1c9a79c49 | |||
ca215e0a4f | |||
91a26ca559 | |||
1ece880d7c | |||
400afddaf4 | |||
73b4fafd82 | |||
b039775057 | |||
5c1d63b737 | |||
cc6943e86a | |||
8f77093262 | |||
d79a0e233a | |||
0025da15cf |
@ -3,6 +3,9 @@ python:
|
||||
- "2.6"
|
||||
- "2.7"
|
||||
- "3.3"
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq rtmpdump
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
|
13
Makefile
13
Makefile
@ -13,13 +13,13 @@ PYTHON=/usr/bin/env python
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
ifeq ($(PREFIX),/usr)
|
||||
SYSCONFDIR=/etc
|
||||
SYSCONFDIR=/etc
|
||||
else
|
||||
ifeq ($(PREFIX),/usr/local)
|
||||
SYSCONFDIR=/etc
|
||||
else
|
||||
SYSCONFDIR=$(PREFIX)/etc
|
||||
endif
|
||||
ifeq ($(PREFIX),/usr/local)
|
||||
SYSCONFDIR=/etc
|
||||
else
|
||||
SYSCONFDIR=$(PREFIX)/etc
|
||||
endif
|
||||
endif
|
||||
|
||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
||||
@ -71,6 +71,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude '*~' \
|
||||
--exclude '__pycache' \
|
||||
--exclude '.git' \
|
||||
--exclude 'testdata' \
|
||||
-- \
|
||||
bin devscripts test youtube_dl \
|
||||
CHANGELOG LICENSE README.md README.txt \
|
||||
|
28
README.md
28
README.md
@ -21,6 +21,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
sudo if needed)
|
||||
-i, --ignore-errors continue on download errors, for example to to
|
||||
skip unavailable videos in a playlist
|
||||
--abort-on-error Abort downloading of further videos (in the
|
||||
playlist or the command line) if an error occurs
|
||||
--dump-user-agent display the current browser identification
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer REF specify a custom referer, use if the video access
|
||||
@ -30,7 +32,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--extractor-descriptions Output descriptions of all supported extractors
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--cache-dir None Location in the filesystem where youtube-dl can
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl can
|
||||
store downloaded information permanently. By
|
||||
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
|
||||
/youtube-dl .
|
||||
@ -52,11 +54,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--datebefore DATE download only videos uploaded before this date
|
||||
--dateafter DATE download only videos uploaded after this date
|
||||
--no-playlist download only the currently playing video
|
||||
--age-limit YEARS download only videos suitable for the given age
|
||||
--download-archive FILE Download only videos not present in the archive
|
||||
file. Record all downloaded videos in it.
|
||||
|
||||
## Download Options:
|
||||
-r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m)
|
||||
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g.
|
||||
50K or 4.2M)
|
||||
-R, --retries RETRIES number of retries (default is 10)
|
||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16k)
|
||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer do not automatically adjust the buffer size. By
|
||||
default, the buffer size is automatically resized
|
||||
@ -72,7 +78,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
%(uploader_id)s for the uploader nickname if
|
||||
different, %(autonumber)s to get an automatically
|
||||
incremented number, %(ext)s for the filename
|
||||
extension, %(upload_date)s for the upload date
|
||||
extension, %(format)s for the format description
|
||||
(like "22 - 1280x720" or "HD"),%(format_id)s for
|
||||
the unique id of the format (like Youtube's
|
||||
itags: "137"),%(upload_date)s for the upload date
|
||||
(YYYYMMDD), %(extractor)s for the provider
|
||||
(youtube, metacafe, etc), %(id)s for the video id
|
||||
, %(playlist)s for the playlist the video is in,
|
||||
@ -83,12 +92,14 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
ownloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s
|
||||
when it is present in output filename template or
|
||||
--autonumber option is given
|
||||
--auto-number option is given
|
||||
--restrict-filenames Restrict filenames to only ASCII characters, and
|
||||
avoid "&" and spaces in filenames
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
||||
-w, --no-overwrites do not overwrite files
|
||||
-c, --continue resume partially downloaded files
|
||||
-c, --continue force resume of partially downloaded files. By
|
||||
default, youtube-dl will resume downloads if
|
||||
possible.
|
||||
--no-continue do not resume partially downloaded files (restart
|
||||
from beginning)
|
||||
--cookies FILE file to read cookies from and dump cookie jar in
|
||||
@ -97,6 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
file modification time
|
||||
--write-description write video description to a .description file
|
||||
--write-info-json write video metadata to a .info.json file
|
||||
--write-annotations write video annotations to a .annotation file
|
||||
--write-thumbnail write thumbnail image to disk
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
@ -111,12 +123,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--get-description simulate, quiet but print video description
|
||||
--get-filename simulate, quiet but print output filename
|
||||
--get-format simulate, quiet but print output format
|
||||
-j, --dump-json simulate, quiet but print JSON information
|
||||
--newline output progress bar as new lines
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
-v, --verbose print various debugging information
|
||||
--dump-intermediate-pages print downloaded pages to debug problems(very
|
||||
verbose)
|
||||
--write-pages Write downloaded pages to files in the current
|
||||
directory
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specifiy the order of
|
||||
@ -163,6 +178,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
processed files are overwritten by default
|
||||
--embed-subs embed subtitles in the video (only for mp4
|
||||
videos)
|
||||
--add-metadata add metadata to the files
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
__youtube-dl()
|
||||
__youtube_dl()
|
||||
{
|
||||
local cur prev opts
|
||||
COMPREPLY=()
|
||||
@ -15,4 +15,4 @@ __youtube-dl()
|
||||
fi
|
||||
}
|
||||
|
||||
complete -F __youtube-dl youtube-dl
|
||||
complete -F __youtube_dl youtube-dl
|
||||
|
39
devscripts/check-porn.py
Normal file
39
devscripts/check-porn.py
Normal file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
|
||||
if we are not 'age_limit' tagging some porn site
|
||||
"""
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_testcases
|
||||
from youtube_dl.utils import compat_urllib_request
|
||||
|
||||
for test in get_testcases():
|
||||
try:
|
||||
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
|
||||
except:
|
||||
print('\nFail: {0}'.format(test['name']))
|
||||
continue
|
||||
|
||||
webpage = webpage.decode('utf8', 'replace')
|
||||
|
||||
if 'porn' in webpage.lower() and ('info_dict' not in test
|
||||
or 'age_limit' not in test['info_dict']
|
||||
or test['info_dict']['age_limit'] != 18):
|
||||
print('\nPotential missing age_limit check: {0}'.format(test['name']))
|
||||
|
||||
elif 'porn' not in webpage.lower() and ('info_dict' in test and
|
||||
'age_limit' in test['info_dict'] and
|
||||
test['info_dict']['age_limit'] == 18):
|
||||
print('\nPotential false negative: {0}'.format(test['name']))
|
||||
|
||||
else:
|
||||
sys.stdout.write('.')
|
||||
sys.stdout.flush()
|
||||
|
||||
print()
|
@ -16,10 +16,11 @@ def main():
|
||||
ie_htmls = []
|
||||
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
|
||||
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
|
||||
try:
|
||||
ie_desc = getattr(ie, 'IE_DESC', None)
|
||||
if ie_desc is False:
|
||||
continue
|
||||
elif ie_desc is not None:
|
||||
ie_html += ': {}'.format(ie.IE_DESC)
|
||||
except AttributeError:
|
||||
pass
|
||||
if ie.working() == False:
|
||||
ie_html += ' (Currently broken)'
|
||||
ie_htmls.append('<li>{}</li>'.format(ie_html))
|
||||
|
@ -88,10 +88,6 @@ ROOT=$(pwd)
|
||||
"$ROOT/devscripts/gh-pages/update-sites.py"
|
||||
git add *.html *.html.in update
|
||||
git commit -m "release $version"
|
||||
git show HEAD
|
||||
read -p "Is it good, can I push? (y/n) " -n 1
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
||||
echo
|
||||
git push "$ROOT" gh-pages
|
||||
git push "$ORIGIN_URL" gh-pages
|
||||
)
|
||||
|
10
setup.py
10
setup.py
@ -8,8 +8,10 @@ import sys
|
||||
|
||||
try:
|
||||
from setuptools import setup
|
||||
setuptools_available = True
|
||||
except ImportError:
|
||||
from distutils.core import setup
|
||||
setuptools_available = False
|
||||
|
||||
try:
|
||||
# This will create an exe that needs Microsoft Visual C++ 2008
|
||||
@ -43,13 +45,16 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
||||
params = py2exe_params
|
||||
else:
|
||||
params = {
|
||||
'scripts': ['bin/youtube-dl'],
|
||||
'data_files': [ # Installing system-wide would require sudo...
|
||||
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
||||
('share/doc/youtube_dl', ['README.txt']),
|
||||
('share/man/man1/', ['youtube-dl.1'])
|
||||
('share/man/man1', ['youtube-dl.1'])
|
||||
]
|
||||
}
|
||||
if setuptools_available:
|
||||
params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
|
||||
else:
|
||||
params['scripts'] = ['bin/youtube-dl']
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
@ -63,6 +68,7 @@ setup(
|
||||
' YouTube.com and other video sites.',
|
||||
url='https://github.com/rg3/youtube-dl',
|
||||
author='Ricardo Garcia',
|
||||
author_email='ytdl@yt-dl.org',
|
||||
maintainer='Philipp Hagemeister',
|
||||
maintainer_email='phihag@phihag.de',
|
||||
packages=['youtube_dl', 'youtube_dl.extractor'],
|
||||
|
0
test/__init__.py
Normal file
0
test/__init__.py
Normal file
@ -1,27 +1,25 @@
|
||||
import errno
|
||||
import io
|
||||
import hashlib
|
||||
import json
|
||||
import os.path
|
||||
import re
|
||||
import types
|
||||
import sys
|
||||
|
||||
import youtube_dl.extractor
|
||||
from youtube_dl import YoutubeDL, YoutubeDLHandler
|
||||
from youtube_dl.utils import (
|
||||
compat_cookiejar,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.utils import preferredencoding
|
||||
|
||||
# General configuration (from __init__, not very elegant...)
|
||||
jar = compat_cookiejar.CookieJar()
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
proxy_handler = compat_urllib_request.ProxyHandler()
|
||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
compat_urllib_request.install_opener(opener)
|
||||
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
parameters = json.load(pf)
|
||||
def get_params(override=None):
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||
"parameters.json")
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
parameters = json.load(pf)
|
||||
if override:
|
||||
parameters.update(override)
|
||||
return parameters
|
||||
|
||||
|
||||
def try_rm(filename):
|
||||
@ -33,11 +31,26 @@ def try_rm(filename):
|
||||
raise
|
||||
|
||||
|
||||
def report_warning(message):
|
||||
'''
|
||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
if sys.stderr.isatty() and os.name != 'nt':
|
||||
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = u'WARNING:'
|
||||
output = u'%s %s\n' % (_msg_header, message)
|
||||
if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
|
||||
output = output.encode(preferredencoding())
|
||||
sys.stderr.write(output)
|
||||
|
||||
|
||||
class FakeYDL(YoutubeDL):
|
||||
def __init__(self):
|
||||
def __init__(self, override=None):
|
||||
# Different instances of the downloader can't share the same dictionary
|
||||
# some test set the "sublang" parameter, which would break the md5 checks.
|
||||
params = dict(parameters)
|
||||
params = get_params(override=override)
|
||||
super(FakeYDL, self).__init__(params)
|
||||
self.result = []
|
||||
|
||||
@ -67,3 +80,6 @@ def get_testcases():
|
||||
for t in getattr(ie, '_TESTS', []):
|
||||
t['name'] = type(ie).__name__[:-len('IE')]
|
||||
yield t
|
||||
|
||||
|
||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
145
test/test_YoutubeDL.py
Normal file
145
test/test_YoutubeDL.py
Normal file
@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
|
||||
|
||||
class YDL(FakeYDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(YDL, self).__init__(*args, **kwargs)
|
||||
self.downloaded_info_dicts = []
|
||||
self.msgs = []
|
||||
|
||||
def process_info(self, info_dict):
|
||||
self.downloaded_info_dicts.append(info_dict)
|
||||
|
||||
def to_screen(self, msg):
|
||||
self.msgs.append(msg)
|
||||
|
||||
|
||||
class TestFormatSelection(unittest.TestCase):
|
||||
def test_prefer_free_formats(self):
|
||||
# Same resolution => download webm
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{u'ext': u'webm', u'height': 460},
|
||||
{u'ext': u'mp4', u'height': 460},
|
||||
]
|
||||
info_dict = {u'formats': formats, u'extractor': u'test'}
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'webm')
|
||||
|
||||
# Different resolution => download best quality (mp4)
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{u'ext': u'webm', u'height': 720},
|
||||
{u'ext': u'mp4', u'height': 1080},
|
||||
]
|
||||
info_dict[u'formats'] = formats
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'mp4')
|
||||
|
||||
# No prefer_free_formats => keep original formats order
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{u'ext': u'webm', u'height': 720},
|
||||
{u'ext': u'flv', u'height': 720},
|
||||
]
|
||||
info_dict[u'formats'] = formats
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'flv')
|
||||
|
||||
def test_format_limit(self):
|
||||
formats = [
|
||||
{u'format_id': u'meh', u'url': u'http://example.com/meh'},
|
||||
{u'format_id': u'good', u'url': u'http://example.com/good'},
|
||||
{u'format_id': u'great', u'url': u'http://example.com/great'},
|
||||
{u'format_id': u'excellent', u'url': u'http://example.com/exc'},
|
||||
]
|
||||
info_dict = {
|
||||
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
|
||||
|
||||
ydl = YDL()
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'format_id'], u'excellent')
|
||||
|
||||
ydl = YDL({'format_limit': 'good'})
|
||||
assert ydl.params['format_limit'] == 'good'
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'format_id'], u'good')
|
||||
|
||||
ydl = YDL({'format_limit': 'great', 'format': 'all'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
|
||||
self.assertTrue('3' in ydl.msgs[0])
|
||||
|
||||
ydl = YDL()
|
||||
ydl.params['format_limit'] = 'excellent'
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'format_id'], u'excellent')
|
||||
|
||||
def test_format_selection(self):
|
||||
formats = [
|
||||
{u'format_id': u'35', u'ext': u'mp4'},
|
||||
{u'format_id': u'45', u'ext': u'webm'},
|
||||
{u'format_id': u'47', u'ext': u'webm'},
|
||||
{u'format_id': u'2', u'ext': u'flv'},
|
||||
]
|
||||
info_dict = {u'formats': formats, u'extractor': u'test'}
|
||||
|
||||
ydl = YDL({'format': u'20/47'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'47')
|
||||
|
||||
ydl = YDL({'format': u'20/71/worst'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'35')
|
||||
|
||||
ydl = YDL()
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'2')
|
||||
|
||||
ydl = YDL({'format': u'webm/mp4'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'47')
|
||||
|
||||
ydl = YDL({'format': u'3gp/40/mp4'})
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'35')
|
||||
|
||||
def test_add_extra_info(self):
|
||||
test_dict = {
|
||||
'extractor': 'Foo',
|
||||
}
|
||||
extra_info = {
|
||||
'extractor': 'Bar',
|
||||
'playlist': 'funny videos',
|
||||
}
|
||||
YDL.add_extra_info(test_dict, extra_info)
|
||||
self.assertEqual(test_dict['extractor'], 'Foo')
|
||||
self.assertEqual(test_dict['playlist'], 'funny videos')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
54
test/test_age_restriction.py
Normal file
54
test/test_age_restriction.py
Normal file
@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
|
||||
|
||||
from youtube_dl import YoutubeDL
|
||||
|
||||
|
||||
def _download_restricted(url, filename, age):
|
||||
""" Returns true iff the file has been downloaded """
|
||||
|
||||
params = {
|
||||
'age_limit': age,
|
||||
'skip_download': True,
|
||||
'writeinfojson': True,
|
||||
"outtmpl": "%(id)s.%(ext)s",
|
||||
}
|
||||
ydl = YoutubeDL(params)
|
||||
ydl.add_default_info_extractors()
|
||||
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
||||
try_rm(json_filename)
|
||||
ydl.download([url])
|
||||
res = os.path.exists(json_filename)
|
||||
try_rm(json_filename)
|
||||
return res
|
||||
|
||||
|
||||
class TestAgeRestriction(unittest.TestCase):
|
||||
def _assert_restricted(self, url, filename, age, old_age=None):
|
||||
self.assertTrue(_download_restricted(url, filename, old_age))
|
||||
self.assertFalse(_download_restricted(url, filename, age))
|
||||
|
||||
def test_youtube(self):
|
||||
self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
|
||||
|
||||
def test_youporn(self):
|
||||
self._assert_restricted(
|
||||
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||
'505835.mp4', 2, old_age=25)
|
||||
|
||||
def test_pornotube(self):
|
||||
self._assert_restricted(
|
||||
'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
|
||||
'1689755.flv', 13)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -1,14 +1,20 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from test.helper import get_testcases
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
gen_extractors,
|
||||
JustinTVIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE, gen_extractors
|
||||
from helper import get_testcases
|
||||
|
||||
class TestAllURLsMatching(unittest.TestCase):
|
||||
def setUp(self):
|
||||
@ -94,10 +100,11 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
def test_keywords(self):
|
||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||
self.assertMatch(':thedailyshow', ['ComedyCentral'])
|
||||
self.assertMatch(':tds', ['ComedyCentral'])
|
||||
self.assertMatch(':colbertreport', ['ComedyCentral'])
|
||||
self.assertMatch(':cr', ['ComedyCentral'])
|
||||
self.assertMatch(':ythistory', ['youtube:history'])
|
||||
self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
|
||||
self.assertMatch(':tds', ['ComedyCentralShows'])
|
||||
self.assertMatch(':colbertreport', ['ComedyCentralShows'])
|
||||
self.assertMatch(':cr', ['ComedyCentralShows'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1,72 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
import hashlib
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor import DailymotionIE
|
||||
from youtube_dl.utils import *
|
||||
from helper import FakeYDL
|
||||
|
||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
class TestDailymotionSubtitles(unittest.TestCase):
    """Checks the 'subtitles' field DailymotionIE produces under the
    different subtitle-related options set on the fake downloader."""

    def setUp(self):
        self.DL = FakeYDL()
        self.url = 'http://www.dailymotion.com/video/xczg00'

    def getInfoDict(self):
        """Run DailymotionIE on ``self.url`` and return what ``extract`` yields."""
        IE = DailymotionIE(self.DL)
        info_dict = IE.extract(self.url)
        return info_dict

    def getSubtitles(self):
        """Return the 'subtitles' entry of the first extracted result."""
        info_dict = self.getInfoDict()
        return info_dict[0]['subtitles']

    def test_no_writesubtitles(self):
        # No subtitle option set: the 'subtitles' field is expected to be None.
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 5)

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extract() is expected to return None.
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # The language option is spelled 'subtitleslangs' in every other test
        # of this class; the former 'subtitleslang' key matched none of them.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        # assertEqual reports the actual length on failure, unlike assertTrue.
        self.assertEqual(len(subtitles), 0)

    def test_nosubtitles(self):
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -1,34 +1,38 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import (
|
||||
get_params,
|
||||
get_testcases,
|
||||
try_rm,
|
||||
md5,
|
||||
report_warning
|
||||
)
|
||||
|
||||
|
||||
import hashlib
|
||||
import io
|
||||
import os
|
||||
import json
|
||||
import unittest
|
||||
import sys
|
||||
import socket
|
||||
import binascii
|
||||
|
||||
# Allow direct execution
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import youtube_dl.YoutubeDL
|
||||
from youtube_dl.utils import *
|
||||
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
from youtube_dl.utils import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_HTTPError,
|
||||
DownloadError,
|
||||
ExtractorError,
|
||||
UnavailableVideoError,
|
||||
)
|
||||
from youtube_dl.extractor import get_info_extractor
|
||||
|
||||
RETRIES = 3
|
||||
|
||||
# General configuration (from __init__, not very elegant...)
|
||||
jar = compat_cookiejar.CookieJar()
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
proxy_handler = compat_urllib_request.ProxyHandler()
|
||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
compat_urllib_request.install_opener(opener)
|
||||
socket.setdefaulttimeout(10)
|
||||
|
||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
class YoutubeDL(youtube_dl.YoutubeDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.to_stderr = self.to_screen
|
||||
@ -45,17 +49,12 @@ def _file_md5(fn):
|
||||
with open(fn, 'rb') as f:
|
||||
return hashlib.md5(f.read()).hexdigest()
|
||||
|
||||
from helper import get_testcases, try_rm
|
||||
defs = get_testcases()
|
||||
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
parameters = json.load(pf)
|
||||
|
||||
|
||||
class TestDownload(unittest.TestCase):
|
||||
maxDiff = None
|
||||
def setUp(self):
|
||||
self.parameters = parameters
|
||||
self.defs = defs
|
||||
|
||||
### Dynamically generate tests
|
||||
@ -63,20 +62,27 @@ def generator(test_case):
|
||||
|
||||
def test_template(self):
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
|
||||
def print_skipping(reason):
|
||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||
if not ie._WORKING:
|
||||
if not ie.working():
|
||||
print_skipping('IE marked as not _WORKING')
|
||||
return
|
||||
if 'playlist' not in test_case and not test_case['file']:
|
||||
print_skipping('No output file specified')
|
||||
return
|
||||
if 'playlist' not in test_case:
|
||||
info_dict = test_case.get('info_dict', {})
|
||||
if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
||||
print_skipping('The output file cannot be know, the "file" '
|
||||
'key is missing or the info_dict is incomplete')
|
||||
return
|
||||
if 'skip' in test_case:
|
||||
print_skipping(test_case['skip'])
|
||||
return
|
||||
for other_ie in other_ies:
|
||||
if not other_ie.working():
|
||||
print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
|
||||
return
|
||||
|
||||
params = self.parameters.copy()
|
||||
params.update(test_case.get('params', {}))
|
||||
params = get_params(test_case.get('params', {}))
|
||||
|
||||
ydl = YoutubeDL(params)
|
||||
ydl.add_default_info_extractors()
|
||||
@ -86,35 +92,48 @@ def generator(test_case):
|
||||
finished_hook_called.add(status['filename'])
|
||||
ydl.fd.add_progress_hook(_hook)
|
||||
|
||||
def get_tc_filename(tc):
|
||||
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
||||
|
||||
test_cases = test_case.get('playlist', [test_case])
|
||||
for tc in test_cases:
|
||||
try_rm(tc['file'])
|
||||
try_rm(tc['file'] + '.part')
|
||||
try_rm(tc['file'] + '.info.json')
|
||||
def try_rm_tcs_files():
|
||||
for tc in test_cases:
|
||||
tc_filename = get_tc_filename(tc)
|
||||
try_rm(tc_filename)
|
||||
try_rm(tc_filename + '.part')
|
||||
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
|
||||
try_rm_tcs_files()
|
||||
try:
|
||||
for retry in range(1, RETRIES + 1):
|
||||
try_num = 1
|
||||
while True:
|
||||
try:
|
||||
ydl.download([test_case['url']])
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
if retry == RETRIES: raise
|
||||
|
||||
# Check if the exception is not a network related one
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||
raise
|
||||
|
||||
print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry))
|
||||
if try_num == RETRIES:
|
||||
report_warning(u'Failed due to network errors, skipping...')
|
||||
return
|
||||
|
||||
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
|
||||
|
||||
try_num += 1
|
||||
else:
|
||||
break
|
||||
|
||||
for tc in test_cases:
|
||||
tc_filename = get_tc_filename(tc)
|
||||
if not test_case.get('params', {}).get('skip_download', False):
|
||||
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
|
||||
self.assertTrue(tc['file'] in finished_hook_called)
|
||||
self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
|
||||
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
||||
self.assertTrue(tc_filename in finished_hook_called)
|
||||
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
||||
self.assertTrue(os.path.exists(info_json_fn))
|
||||
if 'md5' in tc:
|
||||
md5_for_file = _file_md5(tc['file'])
|
||||
md5_for_file = _file_md5(tc_filename)
|
||||
self.assertEqual(md5_for_file, tc['md5'])
|
||||
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
|
||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||
info_dict = json.load(infof)
|
||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
@ -134,11 +153,11 @@ def generator(test_case):
|
||||
# Check for the presence of mandatory fields
|
||||
for key in ('id', 'url', 'title', 'ext'):
|
||||
self.assertTrue(key in info_dict.keys() and info_dict[key])
|
||||
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
||||
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
|
||||
finally:
|
||||
for tc in test_cases:
|
||||
try_rm(tc['file'])
|
||||
try_rm(tc['file'] + '.part')
|
||||
try_rm(tc['file'] + '.info.json')
|
||||
try_rm_tcs_files()
|
||||
|
||||
return test_template
|
||||
|
||||
|
@ -1,25 +1,29 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
import json
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
DailymotionPlaylistIE,
|
||||
DailymotionUserIE,
|
||||
VimeoChannelIE,
|
||||
UstreamChannelIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
LivestreamIE,
|
||||
NHLVideocenterIE,
|
||||
BambuserChannelIE,
|
||||
BandcampAlbumIE
|
||||
)
|
||||
from youtube_dl.utils import *
|
||||
|
||||
from helper import FakeYDL
|
||||
|
||||
class TestPlaylists(unittest.TestCase):
|
||||
def assertIsPlaylist(self, info):
|
||||
@ -58,6 +62,14 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['id'], u'5124905')
|
||||
self.assertTrue(len(result['entries']) >= 11)
|
||||
|
||||
def test_soundcloud_set(self):
    # A SoundCloud set URL must yield a playlist with the expected title
    # and at least six entries.
    extractor = SoundcloudSetIE(FakeYDL())
    playlist = extractor.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['title'], u'The Royal Concept EP')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 6)
|
||||
|
||||
def test_soundcloud_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = SoundcloudUserIE(dl)
|
||||
@ -74,5 +86,30 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], u'TEDCity2.0 (English)')
|
||||
self.assertTrue(len(result['entries']) >= 4)
|
||||
|
||||
def test_nhl_videocenter(self):
    # The category page must yield a playlist whose id and title match,
    # containing exactly twelve entries.
    extractor = NHLVideocenterIE(FakeYDL())
    playlist = extractor.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], u'999')
    self.assertEqual(playlist['title'], u'Highlights')
    entry_count = len(playlist['entries'])
    self.assertEqual(entry_count, 12)
|
||||
|
||||
def test_bambuser_channel(self):
    # The channel must yield a playlist titled after the channel with at
    # least sixty entries.
    extractor = BambuserChannelIE(FakeYDL())
    playlist = extractor.extract('http://bambuser.com/channel/pixelversity')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['title'], u'pixelversity')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 60)
|
||||
|
||||
def test_bandcamp_album(self):
    # The album page must yield a playlist with the album title and at
    # least four entries.
    extractor = BandcampAlbumIE(FakeYDL())
    playlist = extractor.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['title'], u'Nightmare Night EP')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 4)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
210
test/test_subtitles.py
Normal file
210
test/test_subtitles.py
Normal file
@ -0,0 +1,210 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, md5
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
YoutubeIE,
|
||||
DailymotionIE,
|
||||
TEDIE,
|
||||
)
|
||||
|
||||
|
||||
class BaseTestSubtitles(unittest.TestCase):
    """Shared fixture for the subtitle tests.

    Subclasses set ``url`` (what to extract) and ``IE`` (the extractor
    class to instantiate with the fake downloader).
    """
    url = None
    IE = None

    def setUp(self):
        # A fresh fake downloader and extractor instance for every test.
        self.DL = FakeYDL()
        self.ie = self.IE(self.DL)

    def getInfoDict(self):
        """Return the result of running the extractor on ``self.url``."""
        return self.ie.extract(self.url)

    def getSubtitles(self):
        """Return the 'subtitles' entry of the extraction result."""
        return self.getInfoDict()['subtitles']
|
||||
|
||||
|
||||
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle checks for YoutubeIE; the default video id is 'QRS8MkLhQmM'."""
    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def getSubtitles(self):
        # For this extractor the result is indexed with [0] before the
        # 'subtitles' entry is read.
        return self.getInfoDict()[0]['subtitles']

    def test_youtube_no_writesubtitles(self):
        self.DL.params['writesubtitles'] = False
        self.assertEqual(self.getSubtitles(), None)

    def test_youtube_subtitles(self):
        self.DL.params['writesubtitles'] = True
        english = self.getSubtitles()['en']
        self.assertEqual(md5(english), '4cd9278a35ba2305f47354ee13472260')

    def test_youtube_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['it']
        italian = self.getSubtitles()['it']
        self.assertEqual(md5(italian), '164a51f16f260476a05b50fe4c2f161d')

    def test_youtube_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        found = self.getSubtitles()
        self.assertEqual(len(found.keys()), 13)

    def test_youtube_subtitles_sbv_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'sbv'
        english = self.getSubtitles()['en']
        self.assertEqual(md5(english), '13aeaa0c245a8bed9a451cb643e3ad8b')

    def test_youtube_subtitles_vtt_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        english = self.getSubtitles()['en']
        self.assertEqual(md5(english), '356cdc577fde0c6783b9b822e7206ff7')

    def test_youtube_list_subtitles(self):
        # With 'listsubtitles' set, extract() is expected to return None.
        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
        self.DL.params['listsubtitles'] = True
        self.assertEqual(self.getInfoDict(), None)

    def test_youtube_automatic_captions(self):
        self.url = '8YoUxe5ncPo'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        found = self.getSubtitles()
        self.assertTrue(found['it'] is not None)

    def test_youtube_nosubtitles(self):
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'sAjKT8FhjI8'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        found = self.getSubtitles()
        self.assertEqual(len(found), 0)

    def test_youtube_multiple_langs(self):
        self.url = 'QRS8MkLhQmM'
        self.DL.params['writesubtitles'] = True
        wanted = ['it', 'fr', 'de']
        self.DL.params['subtitleslangs'] = wanted
        found = self.getSubtitles()
        # Every requested language must be present in the result.
        for code in wanted:
            self.assertTrue(found.get(code) is not None, u'Subtitles for \'%s\' not extracted' % code)
|
||||
|
||||
|
||||
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Checks the 'subtitles' field DailymotionIE produces under the
    different subtitle-related options set on the fake downloader."""
    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_no_writesubtitles(self):
        # No subtitle option set: the 'subtitles' field is expected to be None.
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 5)

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extract() is expected to return None.
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # The language option is spelled 'subtitleslangs' in every other test
        # of this class; the former 'subtitleslang' key matched none of them.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        # assertEqual reports the actual length on failure, unlike assertTrue.
        self.assertEqual(len(subtitles), 0)

    def test_nosubtitles(self):
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
|
||||
class TestTedSubtitles(BaseTestSubtitles):
    """Checks the 'subtitles' field TEDIE produces under the different
    subtitle-related options set on the fake downloader."""
    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TEDIE

    def test_no_writesubtitles(self):
        # No subtitle option set: the 'subtitles' field is expected to be None.
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 28)

    def test_list_subtitles(self):
        # With 'listsubtitles' set, extract() is expected to return None.
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # The language option is spelled 'subtitleslangs' in every other test
        # of this class; the former 'subtitleslang' key matched none of them.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        # assertEqual reports the actual length on failure, unlike assertTrue.
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -1,14 +1,15 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Various small unit tests
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
import xml.etree.ElementTree
|
||||
# coding: utf-8
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
# Various small unit tests
|
||||
import xml.etree.ElementTree
|
||||
|
||||
#from youtube_dl.utils import htmlentity_transform
|
||||
from youtube_dl.utils import (
|
||||
@ -20,6 +21,11 @@ from youtube_dl.utils import (
|
||||
unified_strdate,
|
||||
find_xpath_attr,
|
||||
get_meta_content,
|
||||
xpath_with_ns,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
shell_quote,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
@ -141,5 +147,35 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(get_meta('description'), u'foo & bar')
|
||||
self.assertEqual(get_meta('author'), 'Plato')
|
||||
|
||||
def test_xpath_with_ns(self):
    # xpath_with_ns must turn 'prefix:tag' steps into paths that
    # ElementTree can resolve, given a prefix -> namespace-URI mapping.
    namespaces = {'media': 'http://example.com/'}
    testxml = u'''<root xmlns:media="http://example.com/">
<media:song>
<media:author>The Author</media:author>
<url>http://server.com/download.mp3</url>
</media:song>
</root>'''
    doc = xml.etree.ElementTree.fromstring(testxml)

    def find(path):
        return doc.find(xpath_with_ns(path, namespaces))

    song = find('media:song')
    self.assertTrue(song is not None)
    author = find('media:song/media:author')
    self.assertEqual(author.text, u'The Author')
    # Un-prefixed steps are looked up as plain tag names.
    link = find('media:song/url')
    self.assertEqual(link.text, u'http://server.com/download.mp3')
|
||||
|
||||
def test_smuggle_url(self):
    # Round trip: data smuggled into a URL must come back unchanged,
    # together with the original URL.
    url = 'https://foo.bar/baz?x=y#a'
    data = {u"ö": u"ö", u"abc": [3]}
    carrier = smuggle_url(url, data)
    recovered_url, recovered_data = unsmuggle_url(carrier)
    self.assertEqual(url, recovered_url)
    self.assertEqual(data, recovered_data)

    # A URL that carries nothing comes back as-is with None as its data.
    plain_url, plain_data = unsmuggle_url(url)
    self.assertEqual(plain_url, url)
    self.assertEqual(plain_data, None)
|
||||
|
||||
def test_shell_quote(self):
    # A file name containing non-ASCII characters and a single quote must
    # be quoted into the exact command line below.
    filename = encodeFilename(u'ñ€ß\'.mp4')
    command = ['ffmpeg', '-i', filename]
    quoted = shell_quote(command)
    self.assertEqual(quoted, u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
79
test/test_write_annotations.py
Normal file
79
test/test_write_annotations.py
Normal file
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import get_params, try_rm
|
||||
|
||||
|
||||
import io
|
||||
|
||||
import xml.etree.ElementTree
|
||||
|
||||
import youtube_dl.YoutubeDL
|
||||
import youtube_dl.extractor
|
||||
|
||||
|
||||
class YoutubeDL(youtube_dl.YoutubeDL):
    """YoutubeDL variant for tests: ``to_stderr`` is rebound to ``to_screen``."""

    def __init__(self, *args, **kwargs):
        parent = super(YoutubeDL, self)
        parent.__init__(*args, **kwargs)
        # Anything sent to to_stderr now goes through to_screen instead.
        self.to_stderr = self.to_screen
|
||||
|
||||
params = get_params({
|
||||
'writeannotations': True,
|
||||
'skip_download': True,
|
||||
'writeinfojson': False,
|
||||
'format': 'flv',
|
||||
})
|
||||
|
||||
|
||||
|
||||
TEST_ID = 'gr51aVj-mLg'
|
||||
ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml'
|
||||
EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label']
|
||||
|
||||
class TestAnnotations(unittest.TestCase):
    """Downloads TEST_ID with the module-level params and checks the
    annotations XML file that is written next to it."""

    def setUp(self):
        # Clear old files
        self.tearDown()

    def test_info_json(self):
        # Work on a copy: entries are removed as they are matched, and two
        # annotations could have the same text.
        remaining = list(EXPECTED_ANNOTATIONS)
        extractor = youtube_dl.extractor.YoutubeIE()
        downloader = YoutubeDL(params)
        downloader.add_info_extractor(extractor)
        downloader.download([TEST_ID])
        self.assertTrue(os.path.exists(ANNOTATIONS_FILE))

        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as handle:
            tree = xml.etree.ElementTree.parse(handle)
        self.assertTrue(tree is not None, 'Failed to parse annotations XML')

        document = tree.getroot()
        self.assertEqual(document.tag, 'document')
        container = document.find('annotations')
        self.assertEqual(container.tag, 'annotations')

        # Not all the annotations have TEXT children and the annotations
        # are returned unsorted.
        for node in container.findall('annotation'):
            self.assertEqual(node.tag, 'annotation')
            if node.get('type') != 'text':
                continue
            label = node.find('TEXT').text
            self.assertTrue(label in remaining)  # assertIn only added in python 2.7
            # Remove the first occurrence; there could be more than one
            # annotation with the same text.
            remaining.remove(label)

        # We should have seen (and removed) all the expected annotation texts.
        self.assertEqual(len(remaining), 0, 'Not all expected annotations were found.')

    def tearDown(self):
        try_rm(ANNOTATIONS_FILE)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -1,40 +1,36 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
import json
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Allow direct execution
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from test.helper import get_params
|
||||
|
||||
|
||||
import io
|
||||
import json
|
||||
|
||||
import youtube_dl.YoutubeDL
|
||||
import youtube_dl.extractor
|
||||
from youtube_dl.utils import *
|
||||
|
||||
PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")
|
||||
|
||||
# General configuration (from __init__, not very elegant...)
|
||||
jar = compat_cookiejar.CookieJar()
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
proxy_handler = compat_urllib_request.ProxyHandler()
|
||||
opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
compat_urllib_request.install_opener(opener)
|
||||
|
||||
class YoutubeDL(youtube_dl.YoutubeDL):
    """YoutubeDL variant for tests: ``to_stderr`` is rebound to ``to_screen``."""

    def __init__(self, *args, **kwargs):
        parent = super(YoutubeDL, self)
        parent.__init__(*args, **kwargs)
        # Anything sent to to_stderr now goes through to_screen instead.
        self.to_stderr = self.to_screen
|
||||
|
||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||
params = json.load(pf)
|
||||
params['writeinfojson'] = True
|
||||
params['skip_download'] = True
|
||||
params['writedescription'] = True
|
||||
params = get_params({
|
||||
'writeinfojson': True,
|
||||
'skip_download': True,
|
||||
'writedescription': True,
|
||||
})
|
||||
|
||||
|
||||
TEST_ID = 'BaW_jenozKc'
|
||||
INFO_JSON_FILE = TEST_ID + '.mp4.info.json'
|
||||
INFO_JSON_FILE = TEST_ID + '.info.json'
|
||||
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
||||
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
||||
|
||||
@ -42,6 +38,7 @@ This is a test video for youtube-dl.
|
||||
|
||||
For more information, contact phihag@phihag.de .'''
|
||||
|
||||
|
||||
class TestInfoJSON(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# Clear old files
|
||||
|
@ -1,27 +1,32 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
import json
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE, YoutubeShowIE
|
||||
from youtube_dl.utils import *
|
||||
from test.helper import FakeYDL
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
YoutubeUserIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
)
|
||||
|
||||
from helper import FakeYDL
|
||||
|
||||
class TestYoutubeLists(unittest.TestCase):
|
||||
def assertIsPlaylist(self,info):
|
||||
def assertIsPlaylist(self, info):
|
||||
"""Make sure the info has '_type' set to 'playlist'"""
|
||||
self.assertEqual(info['_type'], 'playlist')
|
||||
|
||||
def test_youtube_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'ytdl test PL')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
@ -38,13 +43,13 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
def test_issue_673(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('PLBB231211A4F62143')[0]
|
||||
result = ie.extract('PLBB231211A4F62143')
|
||||
self.assertTrue(len(result['entries']) > 25)
|
||||
|
||||
def test_youtube_playlist_long(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertTrue(len(result['entries']) >= 799)
|
||||
|
||||
@ -52,7 +57,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
#651
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||
@ -60,7 +65,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
def test_youtube_playlist_empty(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(len(result['entries']), 0)
|
||||
|
||||
@ -68,7 +73,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
entries = result['entries']
|
||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
@ -78,29 +83,29 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeChannelIE(dl)
|
||||
#test paginated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
|
||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
||||
self.assertTrue(len(result['entries']) > 90)
|
||||
#test autogenerated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
|
||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
self.assertTrue(len(result['entries']) >= 18)
|
||||
|
||||
def test_youtube_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeUserIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
|
||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
||||
self.assertTrue(len(result['entries']) >= 320)
|
||||
|
||||
def test_youtube_safe_search(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
|
||||
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
|
||||
self.assertEqual(len(result['entries']), 2)
|
||||
|
||||
def test_youtube_show(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeShowIE(dl)
|
||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
||||
self.assertTrue(len(result) >= 4)
|
||||
self.assertTrue(len(result) >= 3)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -1,14 +1,15 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import io
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.utils import compat_str, compat_urlretrieve
|
||||
|
@ -1,84 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
import hashlib
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.utils import *
|
||||
from helper import FakeYDL
|
||||
|
||||
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
class TestYoutubeSubtitles(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.DL = FakeYDL()
|
||||
self.url = 'QRS8MkLhQmM'
|
||||
def getInfoDict(self):
|
||||
IE = YoutubeIE(self.DL)
|
||||
info_dict = IE.extract(self.url)
|
||||
return info_dict
|
||||
def getSubtitles(self):
|
||||
info_dict = self.getInfoDict()
|
||||
return info_dict[0]['subtitles']
|
||||
def test_youtube_no_writesubtitles(self):
|
||||
self.DL.params['writesubtitles'] = False
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(subtitles, None)
|
||||
def test_youtube_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||
def test_youtube_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitleslangs'] = ['it']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||
def test_youtube_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 13)
|
||||
def test_youtube_subtitles_sbv_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitlesformat'] = 'sbv'
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||
def test_youtube_subtitles_vtt_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitlesformat'] = 'vtt'
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
|
||||
def test_youtube_list_subtitles(self):
|
||||
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
|
||||
self.DL.params['listsubtitles'] = True
|
||||
info_dict = self.getInfoDict()
|
||||
self.assertEqual(info_dict, None)
|
||||
def test_youtube_automatic_captions(self):
|
||||
self.url = '8YoUxe5ncPo'
|
||||
self.DL.params['writeautomaticsub'] = True
|
||||
self.DL.params['subtitleslangs'] = ['it']
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertTrue(subtitles['it'] is not None)
|
||||
def test_youtube_nosubtitles(self):
|
||||
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
||||
self.url = 'sAjKT8FhjI8'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles), 0)
|
||||
def test_youtube_multiple_langs(self):
|
||||
self.url = 'QRS8MkLhQmM'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
langs = ['it', 'fr', 'de']
|
||||
self.DL.params['subtitleslangs'] = langs
|
||||
subtitles = self.getSubtitles()
|
||||
for lang in langs:
|
||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
7
tox.ini
7
tox.ini
@ -1,5 +1,8 @@
|
||||
[tox]
|
||||
envlist = py26,py27,py33
|
||||
[testenv]
|
||||
deps = nose
|
||||
commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test
|
||||
deps =
|
||||
nose
|
||||
coverage
|
||||
commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html
|
||||
# test.test_download:TestDownload.test_NowVideo
|
||||
|
@ -1,15 +1,19 @@
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
if os.name == 'nt':
|
||||
import ctypes
|
||||
|
||||
from .utils import *
|
||||
from .utils import (
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
ContentTooShortError,
|
||||
determine_ext,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
sanitize_open,
|
||||
timeconvert,
|
||||
)
|
||||
|
||||
|
||||
class FileDownloader(object):
|
||||
@ -49,20 +53,6 @@ class FileDownloader(object):
|
||||
self._progress_hooks = []
|
||||
self.params = params
|
||||
|
||||
@staticmethod
|
||||
def format_bytes(bytes):
|
||||
if bytes is None:
|
||||
return 'N/A'
|
||||
if type(bytes) is str:
|
||||
bytes = float(bytes)
|
||||
if bytes == 0.0:
|
||||
exponent = 0
|
||||
else:
|
||||
exponent = int(math.log(bytes, 1024.0))
|
||||
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
|
||||
converted = float(bytes) / float(1024 ** exponent)
|
||||
return '%.2f%s' % (converted, suffix)
|
||||
|
||||
@staticmethod
|
||||
def format_seconds(seconds):
|
||||
(mins, secs) = divmod(seconds, 60)
|
||||
@ -113,7 +103,7 @@ class FileDownloader(object):
|
||||
def format_speed(speed):
|
||||
if speed is None:
|
||||
return '%10s' % '---b/s'
|
||||
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
|
||||
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
@ -144,16 +134,8 @@ class FileDownloader(object):
|
||||
def to_stderr(self, message):
|
||||
self.ydl.to_screen(message)
|
||||
|
||||
def to_cons_title(self, message):
|
||||
"""Set console/terminal window title to message."""
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
||||
# c_wchar_p() might not be necessary if `message` is
|
||||
# already of type unicode()
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||
elif 'TERM' in os.environ:
|
||||
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
|
||||
def to_console_title(self, message):
|
||||
self.ydl.to_console_title(message)
|
||||
|
||||
def trouble(self, *args, **kargs):
|
||||
self.ydl.trouble(*args, **kargs)
|
||||
@ -194,7 +176,7 @@ class FileDownloader(object):
|
||||
if old_filename == new_filename:
|
||||
return
|
||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||
except (IOError, OSError) as err:
|
||||
except (IOError, OSError):
|
||||
self.report_error(u'unable to rename file')
|
||||
|
||||
def try_utime(self, filename, last_modified_hdr):
|
||||
@ -227,8 +209,14 @@ class FileDownloader(object):
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
|
||||
eta_str = self.format_eta(eta)
|
||||
percent_str = self.format_percent(percent)
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
eta_str = 'Unknown ETA'
|
||||
if percent is not None:
|
||||
percent_str = self.format_percent(percent)
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(u'[download] %s of %s at %s ETA %s' %
|
||||
@ -236,7 +224,7 @@ class FileDownloader(object):
|
||||
else:
|
||||
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
|
||||
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
|
||||
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
|
||||
self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
|
||||
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
@ -251,7 +239,7 @@ class FileDownloader(object):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||
except (UnicodeEncodeError) as err:
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen(u'[download] The file has already been downloaded')
|
||||
|
||||
def report_unable_to_resume(self):
|
||||
@ -267,9 +255,65 @@ class FileDownloader(object):
|
||||
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
|
||||
(clear_line, data_len_str, self.format_seconds(tot_time)))
|
||||
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
|
||||
def run_rtmpdump(args):
    """Run rtmpdump with the given argument list and relay its progress.

    rtmpdump writes its progress to stderr, rewriting the same line with
    carriage returns.  The stream is therefore read one byte at a time and
    split on both CR and LF.  Lines that look like progress reports are
    turned into report_progress()/_hook_progress() calls; every other line
    is echoed with a '[rtmpdump] ' prefix when the 'verbose' param is set.

    Relies on `self`, `filename` and `tmpfilename` from the enclosing scope.

    Returns the exit code of the rtmpdump process.
    """
    start = time.time()
    # Baseline taken from the first progress line, so that speed and ETA
    # only account for what was transferred during this run.
    resume_percent = None
    resume_downloaded_data_len = None
    proc = subprocess.Popen(args, stderr=subprocess.PIPE)
    cursor_in_new_line = True
    proc_stderr_closed = False
    while not proc_stderr_closed:
        # read line from stderr
        line = u''
        while True:
            char = proc.stderr.read(1)
            if not char:
                proc_stderr_closed = True
                break
            if char in [b'\r', b'\n']:
                break
            line += char.decode('ascii', 'replace')
        if not line:
            # Nothing to report: stderr was closed or the line was empty
            continue
        mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
        if mobj:
            downloaded_data_len = int(float(mobj.group(1))*1024)
            percent = float(mobj.group(2))
            # Compare against None: 0.0 is a valid first sample and must
            # not cause the baseline to be captured again later on.
            if resume_percent is None:
                resume_percent = percent
                resume_downloaded_data_len = downloaded_data_len
            eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
            speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
            # The total size is only an estimate extrapolated from the
            # percentage, hence the '~' prefix in the displayed string.
            data_len = None
            if percent > 0:
                data_len = int(downloaded_data_len * 100 / percent)
            data_len_str = u'~' + format_bytes(data_len)
            self.report_progress(percent, data_len_str, speed, eta)
            cursor_in_new_line = False
            self._hook_progress({
                'downloaded_bytes': downloaded_data_len,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
                'eta': eta,
                'speed': speed,
            })
        elif self.params.get('verbose', False):
            if not cursor_in_new_line:
                # Terminate the in-place progress line before logging
                self.to_screen(u'')
            cursor_in_new_line = True
            self.to_screen(u'[rtmpdump] '+line)
    proc.wait()
    if not cursor_in_new_line:
        self.to_screen(u'')
    return proc.returncode
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
# Check for rtmpdump first
|
||||
try:
|
||||
@ -277,12 +321,11 @@ class FileDownloader(object):
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||
return False
|
||||
verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrupted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
|
||||
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||
if player_url is not None:
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
@ -291,31 +334,53 @@ class FileDownloader(object):
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
# on Python 2.x
|
||||
# See http://stackoverflow.com/a/9951851/35070
|
||||
subprocess_encoding = sys.getfilesystemencoding()
|
||||
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||
else:
|
||||
subprocess_encoding = None
|
||||
|
||||
if self.params.get('verbose', False):
|
||||
if subprocess_encoding:
|
||||
str_args = [
|
||||
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||
for a in args]
|
||||
else:
|
||||
str_args = args
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, args))
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
|
||||
retval = subprocess.call(args)
|
||||
while retval == 2 or retval == 1:
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
|
||||
retval = run_rtmpdump(args)
|
||||
|
||||
while (retval == 2 or retval == 1) and not test:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == 1:
|
||||
break
|
||||
# Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||
self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = 0
|
||||
break
|
||||
if retval == 0:
|
||||
if retval == 0 or (test and retval == 2):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
@ -363,15 +428,20 @@ class FileDownloader(object):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
|
||||
# Check for ffmpeg first
|
||||
try:
|
||||
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
|
||||
return False
|
||||
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||
'-bsf:a', 'aac_adtstoasc', tmpfilename]
|
||||
|
||||
retval = subprocess.call(args)
|
||||
for program in ['avconv', 'ffmpeg']:
|
||||
try:
|
||||
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
break
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
@ -408,7 +478,8 @@ class FileDownloader(object):
|
||||
info_dict.get('player_url', None),
|
||||
info_dict.get('page_url', None),
|
||||
info_dict.get('play_path', None),
|
||||
info_dict.get('tc_url', None))
|
||||
info_dict.get('tc_url', None),
|
||||
info_dict.get('rtmp_live', False))
|
||||
|
||||
# Attempt to download using mplayer
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
@ -512,7 +583,7 @@ class FileDownloader(object):
|
||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = self.format_bytes(data_len)
|
||||
data_len_str = format_bytes(data_len)
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
@ -547,12 +618,11 @@ class FileDownloader(object):
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
|
||||
eta = None
|
||||
eta = percent = None
|
||||
else:
|
||||
percent = self.calc_percent(byte_counter, data_len)
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
|
@ -3,7 +3,14 @@ import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .utils import *
|
||||
|
||||
from .utils import (
|
||||
compat_subprocess_get_DEVNULL,
|
||||
encodeFilename,
|
||||
PostProcessingError,
|
||||
shell_quote,
|
||||
subtitles_filename,
|
||||
)
|
||||
|
||||
|
||||
class PostProcessor(object):
|
||||
@ -82,6 +89,8 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
+ opts +
|
||||
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
|
||||
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout,stderr = p.communicate()
|
||||
if p.returncode != 0:
|
||||
@ -177,7 +186,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
extension = self._preferredcodec
|
||||
more_opts = []
|
||||
if self._preferredquality is not None:
|
||||
if int(self._preferredquality) < 10:
|
||||
# The opus codec doesn't support the -aq option
|
||||
if int(self._preferredquality) < 10 and extension != 'opus':
|
||||
more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
|
||||
else:
|
||||
more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
|
||||
@ -467,3 +477,35 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
|
||||
return True, information
|
||||
|
||||
|
||||
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Post-processor that writes title/date/artist tags into the file."""

    def run(self, info):
        """Add the available metadata to info['filepath'] using ffmpeg.

        The tags are written to a temporary copy (streams are copied, not
        re-encoded) which then replaces the original file.

        Returns a (True, info) tuple.
        """
        metadata = {}
        for tag, key in (('title', 'title'), ('date', 'upload_date')):
            if info.get(key) is not None:
                metadata[tag] = info[key]
        # The uploader name is preferred over the uploader id
        artist = info.get('uploader')
        if artist is None:
            artist = info.get('uploader_id')
        if artist is not None:
            metadata['artist'] = artist

        if not metadata:
            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
            return True, info

        filename = info['filepath']
        # Extension without the leading dot, passed to ffmpeg as the format
        ext = os.path.splitext(filename)[1][1:]
        temp_filename = filename + u'.temp'

        options = ['-c', 'copy']
        for name, value in metadata.items():
            options += ['-metadata', '%s=%s' % (name, value)]
        options += ['-f', ext]

        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
        self.run_ffmpeg(filename, temp_filename, options)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return True, info
|
||||
|
@ -3,18 +3,55 @@
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import errno
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from .utils import *
|
||||
if os.name == 'nt':
|
||||
import ctypes
|
||||
|
||||
from .utils import (
|
||||
compat_cookiejar,
|
||||
compat_http_client,
|
||||
compat_print,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
ContentTooShortError,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
determine_ext,
|
||||
DownloadError,
|
||||
encodeFilename,
|
||||
ExtractorError,
|
||||
format_bytes,
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
PostProcessingError,
|
||||
platform_name,
|
||||
preferredencoding,
|
||||
SameFileError,
|
||||
sanitize_filename,
|
||||
subtitles_filename,
|
||||
takewhile_inclusive,
|
||||
UnavailableVideoError,
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLHandler,
|
||||
)
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .FileDownloader import FileDownloader
|
||||
from .version import __version__
|
||||
|
||||
|
||||
class YoutubeDL(object):
|
||||
@ -56,6 +93,7 @@ class YoutubeDL(object):
|
||||
forcethumbnail: Force printing thumbnail URL.
|
||||
forcedescription: Force printing description.
|
||||
forcefilename: Force printing final filename.
|
||||
forcejson: Force printing info_dict as JSON.
|
||||
simulate: Do not download the video files.
|
||||
format: Video format code.
|
||||
format_limit: Highest quality format to try.
|
||||
@ -67,9 +105,11 @@ class YoutubeDL(object):
|
||||
playlistend: Playlist item to end at.
|
||||
matchtitle: Download only matching titles.
|
||||
rejecttitle: Reject downloads for matching titles.
|
||||
logger: Log messages to a logging.Logger instance.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
writedescription: Write the video description to a .description file
|
||||
writeinfojson: Write the video description to a .info.json file
|
||||
writeannotations: Write the video annotations to a .annotations.xml file
|
||||
writethumbnail: Write the thumbnail image to a file
|
||||
writesubtitles: Write the video subtitles to a file
|
||||
writeautomaticsub: Write the automatic subtitles to a file
|
||||
@ -84,7 +124,15 @@ class YoutubeDL(object):
|
||||
cachedir: Location of the cache files in the filesystem.
|
||||
None to disable filesystem cache.
|
||||
noplaylist: Download single video instead of a playlist if in doubt.
|
||||
|
||||
age_limit: An integer representing the user's age in years.
|
||||
Unsuitable videos for the given age are skipped.
|
||||
download_archive: File name of a file where all downloads are recorded.
|
||||
Videos already present in the file are not downloaded
|
||||
again.
|
||||
cookiefile: File name where cookies should be read from and dumped to.
|
||||
nocheckcertificate:Do not verify SSL certificates
|
||||
proxy: URL of the proxy server to use
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||
@ -98,7 +146,7 @@ class YoutubeDL(object):
|
||||
_num_downloads = None
|
||||
_screen_file = None
|
||||
|
||||
def __init__(self, params):
|
||||
def __init__(self, params={}):
|
||||
"""Create a YoutubeDL object with the given options."""
|
||||
self._ies = []
|
||||
self._ies_instances = {}
|
||||
@ -113,7 +161,7 @@ class YoutubeDL(object):
|
||||
and not params['restrictfilenames']):
|
||||
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
||||
self.report_warning(
|
||||
u'Assuming --restrict-filenames isnce file system encoding '
|
||||
u'Assuming --restrict-filenames since file system encoding '
|
||||
u'cannot encode all charactes. '
|
||||
u'Set the LC_ALL environment variable to fix this.')
|
||||
params['restrictfilenames'] = True
|
||||
@ -121,9 +169,11 @@ class YoutubeDL(object):
|
||||
self.params = params
|
||||
self.fd = FileDownloader(self, self.params)
|
||||
|
||||
if '%(stitle)s' in self.params['outtmpl']:
|
||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
def add_info_extractor(self, ie):
|
||||
"""Add an InfoExtractor object to the end of the list."""
|
||||
self._ies.append(ie)
|
||||
@ -156,7 +206,9 @@ class YoutubeDL(object):
|
||||
|
||||
def to_screen(self, message, skip_eol=False):
|
||||
"""Print message to stdout if not in quiet mode."""
|
||||
if not self.params.get('quiet', False):
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].debug(message)
|
||||
elif not self.params.get('quiet', False):
|
||||
terminator = [u'\n', u''][skip_eol]
|
||||
output = message + terminator
|
||||
write_string(output, self._screen_file)
|
||||
@ -164,14 +216,47 @@ class YoutubeDL(object):
|
||||
def to_stderr(self, message):
|
||||
"""Print message to stderr."""
|
||||
assert type(message) == type(u'')
|
||||
output = message + u'\n'
|
||||
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
||||
output = output.encode(preferredencoding())
|
||||
sys.stderr.write(output)
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].error(message)
|
||||
else:
|
||||
output = message + u'\n'
|
||||
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
||||
output = output.encode(preferredencoding())
|
||||
sys.stderr.write(output)
|
||||
|
||||
def fixed_template(self):
|
||||
"""Checks if the output template is fixed."""
|
||||
return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
|
||||
def to_console_title(self, message):
    """Set the console/terminal window title to message.

    Does nothing unless the 'consoletitle' param is set.
    """
    if self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            write_string(u'\033]0;%s\007' % message, self._screen_file)
|
||||
|
||||
def save_console_title(self):
    """Ask the terminal to remember its current title.

    Only acts when the 'consoletitle' param is set and TERM is defined.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
|
||||
|
||||
def restore_console_title(self):
    """Ask the terminal to bring back the previously saved title.

    Only acts when the 'consoletitle' param is set and TERM is defined.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
|
||||
|
||||
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
|
||||
|
||||
def __exit__(self, *args):
    """Leave the context: restore the console title and save the cookies.

    The cookie jar is only saved when the 'cookiefile' param is set.
    """
    self.restore_console_title()

    cookie_file = self.params.get('cookiefile')
    if cookie_file is None:
        return
    self.cookiejar.save()
|
||||
|
||||
def trouble(self, message=None, tb=None):
|
||||
"""Determine action to take when a download problem appears.
|
||||
@ -209,10 +294,10 @@ class YoutubeDL(object):
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
if sys.stderr.isatty() and os.name != 'nt':
|
||||
_msg_header=u'\033[0;33mWARNING:\033[0m'
|
||||
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header=u'WARNING:'
|
||||
warning_message=u'%s %s' % (_msg_header,message)
|
||||
_msg_header = u'WARNING:'
|
||||
warning_message = u'%s %s' % (_msg_header, message)
|
||||
self.to_stderr(warning_message)
|
||||
|
||||
def report_error(self, message, tb=None):
|
||||
@ -227,19 +312,6 @@ class YoutubeDL(object):
|
||||
error_message = u'%s %s' % (_msg_header, message)
|
||||
self.trouble(error_message, tb)
|
||||
|
||||
def slow_down(self, start_time, byte_counter):
|
||||
"""Sleep if the download speed is over the rate limit."""
|
||||
rate_limit = self.params.get('ratelimit', None)
|
||||
if rate_limit is None or byte_counter == 0:
|
||||
return
|
||||
now = time.time()
|
||||
elapsed = now - start_time
|
||||
if elapsed <= 0.0:
|
||||
return
|
||||
speed = float(byte_counter) / elapsed
|
||||
if speed > rate_limit:
|
||||
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
|
||||
|
||||
def report_writedescription(self, descfn):
|
||||
""" Report that the description file is being written """
|
||||
self.to_screen(u'[info] Writing video description to: ' + descfn)
|
||||
@ -252,11 +324,15 @@ class YoutubeDL(object):
|
||||
""" Report that the metadata file has been written """
|
||||
self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
|
||||
|
||||
def report_writeannotations(self, annofn):
|
||||
""" Report that the annotations file has been written. """
|
||||
self.to_screen(u'[info] Writing video annotations to: ' + annofn)
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||
except (UnicodeEncodeError) as err:
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen(u'[download] The file has already been downloaded')
|
||||
|
||||
def increment_downloads(self):
|
||||
@ -274,16 +350,18 @@ class YoutubeDL(object):
|
||||
autonumber_size = 5
|
||||
autonumber_templ = u'%0' + str(autonumber_size) + u'd'
|
||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||
if template_dict['playlist_index'] is not None:
|
||||
if template_dict.get('playlist_index') is not None:
|
||||
template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
|
||||
|
||||
sanitize = lambda k,v: sanitize_filename(
|
||||
sanitize = lambda k, v: sanitize_filename(
|
||||
u'NA' if v is None else compat_str(v),
|
||||
restricted=self.params.get('restrictfilenames'),
|
||||
is_id=(k==u'id'))
|
||||
template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
|
||||
is_id=(k == u'id'))
|
||||
template_dict = dict((k, sanitize(k, v))
|
||||
for k, v in template_dict.items())
|
||||
|
||||
filename = self.params['outtmpl'] % template_dict
|
||||
tmpl = os.path.expanduser(self.params['outtmpl'])
|
||||
filename = tmpl % template_dict
|
||||
return filename
|
||||
except KeyError as err:
|
||||
self.report_error(u'Erroneous output template')
|
||||
@ -295,29 +373,44 @@ class YoutubeDL(object):
|
||||
def _match_entry(self, info_dict):
|
||||
""" Returns None iff the file should be downloaded """
|
||||
|
||||
title = info_dict['title']
|
||||
matchtitle = self.params.get('matchtitle', False)
|
||||
if matchtitle:
|
||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||
rejecttitle = self.params.get('rejecttitle', False)
|
||||
if rejecttitle:
|
||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||
if 'title' in info_dict:
|
||||
# This can happen when we're just evaluating the playlist
|
||||
title = info_dict['title']
|
||||
matchtitle = self.params.get('matchtitle', False)
|
||||
if matchtitle:
|
||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||
rejecttitle = self.params.get('rejecttitle', False)
|
||||
if rejecttitle:
|
||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||
date = info_dict.get('upload_date', None)
|
||||
if date is not None:
|
||||
dateRange = self.params.get('daterange', DateRange())
|
||||
if date not in dateRange:
|
||||
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||
age_limit = self.params.get('age_limit')
|
||||
if age_limit is not None:
|
||||
if age_limit < info_dict.get('age_limit', 0):
|
||||
return u'Skipping "' + title + '" because it is age restricted'
|
||||
if self.in_download_archive(info_dict):
|
||||
return (u'%s has already been recorded in archive'
|
||||
% info_dict.get('title', info_dict.get('id', u'video')))
|
||||
return None
|
||||
|
||||
|
||||
@staticmethod
|
||||
def add_extra_info(info_dict, extra_info):
    """Copy into info_dict every key of extra_info that it does not have yet.

    Keys already present in info_dict keep their current value.
    """
    for name in extra_info:
        if name not in info_dict:
            info_dict[name] = extra_info[name]
|
||||
|
||||
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
||||
'''
|
||||
Returns a list with a dictionary for each video we find.
|
||||
If 'download', also downloads the videos.
|
||||
extra_info is a dict containing the extra values to add to each result
|
||||
'''
|
||||
|
||||
|
||||
if ie_key:
|
||||
ies = [self.get_info_extractor(ie_key)]
|
||||
else:
|
||||
@ -337,17 +430,17 @@ class YoutubeDL(object):
|
||||
break
|
||||
if isinstance(ie_result, list):
|
||||
# Backwards compatibility: old IE result format
|
||||
for result in ie_result:
|
||||
result.update(extra_info)
|
||||
ie_result = {
|
||||
'_type': 'compat_list',
|
||||
'entries': ie_result,
|
||||
}
|
||||
else:
|
||||
ie_result.update(extra_info)
|
||||
if 'extractor' not in ie_result:
|
||||
ie_result['extractor'] = ie.IE_NAME
|
||||
return self.process_ie_result(ie_result, download=download)
|
||||
self.add_extra_info(ie_result,
|
||||
{
|
||||
'extractor': ie.IE_NAME,
|
||||
'webpage_url': url,
|
||||
'extractor_key': ie.ie_key(),
|
||||
})
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
except ExtractorError as de: # An error we somewhat expected
|
||||
self.report_error(compat_str(de), de.format_traceback())
|
||||
break
|
||||
@ -359,7 +452,7 @@ class YoutubeDL(object):
|
||||
raise
|
||||
else:
|
||||
self.report_error(u'no suitable InfoExtractor: %s' % url)
|
||||
|
||||
|
||||
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||
"""
|
||||
Take the result of the ie(may be modified) and resolve all unresolved
|
||||
@ -371,14 +464,8 @@ class YoutubeDL(object):
|
||||
|
||||
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
||||
if result_type == 'video':
|
||||
ie_result.update(extra_info)
|
||||
if 'playlist' not in ie_result:
|
||||
# It isn't part of a playlist
|
||||
ie_result['playlist'] = None
|
||||
ie_result['playlist_index'] = None
|
||||
if download:
|
||||
self.process_info(ie_result)
|
||||
return ie_result
|
||||
self.add_extra_info(ie_result, extra_info)
|
||||
return self.process_video_result(ie_result, download=download)
|
||||
elif result_type == 'url':
|
||||
# We have to add extra_info to the results because it may be
|
||||
# contained in a playlist
|
||||
@ -387,9 +474,10 @@ class YoutubeDL(object):
|
||||
ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info)
|
||||
elif result_type == 'playlist':
|
||||
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
playlist_results = []
|
||||
|
||||
@ -407,17 +495,21 @@ class YoutubeDL(object):
|
||||
self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
|
||||
for i,entry in enumerate(entries,1):
|
||||
self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
|
||||
extra = {
|
||||
'playlist': playlist,
|
||||
'playlist_index': i + playliststart,
|
||||
}
|
||||
if not 'extractor' in entry:
|
||||
# We set the extractor, if it's an url it will be set then to
|
||||
# the new extractor, but if it's already a video we must make
|
||||
# sure it's present: see issue #877
|
||||
entry['extractor'] = ie_result['extractor']
|
||||
'playlist': playlist,
|
||||
'playlist_index': i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
reason = self._match_entry(entry)
|
||||
if reason is not None:
|
||||
self.to_screen(u'[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.process_ie_result(entry,
|
||||
download=download,
|
||||
extra_info=extra)
|
||||
@ -426,16 +518,122 @@ class YoutubeDL(object):
|
||||
return ie_result
|
||||
elif result_type == 'compat_list':
|
||||
def _fixup(r):
|
||||
r.setdefault('extractor', ie_result['extractor'])
|
||||
self.add_extra_info(r,
|
||||
{
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
})
|
||||
return r
|
||||
ie_result['entries'] = [
|
||||
self.process_ie_result(_fixup(r), download=download)
|
||||
self.process_ie_result(_fixup(r), download, extra_info)
|
||||
for r in ie_result['entries']
|
||||
]
|
||||
return ie_result
|
||||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
def select_format(self, format_spec, available_formats):
    """Pick one format dict out of available_formats.

    available_formats is indexed so that position 0 is returned for
    'worst' and the last position for 'best'.  format_spec may be 'best'
    (or None), 'worst', one of the extensions mp4/flv/webm/3gp, or a
    format_id.  Returns the last matching format, or None when nothing
    matches (including when available_formats is empty).
    """
    if not available_formats:
        # Nothing to choose from; None is the "not available" signal.
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    if format_spec in (u'mp4', u'flv', u'webm', u'3gp'):
        field = 'ext'
    else:
        field = 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
|
||||
|
||||
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a single video result and optionally download.

    Fills in missing 'playlist', 'format_id', 'format' and 'ext' fields,
    applies the 'format_limit', 'prefer_free_formats' and 'format'
    params, and calls process_info for every selected format when
    download is true.

    Returns info_dict updated with the last selected format, or None when
    the 'listformats' param is set (the formats are only printed).
    Raises ExtractorError if none of the requested formats is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, format) in enumerate(formats):
        if format.get('format_id') is None:
            format['format_id'] = compat_str(i)
        if format.get('format') is None:
            format['format'] = u'{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in format:
            format['ext'] = determine_ext(format['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # Unknown dimensions sort first.  None cannot be ordered
            # against ints on Python 3, so it is mapped to -1.
            height = f.get('height')
            width = f.get('width')
            # We only compare the extension if they have the same height and width
            return (
                -1 if height is None else height,
                -1 if width is None else width,
                ext_ord,
            )
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        req_formats = req_format.split('/')
        for rf in req_formats:
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
|
||||
|
||||
def process_info(self, info_dict):
|
||||
"""Process a single resolved IE result."""
|
||||
|
||||
@ -467,20 +665,22 @@ class YoutubeDL(object):
|
||||
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
compat_print(info_dict['title'])
|
||||
compat_print(info_dict['fulltitle'])
|
||||
if self.params.get('forceid', False):
|
||||
compat_print(info_dict['id'])
|
||||
if self.params.get('forceurl', False):
|
||||
# For RTMP URLs, also include the playpath
|
||||
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
|
||||
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
|
||||
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
||||
compat_print(info_dict['thumbnail'])
|
||||
if self.params.get('forcedescription', False) and 'description' in info_dict:
|
||||
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
||||
compat_print(info_dict['description'])
|
||||
if self.params.get('forcefilename', False) and filename is not None:
|
||||
compat_print(filename)
|
||||
if self.params.get('forceformat', False):
|
||||
compat_print(info_dict['format'])
|
||||
if self.params.get('forcejson', False):
|
||||
compat_print(json.dumps(info_dict))
|
||||
|
||||
# Do nothing else if in simulate mode
|
||||
if self.params.get('simulate', False):
|
||||
@ -509,14 +709,26 @@ class YoutubeDL(object):
|
||||
self.report_error(u'Cannot write description file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('writeannotations', False):
|
||||
try:
|
||||
annofn = filename + u'.annotations.xml'
|
||||
self.report_writeannotations(annofn)
|
||||
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
||||
annofile.write(info_dict['annotations'])
|
||||
except (KeyError, TypeError):
|
||||
self.report_warning(u'There are no annotations to write.')
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write annotations file: ' + annofn)
|
||||
return
|
||||
|
||||
subtitles_are_requested = any([self.params.get('writesubtitles', False),
|
||||
self.params.get('writeautomaticsub')])
|
||||
|
||||
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
|
||||
# subtitles download errors are already managed as troubles in relevant IE
|
||||
# that way it will silently go on when used with unsupporting IE
|
||||
subtitles = info_dict['subtitles']
|
||||
sub_format = self.params.get('subtitlesformat')
|
||||
sub_format = self.params.get('subtitlesformat', 'srt')
|
||||
for sub_lang in subtitles.keys():
|
||||
sub = subtitles[sub_lang]
|
||||
if sub is None:
|
||||
@ -531,10 +743,10 @@ class YoutubeDL(object):
|
||||
return
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = filename + u'.info.json'
|
||||
infofn = os.path.splitext(filename)[0] + u'.info.json'
|
||||
self.report_writeinfojson(infofn)
|
||||
try:
|
||||
json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
|
||||
json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
|
||||
write_json_file(json_info_dict, encodeFilename(infofn))
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'Cannot write metadata to JSON file ' + infofn)
|
||||
@ -578,15 +790,19 @@ class YoutubeDL(object):
|
||||
self.report_error(u'postprocessing: %s' % str(err))
|
||||
return
|
||||
|
||||
self.record_download_archive(info_dict)
|
||||
|
||||
def download(self, url_list):
|
||||
"""Download a given list of URLs."""
|
||||
if len(url_list) > 1 and self.fixed_template():
|
||||
if (len(url_list) > 1 and
|
||||
'%' not in self.params['outtmpl']
|
||||
and self.params.get('max_downloads') != 1):
|
||||
raise SameFileError(self.params['outtmpl'])
|
||||
|
||||
for url in url_list:
|
||||
try:
|
||||
#It also downloads the videos
|
||||
videos = self.extract_info(url)
|
||||
self.extract_info(url)
|
||||
except UnavailableVideoError:
|
||||
self.report_error(u'unable to download video')
|
||||
except MaxDownloadsReached:
|
||||
@ -602,7 +818,7 @@ class YoutubeDL(object):
|
||||
keep_video = None
|
||||
for pp in self._pps:
|
||||
try:
|
||||
keep_video_wish,new_info = pp.run(info)
|
||||
keep_video_wish, new_info = pp.run(info)
|
||||
if keep_video_wish is not None:
|
||||
if keep_video_wish:
|
||||
keep_video = keep_video_wish
|
||||
@ -617,3 +833,177 @@ class YoutubeDL(object):
|
||||
os.remove(encodeFilename(filename))
|
||||
except (IOError, OSError):
|
||||
self.report_warning(u'Unable to remove downloaded video file')
|
||||
|
||||
def _make_archive_id(self, info_dict):
|
||||
# Future-proof against any change in case
|
||||
# and backwards compatibility with prior versions
|
||||
extractor = info_dict.get('extractor_key')
|
||||
if extractor is None:
|
||||
if 'id' in info_dict:
|
||||
extractor = info_dict.get('ie_key') # key in a playlist
|
||||
if extractor is None:
|
||||
return None # Incomplete video information
|
||||
return extractor.lower() + u' ' + info_dict['id']
|
||||
|
||||
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no 'download_archive' param is set, when no
    archive id can be built for info_dict, or when the archive file does
    not exist yet.  Other IOErrors are re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing has been recorded yet.
        if ioe.errno != errno.ENOENT:
            raise
    return False
|
||||
|
||||
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    entry = self._make_archive_id(info_dict)
    assert entry
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry + u'\n')
|
||||
|
||||
@staticmethod
|
||||
def format_resolution(format, default='unknown'):
    """Return a short resolution description for a format dict.

    Precedence: 'audio only' when vcodec is 'none', then an explicit
    '_resolution' value, then 'WIDTHxHEIGHT' or 'HEIGHTp' depending on
    which dimensions are known, and finally ``default``.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    explicit = format.get('_resolution')
    if explicit is not None:
        return explicit

    height = format.get('height')
    if height is None:
        return default

    width = format.get('width')
    if width is None:
        return u'%sp' % height
    return u'%sx%s' % (width, height)
|
||||
|
||||
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    When info_dict has no 'formats' entry, or the entry is None, info_dict
    itself is listed as the only format.  With more than one format the
    first row is tagged '(worst)' and the last one '(best)'.
    """
    def format_note(fdict):
        # Build the free-text "note" column from codec/bitrate/size fields.
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        # The first column is one character wider than the longest id.
        template = u'%%-%ds%%-10s%%-12s%%s' % (idlen + 1)
        return template % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )

    # An explicit None means "single format", the same as a missing key.
    formats = info_dict.get('formats')
    if formats is None:
        formats = [info_dict]
    # Seed with the header width so an empty list does not break max().
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
|
||||
|
||||
def urlopen(self, req):
    """Open req through this instance's opener and return the result."""
    opener = self._opener
    return opener.open(req)
|
||||
|
||||
def print_debug_header(self):
    """Write version, interpreter and proxy details when 'verbose' is set.

    Prints the youtube-dl version, the short git HEAD hash when it can be
    determined, the Python version and platform, and the proxies of all
    handlers registered on the opener.  Does nothing unless the 'verbose'
    param is truthy.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout.  KeyboardInterrupt/SystemExit are not swallowed.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear is not available on every Python version.
            pass
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
|
||||
|
||||
def _setup_opener(self, timeout=20):
    """Build the URL opener, store it on the instance and install it.

    Honours the 'cookiefile', 'proxy' and 'nocheckcertificate' params.
    Sets self.cookiejar and self._opener, installs the opener globally
    and sets the default socket timeout to ``timeout``.
    """
    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        # An empty proxy option yields an empty proxy mapping.
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}

    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)
|
||||
|
@ -31,6 +31,11 @@ __authors__ = (
|
||||
'Huarong Huo',
|
||||
'Ismael Mejía',
|
||||
'Steffan \'Ruirize\' James',
|
||||
'Andras Elso',
|
||||
'Jelle van der Waa',
|
||||
'Marcin Cieślak',
|
||||
'Anton Larionov',
|
||||
'Takuya Tsuchida',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@ -42,20 +47,37 @@ import os
|
||||
import random
|
||||
import re
|
||||
import shlex
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import warnings
|
||||
import platform
|
||||
|
||||
|
||||
from .utils import *
|
||||
from .utils import (
|
||||
compat_print,
|
||||
DateRange,
|
||||
decodeOption,
|
||||
determine_ext,
|
||||
DownloadError,
|
||||
get_cachedir,
|
||||
MaxDownloadsReached,
|
||||
preferredencoding,
|
||||
SameFileError,
|
||||
std_headers,
|
||||
write_string,
|
||||
)
|
||||
from .update import update_self
|
||||
from .version import __version__
|
||||
from .FileDownloader import *
|
||||
from .FileDownloader import (
|
||||
FileDownloader,
|
||||
)
|
||||
from .extractor import gen_extractors
|
||||
from .version import __version__
|
||||
from .YoutubeDL import YoutubeDL
|
||||
from .PostProcessor import *
|
||||
from .PostProcessor import (
|
||||
FFmpegMetadataPP,
|
||||
FFmpegVideoConvertor,
|
||||
FFmpegExtractAudioPP,
|
||||
FFmpegEmbedSubtitlePP,
|
||||
)
|
||||
|
||||
|
||||
def parseOpts(overrideArguments=None):
|
||||
def _readOptions(filename_bytes):
|
||||
@ -105,7 +127,7 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
def _hide_login_info(opts):
|
||||
opts = list(opts)
|
||||
for private_opt in ['-p', '--password', '-u', '--username']:
|
||||
for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
|
||||
try:
|
||||
i = opts.index(private_opt)
|
||||
opts[i+1] = '<PRIVATE>'
|
||||
@ -151,6 +173,9 @@ def parseOpts(overrideArguments=None):
|
||||
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
|
||||
general.add_option('-i', '--ignore-errors',
|
||||
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
|
||||
general.add_option('--abort-on-error',
|
||||
action='store_false', dest='ignoreerrors',
|
||||
help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
|
||||
general.add_option('--dump-user-agent',
|
||||
action='store_true', dest='dump_user_agent',
|
||||
help='display the current browser identification', default=False)
|
||||
@ -168,7 +193,7 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
|
||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||
general.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(),
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
|
||||
general.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
@ -181,13 +206,21 @@ def parseOpts(overrideArguments=None):
|
||||
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
|
||||
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
|
||||
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
|
||||
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
|
||||
selection.add_option('--max-downloads', metavar='NUMBER',
|
||||
dest='max_downloads', type=int, default=None,
|
||||
help='Abort after downloading NUMBER files')
|
||||
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
||||
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
||||
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
||||
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
|
||||
selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
|
||||
help='download only videos suitable for the given age',
|
||||
default=None, type=int)
|
||||
selection.add_option('--download-archive', metavar='FILE',
|
||||
dest='download_archive',
|
||||
help='Download only videos not present in the archive file. Record all downloaded videos in it.')
|
||||
|
||||
|
||||
authentication.add_option('-u', '--username',
|
||||
@ -201,7 +234,7 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
|
||||
video_format.add_option('-f', '--format',
|
||||
action='store', dest='format', metavar='FORMAT',
|
||||
action='store', dest='format', metavar='FORMAT', default='best',
|
||||
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
||||
video_format.add_option('--all-formats',
|
||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||
@ -233,11 +266,11 @@ def parseOpts(overrideArguments=None):
|
||||
help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
|
||||
|
||||
downloader.add_option('-r', '--rate-limit',
|
||||
dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
|
||||
dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)')
|
||||
downloader.add_option('-R', '--retries',
|
||||
dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
|
||||
downloader.add_option('--buffer-size',
|
||||
dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
|
||||
dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024")
|
||||
downloader.add_option('--no-resize-buffer',
|
||||
action='store_true', dest='noresizebuffer',
|
||||
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
|
||||
@ -267,6 +300,9 @@ def parseOpts(overrideArguments=None):
|
||||
verbosity.add_option('--get-format',
|
||||
action='store_true', dest='getformat',
|
||||
help='simulate, quiet but print output format', default=False)
|
||||
verbosity.add_option('-j', '--dump-json',
|
||||
action='store_true', dest='dumpjson',
|
||||
help='simulate, quiet but print JSON information', default=False)
|
||||
verbosity.add_option('--newline',
|
||||
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
||||
verbosity.add_option('--no-progress',
|
||||
@ -279,6 +315,9 @@ def parseOpts(overrideArguments=None):
|
||||
verbosity.add_option('--dump-intermediate-pages',
|
||||
action='store_true', dest='dump_intermediate_pages', default=False,
|
||||
help='print downloaded pages to debug problems(very verbose)')
|
||||
verbosity.add_option('--write-pages',
|
||||
action='store_true', dest='write_pages', default=False,
|
||||
help='Write downloaded pages to files in the current directory')
|
||||
verbosity.add_option('--youtube-print-sig-code',
|
||||
action='store_true', dest='youtube_print_sig_code', default=False,
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
@ -298,7 +337,10 @@ def parseOpts(overrideArguments=None):
|
||||
help=('output filename template. Use %(title)s to get the title, '
|
||||
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
|
||||
'%(autonumber)s to get an automatically incremented number, '
|
||||
'%(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), '
|
||||
'%(ext)s for the filename extension, '
|
||||
'%(format)s for the format description (like "22 - 1280x720" or "HD"),'
|
||||
'%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"),'
|
||||
'%(upload_date)s for the upload date (YYYYMMDD), '
|
||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||
'%(id)s for the video id , %(playlist)s for the playlist the video is in, '
|
||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||
@ -306,7 +348,7 @@ def parseOpts(overrideArguments=None):
|
||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||
filesystem.add_option('--autonumber-size',
|
||||
dest='autonumber_size', metavar='NUMBER',
|
||||
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
|
||||
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
|
||||
filesystem.add_option('--restrict-filenames',
|
||||
action='store_true', dest='restrictfilenames',
|
||||
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
||||
@ -315,7 +357,7 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('-w', '--no-overwrites',
|
||||
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
||||
filesystem.add_option('-c', '--continue',
|
||||
action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
|
||||
action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
|
||||
filesystem.add_option('--no-continue',
|
||||
action='store_false', dest='continue_dl',
|
||||
help='do not resume partially downloaded files (restart from beginning)')
|
||||
@ -332,6 +374,9 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--write-info-json',
|
||||
action='store_true', dest='writeinfojson',
|
||||
help='write video metadata to a .info.json file', default=False)
|
||||
filesystem.add_option('--write-annotations',
|
||||
action='store_true', dest='writeannotations',
|
||||
help='write video annotations to a .annotation file', default=False)
|
||||
filesystem.add_option('--write-thumbnail',
|
||||
action='store_true', dest='writethumbnail',
|
||||
help='write thumbnail image to disk', default=False)
|
||||
@ -351,6 +396,8 @@ def parseOpts(overrideArguments=None):
|
||||
help='do not overwrite post-processed files; the post-processed files are overwritten by default')
|
||||
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
|
||||
help='embed subtitles in the video (only for mp4 videos)')
|
||||
postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
|
||||
help='add metadata to the files')
|
||||
|
||||
|
||||
parser.add_option_group(general)
|
||||
@ -397,19 +444,6 @@ def _real_main(argv=None):
|
||||
|
||||
parser, opts, args = parseOpts(argv)
|
||||
|
||||
# Open appropriate CookieJar
|
||||
if opts.cookiefile is None:
|
||||
jar = compat_cookiejar.CookieJar()
|
||||
else:
|
||||
try:
|
||||
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
|
||||
if os.access(opts.cookiefile, os.R_OK):
|
||||
jar.load()
|
||||
except (IOError, OSError) as err:
|
||||
if opts.verbose:
|
||||
traceback.print_exc()
|
||||
write_string(u'ERROR: unable to open cookie file\n')
|
||||
sys.exit(101)
|
||||
# Set user agent
|
||||
if opts.user_agent is not None:
|
||||
std_headers['User-Agent'] = opts.user_agent
|
||||
@ -441,28 +475,6 @@ def _real_main(argv=None):
|
||||
all_urls = batchurls + args
|
||||
all_urls = [url.strip() for url in all_urls]
|
||||
|
||||
# General configuration
|
||||
cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
|
||||
if opts.proxy is not None:
|
||||
if opts.proxy == '':
|
||||
proxies = {}
|
||||
else:
|
||||
proxies = {'http': opts.proxy, 'https': opts.proxy}
|
||||
else:
|
||||
proxies = compat_urllib_request.getproxies()
|
||||
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
|
||||
if 'http' in proxies and 'https' not in proxies:
|
||||
proxies['https'] = proxies['http']
|
||||
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
|
||||
https_handler = make_HTTPS_handler(opts)
|
||||
opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||
opener.addheaders =[]
|
||||
compat_urllib_request.install_opener(opener)
|
||||
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
|
||||
|
||||
extractors = gen_extractors()
|
||||
|
||||
if opts.list_extractors:
|
||||
@ -478,6 +490,8 @@ def _real_main(argv=None):
|
||||
if not ie._WORKING:
|
||||
continue
|
||||
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
|
||||
if desc is False:
|
||||
continue
|
||||
if hasattr(ie, 'SEARCH_KEY'):
|
||||
_SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise')
|
||||
_COUNTS = (u'', u'5', u'10', u'all')
|
||||
@ -515,7 +529,7 @@ def _real_main(argv=None):
|
||||
if opts.retries is not None:
|
||||
try:
|
||||
opts.retries = int(opts.retries)
|
||||
except (TypeError, ValueError) as err:
|
||||
except (TypeError, ValueError):
|
||||
parser.error(u'invalid retry count specified')
|
||||
if opts.buffersize is not None:
|
||||
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
||||
@ -526,13 +540,13 @@ def _real_main(argv=None):
|
||||
opts.playliststart = int(opts.playliststart)
|
||||
if opts.playliststart <= 0:
|
||||
raise ValueError(u'Playlist start must be positive')
|
||||
except (TypeError, ValueError) as err:
|
||||
except (TypeError, ValueError):
|
||||
parser.error(u'invalid playlist start number specified')
|
||||
try:
|
||||
opts.playlistend = int(opts.playlistend)
|
||||
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
|
||||
raise ValueError(u'Playlist end must be greater than playlist start')
|
||||
except (TypeError, ValueError) as err:
|
||||
except (TypeError, ValueError):
|
||||
parser.error(u'invalid playlist end number specified')
|
||||
if opts.extractaudio:
|
||||
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
|
||||
@ -571,13 +585,12 @@ def _real_main(argv=None):
|
||||
u' file! Use "%%(ext)s" instead of %r' %
|
||||
determine_ext(outtmpl, u''))
|
||||
|
||||
# YoutubeDL
|
||||
ydl = YoutubeDL({
|
||||
ydl_opts = {
|
||||
'usenetrc': opts.usenetrc,
|
||||
'username': opts.username,
|
||||
'password': opts.password,
|
||||
'videopassword': opts.videopassword,
|
||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
||||
'forceurl': opts.geturl,
|
||||
'forcetitle': opts.gettitle,
|
||||
'forceid': opts.getid,
|
||||
@ -585,8 +598,9 @@ def _real_main(argv=None):
|
||||
'forcedescription': opts.getdescription,
|
||||
'forcefilename': opts.getfilename,
|
||||
'forceformat': opts.getformat,
|
||||
'forcejson': opts.dumpjson,
|
||||
'simulate': opts.simulate,
|
||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
|
||||
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.dumpjson),
|
||||
'format': opts.format,
|
||||
'format_limit': opts.format_limit,
|
||||
'listformats': opts.listformats,
|
||||
@ -610,6 +624,7 @@ def _real_main(argv=None):
|
||||
'nopart': opts.nopart,
|
||||
'updatetime': opts.updatetime,
|
||||
'writedescription': opts.writedescription,
|
||||
'writeannotations': opts.writeannotations,
|
||||
'writeinfojson': opts.writeinfojson,
|
||||
'writethumbnail': opts.writethumbnail,
|
||||
'writesubtitles': opts.writesubtitles,
|
||||
@ -624,6 +639,7 @@ def _real_main(argv=None):
|
||||
'prefer_free_formats': opts.prefer_free_formats,
|
||||
'verbose': opts.verbose,
|
||||
'dump_intermediate_pages': opts.dump_intermediate_pages,
|
||||
'write_pages': opts.write_pages,
|
||||
'test': opts.test,
|
||||
'keepvideo': opts.keepvideo,
|
||||
'min_filesize': opts.min_filesize,
|
||||
@ -631,63 +647,47 @@ def _real_main(argv=None):
|
||||
'daterange': date,
|
||||
'cachedir': opts.cachedir,
|
||||
'youtube_print_sig_code': opts.youtube_print_sig_code,
|
||||
})
|
||||
'age_limit': opts.age_limit,
|
||||
'download_archive': opts.download_archive,
|
||||
'cookiefile': opts.cookiefile,
|
||||
'nocheckcertificate': opts.no_check_certificate,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.print_debug_header()
|
||||
ydl.add_default_info_extractors()
|
||||
|
||||
# PostProcessors
|
||||
# Add the metadata pp first, the other pps will copy it
|
||||
if opts.addmetadata:
|
||||
ydl.add_post_processor(FFmpegMetadataPP())
|
||||
if opts.extractaudio:
|
||||
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||
if opts.recodevideo:
|
||||
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||
if opts.embedsubtitles:
|
||||
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
||||
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose)
|
||||
|
||||
# Maybe do nothing
|
||||
if len(all_urls) < 1:
|
||||
if not opts.update_self:
|
||||
parser.error(u'you must provide at least one URL')
|
||||
else:
|
||||
sys.exit()
|
||||
|
||||
if opts.verbose:
|
||||
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
|
||||
try:
|
||||
sp = subprocess.Popen(
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||
out, err = sp.communicate()
|
||||
out = out.decode().strip()
|
||||
if re.match('[0-9a-f]+', out):
|
||||
write_string(u'[debug] Git HEAD: ' + out + u'\n')
|
||||
except:
|
||||
try:
|
||||
sys.exc_clear()
|
||||
except:
|
||||
pass
|
||||
write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
|
||||
write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
|
||||
|
||||
ydl.add_default_info_extractors()
|
||||
|
||||
# PostProcessors
|
||||
if opts.extractaudio:
|
||||
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
|
||||
if opts.recodevideo:
|
||||
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
|
||||
if opts.embedsubtitles:
|
||||
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
|
||||
|
||||
# Update version
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose)
|
||||
|
||||
# Maybe do nothing
|
||||
if len(all_urls) < 1:
|
||||
if not opts.update_self:
|
||||
parser.error(u'you must provide at least one URL')
|
||||
else:
|
||||
sys.exit()
|
||||
|
||||
try:
|
||||
retcode = ydl.download(all_urls)
|
||||
except MaxDownloadsReached:
|
||||
ydl.to_screen(u'--max-download limit reached, aborting.')
|
||||
retcode = 101
|
||||
|
||||
# Dump cookie jar if requested
|
||||
if opts.cookiefile is not None:
|
||||
try:
|
||||
jar.save()
|
||||
except (IOError, OSError) as err:
|
||||
sys.exit(u'ERROR: unable to save cookie jar')
|
||||
retcode = ydl.download(all_urls)
|
||||
except MaxDownloadsReached:
|
||||
ydl.to_screen(u'--max-download limit reached, aborting.')
|
||||
retcode = 101
|
||||
|
||||
sys.exit(retcode)
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
try:
|
||||
_real_main(argv)
|
||||
|
@ -1,10 +1,17 @@
|
||||
from .appletrailers import AppleTrailersIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .anitube import AnitubeIE
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .ard import ARDIE
|
||||
from .arte import ArteTvIE
|
||||
from .arte import (
|
||||
ArteTvIE,
|
||||
ArteTVPlus7IE,
|
||||
ArteTVCreativeIE,
|
||||
ArteTVFutureIE,
|
||||
)
|
||||
from .auengine import AUEngineIE
|
||||
from .bandcamp import BandcampIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .breakcom import BreakIE
|
||||
@ -12,12 +19,15 @@ from .brightcove import BrightcoveIE
|
||||
from .c56 import C56IE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cnn import CNNIE
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .comedycentral import ComedyCentralIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .condenast import CondeNastIE
|
||||
from .criterion import CriterionIE
|
||||
from .cspan import CSpanIE
|
||||
from .d8 import D8IE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
@ -31,9 +41,12 @@ from .defense import DefenseGouvFrIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .escapist import EscapistIE
|
||||
from .exfm import ExfmIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fktv import (
|
||||
FKTVIE,
|
||||
FKTVPosteckeIE,
|
||||
@ -47,6 +60,7 @@ from .francetv import (
|
||||
)
|
||||
from .freesound import FreesoundIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .generic import GenericIE
|
||||
@ -60,56 +74,77 @@ from .ign import IGNIE, OneUPIE
|
||||
from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
from .kankan import KankanIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import LivestreamIE
|
||||
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mit import TechTVMITIE, MITIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mtv import MTVIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .myspace import MySpaceIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import NBCNewsIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pornhub import PornHubIE
|
||||
from .pornotube import PornotubeIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rutube import RutubeIE
|
||||
from .sina import SinaIE
|
||||
from .slashdot import SlashdotIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .sohu import SohuIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
|
||||
from .southparkstudios import SouthParkStudiosIE
|
||||
from .southparkstudios import (
|
||||
SouthParkStudiosIE,
|
||||
SouthparkDeIE,
|
||||
)
|
||||
from .space import SpaceIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .statigram import StatigramIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tf1 import TF1IE
|
||||
from .thisav import ThisAVIE
|
||||
from .toutv import TouTvIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .tvp import TvpIE
|
||||
from .unistra import UnistraIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .vbox7 import Vbox7IE
|
||||
@ -118,16 +153,22 @@ from .veoh import VeohIE
|
||||
from .vevo import VevoIE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .vimeo import VimeoIE, VimeoChannelIE
|
||||
from .vine import VineIE
|
||||
from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .wat import WatIE
|
||||
from .websurg import WeBSurgIE
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xtube import XTubeIE
|
||||
from .yahoo import YahooIE, YahooSearchIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
@ -136,13 +177,16 @@ from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeWatchLaterIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeHistoryIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
@ -17,8 +17,8 @@ class AddAnimeIE(InfoExtractor):
|
||||
IE_NAME = u'AddAnime'
|
||||
_TEST = {
|
||||
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||
u'file': u'24MR3YO5SAS9.flv',
|
||||
u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
|
||||
u'file': u'24MR3YO5SAS9.mp4',
|
||||
u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
|
||||
u'info_dict': {
|
||||
u"description": u"One Piece 606",
|
||||
u"title": u"One Piece 606"
|
||||
@ -31,7 +31,8 @@ class AddAnimeIE(InfoExtractor):
|
||||
video_id = mobj.group('video_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError):
|
||||
if not isinstance(ee.cause, compat_HTTPError) or \
|
||||
ee.cause.code != 503:
|
||||
raise
|
||||
|
||||
redir_webpage = ee.cause.read().decode('utf-8')
|
||||
@ -60,16 +61,26 @@ class AddAnimeIE(InfoExtractor):
|
||||
note=u'Confirming after redirect')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
|
||||
webpage, u'video file URL')
|
||||
formats = []
|
||||
for format_id in ('normal', 'hq'):
|
||||
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
|
||||
video_url = self._search_regex(rex, webpage, u'video file URLx',
|
||||
fatal=False)
|
||||
if not video_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
})
|
||||
if not formats:
|
||||
raise ExtractorError(u'Cannot find any video format!')
|
||||
video_title = self._og_search_title(webpage)
|
||||
video_description = self._og_search_description(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'formats': formats,
|
||||
'title': video_title,
|
||||
'description': video_description
|
||||
}
|
||||
|
55
youtube_dl/extractor/anitube.py
Normal file
55
youtube_dl/extractor/anitube.py
Normal file
@ -0,0 +1,55 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AnitubeIE(InfoExtractor):
    """Information extractor for videos hosted on anitube.se."""
    IE_NAME = u'anitube.se'
    _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.anitube.se/video/36621',
        u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
        u'file': u'36621.mp4',
        u'info_dict': {
            u'id': u'36621',
            u'ext': u'mp4',
            u'title': u'Recorder to Randoseru 01',
        },
        u'skip': u'Blocked in the US',
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats) for an anitube.se URL.

        The video page references an embed key; that key is used to fetch
        an XML configuration document which holds the title and the media
        URLs (``file`` for SD, ``filehd`` for HD).
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id)
        key = self._html_search_regex(
            r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', page, u'key')

        config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
        raw_config = self._download_webpage(config_url, key)
        config = xml.etree.ElementTree.fromstring(raw_config.encode('utf-8'))

        title = config.find('title').text

        # Each optional XML element maps to one format; SD is listed first.
        formats = []
        for tag, format_id in (('file', 'sd'), ('filehd', 'hd')):
            node = config.find(tag)
            if node is None:
                continue
            formats.append({
                'format_id': format_id,
                'url': node.text,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats
        }
|
@ -1,3 +1,4 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
@ -7,15 +8,16 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
unified_strdate,
|
||||
determine_ext,
|
||||
get_element_by_id,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
# There are different sources of video in arte.tv, the extraction process
|
||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
||||
# add tests.
|
||||
|
||||
class ArteTvIE(InfoExtractor):
|
||||
"""
|
||||
There are two sources of video in arte.tv: videos.arte.tv and
|
||||
www.arte.tv/guide, the extraction process is different for each one.
|
||||
The videos expire in 7 days, so we can't add tests.
|
||||
"""
|
||||
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
|
||||
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
||||
@ -24,7 +26,7 @@ class ArteTvIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
|
||||
return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
|
||||
|
||||
# TODO implement Live Stream
|
||||
# from ..utils import compat_urllib_parse
|
||||
@ -55,14 +57,6 @@ class ArteTvIE(InfoExtractor):
|
||||
# video_url = u'%s/%s' % (info.get('url'), info.get('path'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._EMISSION_URL, url)
|
||||
if mobj is not None:
|
||||
lang = mobj.group('lang')
|
||||
# This is not a real id, it can be for example AJT for the news
|
||||
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
||||
video_id = mobj.group('id')
|
||||
return self._extract_emission(url, video_id, lang)
|
||||
|
||||
mobj = re.match(self._VIDEOS_URL, url)
|
||||
if mobj is not None:
|
||||
id = mobj.group('id')
|
||||
@ -75,54 +69,11 @@ class ArteTvIE(InfoExtractor):
|
||||
lang = mobj.group('lang')
|
||||
return self._extract_liveweb(url, name, lang)
|
||||
|
||||
if re.search(self._LIVE_URL, video_id) is not None:
|
||||
if re.search(self._LIVE_URL, url) is not None:
|
||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||
# self.extractLiveStream(url)
|
||||
# return
|
||||
|
||||
def _extract_emission(self, url, video_id, lang):
|
||||
"""Extract from www.arte.tv/guide"""
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
||||
|
||||
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
|
||||
self.report_extraction(video_id)
|
||||
info = json.loads(json_info)
|
||||
player_info = info['videoJsonPlayer']
|
||||
|
||||
info_dict = {'id': player_info['VID'],
|
||||
'title': player_info['VTI'],
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
|
||||
'thumbnail': player_info['programImage'],
|
||||
'ext': 'flv',
|
||||
}
|
||||
|
||||
formats = player_info['VSR'].values()
|
||||
def _match_lang(f):
|
||||
# Return true if that format is in the language of the url
|
||||
if lang == 'fr':
|
||||
l = 'F'
|
||||
elif lang == 'de':
|
||||
l = 'A'
|
||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||
# Some formats may not be in the same language as the url
|
||||
formats = filter(_match_lang, formats)
|
||||
# We order the formats by quality
|
||||
formats = sorted(formats, key=lambda f: int(f['height']))
|
||||
# Prefer videos without subtitles in the same language
|
||||
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
|
||||
# Pick the best quality
|
||||
format_info = formats[-1]
|
||||
if format_info['mediaType'] == u'rtmp':
|
||||
info_dict['url'] = format_info['streamer']
|
||||
info_dict['play_path'] = 'mp4:' + format_info['url']
|
||||
else:
|
||||
info_dict['url'] = format_info['url']
|
||||
|
||||
return info_dict
|
||||
|
||||
def _extract_video(self, url, video_id, lang):
|
||||
"""Extract from videos.arte.tv"""
|
||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||
@ -164,7 +115,7 @@ class ArteTvIE(InfoExtractor):
|
||||
event_doc = config_doc.find('event')
|
||||
url_node = event_doc.find('video').find('urlHd')
|
||||
if url_node is None:
|
||||
url_node = video_doc.find('urlSd')
|
||||
url_node = event_doc.find('urlSd')
|
||||
|
||||
return {'id': video_id,
|
||||
'title': event_doc.find('name%s' % lang.capitalize()).text,
|
||||
@ -172,3 +123,140 @@ class ArteTvIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
||||
|
||||
class ArteTVPlus7IE(InfoExtractor):
    """Extract videos from the arte.tv programme guide (www.arte.tv/guide).

    The page carries an ``arte_vp_url`` attribute pointing at a JSON
    document that describes the available streams.  The parsing of that
    document lives in ``_extract_from_webpage`` so that subclasses for
    other arte.tv sites exposing the same attribute can reuse it.
    """
    IE_NAME = u'arte.tv:+7'
    _VALID_URL = r'https?://www\.arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'

    # Letter used inside the JSON 'versionCode' field for each URL language.
    _LANG_CODES = {'fr': 'F', 'de': 'A'}

    @classmethod
    def _extract_url_info(cls, url):
        """Return ``(video_id, lang)`` parsed from *url* with ``_VALID_URL``."""
        mobj = re.match(cls._VALID_URL, url)
        lang = mobj.group('lang')
        # This is not a real id, it can be for example AJT for the news
        # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
        video_id = mobj.group('id')
        return video_id, lang

    def _real_extract(self, url):
        """Download the page at *url* and extract the video it embeds."""
        video_id, lang = self._extract_url_info(url)
        webpage = self._download_webpage(url, video_id)
        return self._extract_from_webpage(webpage, video_id, lang)

    def _extract_from_webpage(self, webpage, video_id, lang):
        """Build the info dict from the player JSON referenced in *webpage*.

        webpage  -- HTML containing an ``arte_vp_url="..."`` attribute
        video_id -- identifier used for progress/report messages
        lang     -- 'fr' or 'de'; used to select the matching versions

        Raises ExtractorError when no usable format is found.
        """
        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')

        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
        self.report_extraction(video_id)
        player_info = json.loads(json_info)['videoJsonPlayer']

        info_dict = {
            'id': player_info['VID'],
            'title': player_info['VTI'],
            'description': player_info.get('VDE'),
            'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]),
            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
        }

        # Some formats use the m3u8 protocol; those are left out
        all_formats = [
            f for f in player_info['VSR'].values()
            if f.get('videoFormat') != 'M3U8']

        def _match_lang(f):
            # Formats without a version code are accepted unconditionally
            version = f.get('versionCode')
            if version is None:
                return True
            # Return true if that format is in the language of the url
            letter = self._LANG_CODES[lang]
            regexes = [r'VO?%s' % letter, r'VO?.-ST%s' % letter]
            return any(re.match(r, version) for r in regexes)

        # Some formats may not be in the same language as the url
        formats = [f for f in all_formats if _match_lang(f)]
        if not formats:
            # Some videos are only available in the 'Originalversion'
            # they aren't tagged as being in French or German.
            # The explicit emptiness test matters: all() of an empty list is
            # True, which would otherwise leave formats empty and crash on
            # formats[0] below instead of reporting a proper error.
            if all_formats and all(f['versionCode'] == 'VO' for f in all_formats):
                formats = all_formats
            else:
                raise ExtractorError(u'The formats list is empty')

        if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
            # Named quality levels: order by their position in this list
            def sort_key(f):
                return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
        else:
            def sort_key(f):
                return (
                    # Sort first by quality
                    int(f.get('height', -1)),
                    int(f.get('bitrate', -1)),
                    # The original version with subtitles has lower relevance
                    re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
                    # The version with sourds/mal subtitles has also lower relevance
                    re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
                )
        formats = sorted(formats, key=sort_key)

        def _format(format_info):
            # Translate one JSON format entry into a format dict
            quality = ''
            height = format_info.get('height')
            if height is not None:
                quality = compat_str(height)
            bitrate = format_info.get('bitrate')
            if bitrate is not None:
                quality += '-%d' % bitrate
            if format_info.get('versionCode') is not None:
                format_id = u'%s-%s' % (quality, format_info['versionCode'])
            else:
                format_id = quality
            info = {
                'format_id': format_id,
                'format_note': format_info.get('versionLibelle'),
                'width': format_info.get('width'),
                'height': height,
            }
            if format_info['mediaType'] == u'rtmp':
                # For rtmp the stream address and the play path are separate
                info['url'] = format_info['streamer']
                info['play_path'] = 'mp4:' + format_info['url']
                info['ext'] = 'flv'
            else:
                info['url'] = format_info['url']
                info['ext'] = determine_ext(info['url'])
            return info
        info_dict['formats'] = [_format(f) for f in formats]

        return info_dict
|
||||
|
||||
|
||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||
class ArteTVCreativeIE(ArteTVPlus7IE):
    # Only the URL pattern and the test differ from the parent class; the
    # extraction methods are inherited unchanged.
    IE_NAME = u'arte.tv:creative'
    _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'

    _TEST = {
        u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
        u'file': u'050489-002.mp4',
        u'info_dict': {
            u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design',
        },
    }
|
||||
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
    # future.arte.tv pages hold several articles; the one to extract is
    # selected by the "#article-anchor-<id>" fragment of the URL.
    IE_NAME = u'arte.tv:future'
    _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)'

    _TEST = {
        u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
        u'file': u'050940-003.mp4',
        u'info_dict': {
            u'title': u'Les champignons au secours de la planète',
        },
    }

    def _real_extract(self, url):
        """Extract the video embedded in the article named by the URL anchor.

        Raises ExtractorError when the anchored element yields no content.
        """
        anchor_id, lang = self._extract_url_info(url)
        webpage = self._download_webpage(url, anchor_id)
        row = get_element_by_id(anchor_id, webpage)
        # NOTE(review): get_element_by_id presumably yields nothing when no
        # element carries that id - confirm against utils.  Fail with a clear
        # message here rather than with an unrelated error further down.
        if not row:
            raise ExtractorError(
                u'Cannot find the article with anchor %s' % anchor_id)
        return self._extract_from_webpage(row, anchor_id, lang)
|
||||
|
@ -1,10 +1,10 @@
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class AUEngineIE(InfoExtractor):
|
||||
@ -25,22 +25,25 @@ class AUEngineIE(InfoExtractor):
|
||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
|
||||
webpage, u'title')
|
||||
title = title.strip()
|
||||
links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
|
||||
links = [compat_urllib_parse.unquote(l) for l in links]
|
||||
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
||||
links = map(compat_urllib_parse.unquote, links)
|
||||
|
||||
thumbnail = None
|
||||
video_url = None
|
||||
for link in links:
|
||||
root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path)
|
||||
if pathext == '.png':
|
||||
if link.endswith('.png'):
|
||||
thumbnail = link
|
||||
elif pathext == '.mp4':
|
||||
url = link
|
||||
ext = pathext
|
||||
elif '/videos/' in link:
|
||||
video_url = link
|
||||
if not video_url:
|
||||
raise ExtractorError(u'Could not find video URL')
|
||||
ext = u'.' + determine_ext(video_url)
|
||||
if ext == title[-len(ext):]:
|
||||
title = title[:-len(ext)]
|
||||
ext = ext[1:]
|
||||
return [{
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}]
|
||||
}
|
||||
|
86
youtube_dl/extractor/bambuser.py
Normal file
86
youtube_dl/extractor/bambuser.py
Normal file
@ -0,0 +1,86 @@
|
||||
import re
|
||||
import json
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class BambuserIE(InfoExtractor):
    """Extractor for single Bambuser broadcasts (``bambuser.com/v/<id>``)."""

    IE_NAME = u'bambuser'
    _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
    # Key sent with every request to the player API.
    _API_KEY = '005f64509e19a868399060af746a00aa'

    _TEST = {
        u'url': u'http://bambuser.com/v/4050584',
        # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
        #u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
        u'info_dict': {
            u'id': u'4050584',
            u'ext': u'flv',
            u'title': u'Education engineering days - lightning talks',
            u'duration': 3741,
            u'uploader': u'pixelversity',
            u'uploader_id': u'344706',
        },
        u'params': {
            # It doesn't respect the 'Range' header, it would download the whole video
            # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Query the player API for the video in *url* and build its info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        api_url = (
            'http://player-c.api.bambuser.com/getVideo.json?&api_key=%s&vid=%s'
            % (self._API_KEY, video_id))
        api_response = self._download_webpage(api_url, video_id)
        # The metadata of the broadcast lives under the "result" key.
        result = json.loads(api_response)['result']

        return {
            'id': video_id,
            'title': result['title'],
            'url': result['url'],
            'thumbnail': result.get('preview'),
            'duration': int(result['length']),
            'view_count': int(result['views_total']),
            'uploader': result['username'],
            'uploader_id': result['uid'],
        }
|
||||
|
||||
|
||||
class BambuserChannelIE(InfoExtractor):
    """Extractor for all broadcasts of a Bambuser channel.

    The broadcasts are listed page by page through the site's XHR API and
    returned as a playlist of url results handled by the 'Bambuser' extractor.
    """

    IE_NAME = u'bambuser:channel'
    # The dot of the host name is escaped so that it only matches a literal
    # "."; https is accepted like in the single-video extractor.
    _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
    # The maximum number we can get with each request
    _STEP = 50

    def _real_extract(self, url):
        """Return a playlist dict with one url result per broadcast of the user."""
        mobj = re.match(self._VALID_URL, url)
        user = mobj.group('user')
        urls = []
        # Paging cursor: each request asks for broadcasts older than the last
        # one seen so far (empty for the first page).
        last_id = ''
        for i in itertools.count(1):
            req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
                       '&sort=created&access_mode=0%2C1%2C2&limit={count}'
                       '&method=broadcast&format=json&vid_older_than={last}'
                       ).format(user=user, count=self._STEP, last=last_id)
            req = compat_urllib_request.Request(req_url)
            # Without setting this header, we wouldn't get any result
            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
            info_json = self._download_webpage(
                req, user, u'Downloading page %d' % i)
            results = json.loads(info_json)['result']
            # An empty page means that every broadcast has been listed.
            if not results:
                break
            last_id = results[-1]['vid']
            urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)

        return {
            '_type': 'playlist',
            'title': user,
            'entries': urls,
        }
|
@ -3,13 +3,16 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
IE_NAME = u'Bandcamp'
|
||||
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
u'file': u'1812978515.mp3',
|
||||
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c',
|
||||
@ -17,7 +20,7 @@ class BandcampIE(InfoExtractor):
|
||||
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
||||
},
|
||||
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -26,6 +29,23 @@ class BandcampIE(InfoExtractor):
|
||||
# We get the link to the free download page
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if m_download is None:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)
|
||||
|
||||
for d in data:
|
||||
formats = [{
|
||||
'format_id': 'format_id',
|
||||
'url': format_url,
|
||||
'ext': format_id.partition('-')[0]
|
||||
} for format_id, format_url in sorted(d['file'].items())]
|
||||
return {
|
||||
'id': compat_str(d['id']),
|
||||
'title': d['title'],
|
||||
'formats': formats,
|
||||
}
|
||||
else:
|
||||
raise ExtractorError(u'No free songs found')
|
||||
|
||||
download_link = m_download.group(1)
|
||||
@ -61,3 +81,49 @@ class BandcampIE(InfoExtractor):
|
||||
}
|
||||
|
||||
return [track_info]
|
||||
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
    """Extractor for Bandcamp album pages.

    Every track link found on the album page becomes a url result that is
    delegated to BandcampIE; the album is returned as a playlist.
    """

    IE_NAME = u'Bandcamp:album'
    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'

    _TEST = {
        u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
        u'playlist': [
            {
                u'file': u'1353101989.mp3',
                u'md5': u'39bc1eded3476e927c724321ddf116cf',
                u'info_dict': {
                    u'title': u'Intro',
                }
            },
            {
                u'file': u'38097443.mp3',
                u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
                u'info_dict': {
                    u'title': u'Kero One - Keep It Alive (Blazo remix)',
                }
            },
        ],
        u'params': {
            u'playlistend': 2
        },
        u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per track linked from the album page.

        Raises ExtractorError when the page contains no track link.
        """
        album_slug = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, album_slug)

        track_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
        if not track_paths:
            raise ExtractorError(u'The page doesn\'t contain any track')

        entries = []
        for track_path in track_paths:
            # The hrefs are resolved against the album url.
            track_url = compat_urlparse.urljoin(url, track_path)
            entries.append(self.url_result(track_url, ie=BandcampIE.ie_key()))

        album_title = self._search_regex(
            r'album_title : "(.*?)"', webpage, u'title')
        return {
            '_type': 'playlist',
            'title': album_title,
            'entries': entries,
        }
|
||||
|
@ -9,10 +9,13 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
find_xpath_attr,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class BrightcoveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
|
||||
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
|
||||
@ -23,7 +26,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
|
||||
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
|
||||
u'file': u'2371591881001.mp4',
|
||||
u'md5': u'9e80619e0a94663f0bdc849b4566af19',
|
||||
u'md5': u'8eccab865181d29ec2958f32a6a754f5',
|
||||
u'note': u'Test Brightcove downloads and detection in GenericIE',
|
||||
u'info_dict': {
|
||||
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
|
||||
@ -41,6 +44,17 @@ class BrightcoveIE(InfoExtractor):
|
||||
u'uploader': u'Oracle',
|
||||
},
|
||||
},
|
||||
{
|
||||
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
||||
u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
|
||||
u'info_dict': {
|
||||
u'id': u'2750934548001',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'This Bracelet Acts as a Personal Thermostat',
|
||||
u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0',
|
||||
u'uploader': u'Mashable',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
@ -53,37 +67,69 @@ class BrightcoveIE(InfoExtractor):
|
||||
# Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
|
||||
object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
|
||||
lambda m: m.group(1) + '/>', object_str)
|
||||
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
|
||||
object_str = object_str.replace(u'<--', u'<!--')
|
||||
|
||||
object_doc = xml.etree.ElementTree.fromstring(object_str)
|
||||
assert u'BrightcoveExperience' in object_doc.attrib['class']
|
||||
params = {'flashID': object_doc.attrib['id'],
|
||||
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
||||
}
|
||||
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
|
||||
def find_param(name):
|
||||
node = find_xpath_attr(object_doc, './param', 'name', name)
|
||||
if node is not None:
|
||||
return node.attrib['value']
|
||||
return None
|
||||
playerKey = find_param('playerKey')
|
||||
# Not all pages define this value
|
||||
if playerKey is not None:
|
||||
params['playerKey'] = playerKey.attrib['value']
|
||||
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
|
||||
params['playerKey'] = playerKey
|
||||
# The three fields hold the id of the video
|
||||
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
|
||||
if videoPlayer is not None:
|
||||
params['@videoPlayer'] = videoPlayer.attrib['value']
|
||||
params['@videoPlayer'] = videoPlayer
|
||||
linkBase = find_param('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
params['linkBaseURL'] = linkBase
|
||||
data = compat_urllib_parse.urlencode(params)
|
||||
return cls._FEDERATED_URL_TEMPLATE % data
|
||||
|
||||
@classmethod
def _extract_brightcove_url(cls, webpage):
    """Try to extract the brightcove url from the webpage.

    The first <object> element whose class attribute mentions
    "BrightcoveExperience" is passed to _build_brighcove_url.
    Returns None if no such element can be found.
    """
    object_match = re.search(
        r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
        webpage, re.DOTALL)
    if object_match is None:
        return None
    return cls._build_brighcove_url(object_match.group())
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Change the 'videoId' and others field to '@videoPlayer'
|
||||
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
|
||||
# Change bckey (used by bcove.me urls) to playerKey
|
||||
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
query_str = mobj.group('query')
|
||||
query = compat_urlparse.parse_qs(query_str)
|
||||
|
||||
videoPlayer = query.get('@videoPlayer')
|
||||
if videoPlayer:
|
||||
return self._get_video_info(videoPlayer[0], query_str)
|
||||
return self._get_video_info(videoPlayer[0], query_str, query)
|
||||
else:
|
||||
player_key = query['playerKey']
|
||||
return self._get_playlist_info(player_key[0])
|
||||
|
||||
def _get_video_info(self, video_id, query):
|
||||
request_url = self._FEDERATED_URL_TEMPLATE % query
|
||||
webpage = self._download_webpage(request_url, video_id)
|
||||
def _get_video_info(self, video_id, query_str, query):
|
||||
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
||||
req = compat_urllib_request.Request(request_url)
|
||||
linkBase = query.get('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
req.add_header('Referer', linkBase[0])
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
|
||||
@ -96,7 +142,10 @@ class BrightcoveIE(InfoExtractor):
|
||||
playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
|
||||
player_key, u'Downloading playlist information')
|
||||
|
||||
playlist_info = json.loads(playlist_info)['videoList']
|
||||
json_data = json.loads(playlist_info)
|
||||
if 'videoList' not in json_data:
|
||||
raise ExtractorError(u'Empty playlist')
|
||||
playlist_info = json_data['videoList']
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||
|
||||
return self.playlist_result(videos, playlist_id=playlist_info['id'],
|
||||
@ -104,7 +153,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
info = {
|
||||
'id': video_info['id'],
|
||||
'id': compat_str(video_info['id']),
|
||||
'title': video_info['displayName'],
|
||||
'description': video_info.get('shortDescription'),
|
||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
||||
@ -114,15 +163,14 @@ class BrightcoveIE(InfoExtractor):
|
||||
renditions = video_info.get('renditions')
|
||||
if renditions:
|
||||
renditions = sorted(renditions, key=lambda r: r['size'])
|
||||
best_format = renditions[-1]
|
||||
info.update({
|
||||
'url': best_format['defaultURL'],
|
||||
'ext': 'mp4',
|
||||
})
|
||||
info['formats'] = [{
|
||||
'url': rend['defaultURL'],
|
||||
'height': rend.get('frameHeight'),
|
||||
'width': rend.get('frameWidth'),
|
||||
} for rend in renditions]
|
||||
elif video_info.get('FLVFullLengthURL') is not None:
|
||||
info.update({
|
||||
'url': video_info['FLVFullLengthURL'],
|
||||
'ext': 'flv',
|
||||
})
|
||||
else:
|
||||
raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
|
||||
|
@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
class Canalc2IE(InfoExtractor):
|
||||
IE_NAME = 'canalc2.tv'
|
||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
|
||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
||||
@ -18,7 +18,9 @@ class Canalc2IE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = re.match(self._VALID_URL, url).group(1)
|
||||
video_id = re.match(self._VALID_URL, url).group('id')
|
||||
# We need to set the voir field for getting the file name
|
||||
url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
file_name = self._search_regex(
|
||||
r"so\.addVariable\('file','(.*?)'\);",
|
||||
|
@ -5,6 +5,7 @@ import xml.etree.ElementTree
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
||||
@ -25,7 +26,7 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = mobj.groupdict().get('id')
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, mobj.group('path'))
|
||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
||||
|
84
youtube_dl/extractor/cinemassacre.py
Normal file
84
youtube_dl/extractor/cinemassacre.py
Normal file
@ -0,0 +1,84 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CinemassacreIE(InfoExtractor):
    """Extractor for dated cinemassacre.com posts with a screenwavemedia player.

    The upload date is taken from the /YYYY/MM/DD/ part of the post url; the
    stream information comes from the embedded player page.
    """

    _VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?'
    _TESTS = [{
        u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
        u'file': u'19911.flv',
        u'md5': u'f9bb7ede54d1229c9846e197b4737e06',
        u'info_dict': {
            u'upload_date': u'20121110',
            u'title': u'“Angry Video Game Nerd: The Movie” – Trailer',
            u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
        }
    },
    {
        u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
        u'file': u'521be8ef82b16.flv',
        u'md5': u'9509ee44dcaa7c1068604817c19a9e50',
        u'info_dict': {
            u'upload_date': u'20131002',
            u'title': u'The Mummy’s Hand (1940)',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict (with an sd and an hd format) for the post at *url*.

        Raises ExtractorError when the post contains no player embed.
        """
        mobj = re.match(self._VALID_URL, url)

        webpage_url = u'http://' + mobj.group('url')
        webpage = self._download_webpage(webpage_url, None)  # Don't know video id yet
        video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
        if not mobj:
            raise ExtractorError(u'Can\'t extract embed url and video id')
        playerdata_url = mobj.group(u'embed_url')
        video_id = mobj.group(u'video_id')

        video_title = self._html_search_regex(
            r'<title>(?P<title>.+?)\|', webpage, u'title')
        video_description = self._html_search_regex(
            r'<div class="entry-content">(?P<description>.+?)</div>',
            webpage, u'description', flags=re.DOTALL, fatal=False)
        # The search is not fatal, so the result may be None as well as an
        # empty string; both mean "no description" (len() would fail on None).
        if not video_description:
            video_description = None

        playerdata = self._download_webpage(playerdata_url, video_id)
        url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url')

        # The sd file name is single-quoted in the player data, the hd one
        # double-quoted.
        sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file')
        hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file')
        video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False)

        # Both formats share the streamer url and differ only in play path.
        formats = [
            {
                'url': url,
                'play_path': 'mp4:' + sd_file,
                'rtmp_live': True,  # workaround
                'ext': 'flv',
                'format': 'sd',
                'format_id': 'sd',
            },
            {
                'url': url,
                'play_path': 'mp4:' + hd_file,
                'rtmp_live': True,  # workaround
                'ext': 'flv',
                'format': 'hd',
                'format_id': 'hd',
            },
        ]

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'description': video_description,
            'upload_date': video_date,
            'thumbnail': video_thumbnail,
        }
|
53
youtube_dl/extractor/clipfish.py
Normal file
53
youtube_dl/extractor/clipfish.py
Normal file
@ -0,0 +1,53 @@
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ClipfishIE(InfoExtractor):
    """Extractor for clipfish.de video pages."""

    IE_NAME = u'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
        u'file': u'4028320.f4v',
        u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
        u'info_dict': {
            u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
            u'duration': 399,
        }
    }

    def _real_extract(self, url):
        """Download the video info XML for *url* and build the info dict."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # The current time is appended as the "ts" query parameter.
        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                    (video_id, int(time.time())))
        # _download_xml encodes the page to UTF-8 before parsing it, instead
        # of feeding a decoded unicode string to the XML parser.
        doc = self._download_xml(
            info_url, video_id, note=u'Downloading info page')
        title = doc.find('title').text
        video_url = doc.find('filename').text
        thumbnail = doc.find('imageurl').text
        duration_str = doc.find('duration').text
        # Expected form: hours:minutes:seconds:milliseconds; the last field
        # is matched but not used.
        m = re.match(
            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
            duration_str)
        if m:
            duration = (
                (int(m.group('hours')) * 60 * 60) +
                (int(m.group('minutes')) * 60) +
                (int(m.group('seconds')))
            )
        else:
            duration = None

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'duration': duration,
        }
|
@ -6,7 +6,7 @@ from ..utils import determine_ext
|
||||
|
||||
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
|
||||
_VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
|
||||
|
||||
_TESTS = [{
|
||||
|
@ -1,5 +1,4 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||
metaXml = self._download_webpage(xmlUrl, video_id,
|
||||
mdoc = self._download_xml(xmlUrl, video_id,
|
||||
u'Downloading info XML',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
||||
try:
|
||||
videoNode = mdoc.findall('./video')[0]
|
||||
youtubeIdNode = videoNode.find('./youtubeID')
|
||||
@ -65,16 +63,13 @@ class CollegeHumorIE(InfoExtractor):
|
||||
|
||||
if next_url.endswith(u'manifest.f4m'):
|
||||
manifest_url = next_url + '?hdcore=2.10.3'
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
adoc = self._download_xml(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
|
||||
node_id = media_node.attrib['url']
|
||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||
except IndexError as err:
|
||||
except IndexError:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
|
@ -2,6 +2,7 @@ import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .mtv import MTVIE, _media_xml_tag
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
@ -11,7 +12,37 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class ComedyCentralIE(InfoExtractor):
|
||||
class ComedyCentralIE(MTVIE):
    """Extractor for comedycentral.com clips, episodes and cc-studios pages.

    The page is only used to find the mgid of the video; the video info
    itself is obtained through the inherited _get_videos_info.
    """

    # Dots are escaped so that they only match a literal "." in the host name.
    _VALID_URL = r'http://www\.comedycentral\.com/(video-clips|episodes|cc-studios)/(?P<title>.*)'
    _FEED_URL = u'http://comedycentral.com/feeds/mrss/'

    _TEST = {
        u'url': u'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
        u'md5': u'4167875aae411f903b751a21f357f1ee',
        u'info_dict': {
            u'id': u'cef0cbb3-e776-4bc9-b62e-8016deccb354',
            u'ext': u'mp4',
            u'title': u'Uncensored - Greg Fitzsimmons - Too Good of a Mother',
            u'description': u'After a certain point, breastfeeding becomes c**kblocking.',
        },
    }
    # Overwrite MTVIE properties we don't want
    _TESTS = []

    def _get_thumbnail_url(self, uri, itemdoc):
        """Return the url attribute of the media thumbnail inside the media group."""
        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        return itemdoc.find(search_path).attrib['url']

    def _real_extract(self, url):
        """Find the mgid on the page at *url* and return the videos info for it."""
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        mgid = self._search_regex(r'data-mgid="(?P<mgid>mgid:.*?)"',
                                  webpage, u'mgid')
        return self._get_videos_info(mgid)
|
||||
|
||||
|
||||
class ComedyCentralShowsIE(InfoExtractor):
|
||||
IE_DESC = u'The Daily Show / Colbert Report'
|
||||
# urls can be abbreviations like :thedailyshow or :colbert
|
||||
# urls for episodes like:
|
||||
|
@ -4,19 +4,22 @@ import re
|
||||
import socket
|
||||
import sys
|
||||
import netrc
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class InfoExtractor(object):
|
||||
"""Information Extractor class.
|
||||
|
||||
@ -54,17 +57,29 @@ class InfoExtractor(object):
|
||||
view_count: How many users have watched the video on the platform.
|
||||
urlhandle: [internal] The urlHandle to be used to download the file,
|
||||
like returned by urllib.request.urlopen
|
||||
age_limit: Age restriction for the video, as an integer (years)
|
||||
formats: A list of dictionaries for each format available, it must
|
||||
be ordered from worst to best quality. Potential fields:
|
||||
* url Mandatory. The URL of the video file
|
||||
* ext Will be calculated from url if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
Calculated from width and height if missing.
|
||||
Calculated from the format_id, width, height,
|
||||
and format_note fields if missing.
|
||||
* format_id A short description of the format
|
||||
("mp4_h264_opus" or "19")
|
||||
* format_note Additional info about the format
|
||||
("3D" or "DASH video")
|
||||
* width Width of the video, if known
|
||||
* height Height of the video, if known
|
||||
* abr Average audio bitrate in KBit/s
|
||||
* acodec Name of the audio codec in use
|
||||
* vbr Average video bitrate in KBit/s
|
||||
* vcodec Name of the video codec in use
|
||||
* filesize The number of bytes, if known in advance
|
||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||
should allow to get the same result again. (It will be set
|
||||
by YoutubeDL if it's missing)
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
@ -143,7 +158,7 @@ class InfoExtractor(object):
|
||||
elif note is not False:
|
||||
self.to_screen(u'%s: %s' % (video_id, note))
|
||||
try:
|
||||
return compat_urllib_request.urlopen(url_or_request)
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if errnote is None:
|
||||
errnote = u'Unable to download webpage'
|
||||
@ -177,6 +192,17 @@ class InfoExtractor(object):
|
||||
self.to_screen(u'Dumping request to ' + url)
|
||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||
self._downloader.to_screen(dump)
|
||||
if self._downloader.params.get('write_pages', False):
|
||||
try:
|
||||
url = url_or_request.get_full_url()
|
||||
except AttributeError:
|
||||
url = url_or_request
|
||||
raw_filename = ('%s_%s.dump' % (video_id, url))
|
||||
filename = sanitize_filename(raw_filename, restricted=True)
|
||||
self.to_screen(u'Saving request to ' + filename)
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
|
||||
content = webpage_bytes.decode(encoding, 'replace')
|
||||
return (content, urlh)
|
||||
|
||||
@ -184,6 +210,11 @@ class InfoExtractor(object):
|
||||
""" Returns the data of the page as a string """
|
||||
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
||||
|
||||
def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to download XML'):
    """Return the xml as an xml.etree.ElementTree.Element

    The page is fetched with _download_webpage (note and errnote are passed
    through) and encoded to UTF-8 before being handed to the XML parser.
    """
    xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
    return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||
|
||||
def to_screen(self, msg):
    """Print msg to screen, prefixing it with '[ie_name]'"""
    prefixed_msg = u'[%s] %s' % (self.IE_NAME, msg)
    self._downloader.to_screen(prefixed_msg)
|
||||
@ -205,12 +236,14 @@ class InfoExtractor(object):
|
||||
self.to_screen(u'Logging in')
|
||||
|
||||
#Methods for following #608
|
||||
def url_result(self, url, ie=None):
|
||||
def url_result(self, url, ie=None, video_id=None):
    """Returns a url that points to a page that should be processed

    The result is a dict of type 'url' carrying the url and the extractor
    key; an 'id' entry is only added when video_id is given.
    """
    #TODO: ie should be the class used for getting the info
    result = dict(_type='url', url=url, ie_key=ie)
    if video_id is None:
        return result
    result['id'] = video_id
    return result
|
||||
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
||||
"""Returns a playlist"""
|
||||
@ -227,7 +260,7 @@ class InfoExtractor(object):
|
||||
Perform a regex search on the given string, using a single or a list of
|
||||
patterns returning the first matching group.
|
||||
In case of failure return a default value or raise a WARNING or a
|
||||
ExtractorError, depending on fatal, specifying the field name.
|
||||
RegexNotFoundError, depending on fatal, specifying the field name.
|
||||
"""
|
||||
if isinstance(pattern, (str, compat_str, compiled_regex_type)):
|
||||
mobj = re.search(pattern, string, flags)
|
||||
@ -247,7 +280,7 @@ class InfoExtractor(object):
|
||||
elif default is not None:
|
||||
return default
|
||||
elif fatal:
|
||||
raise ExtractorError(u'Unable to extract %s' % _name)
|
||||
raise RegexNotFoundError(u'Unable to extract %s' % _name)
|
||||
else:
|
||||
self._downloader.report_warning(u'unable to extract %s; '
|
||||
u'please report this issue on http://yt-dl.org/bug' % _name)
|
||||
@ -295,13 +328,21 @@ class InfoExtractor(object):
|
||||
|
||||
# Helper functions for extracting OpenGraph info
|
||||
@staticmethod
|
||||
def _og_regex(prop):
|
||||
return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
|
||||
def _og_regexes(prop):
    """Return two regexes matching the <meta> tag of the og:<prop> property.

    The first one expects the property attribute before the content
    attribute, the second one the reverse order. The content value is
    captured in group 1 (double quoted) or group 2 (single quoted).
    """
    property_re = r'property=[\'"]og:%s[\'"]' % re.escape(prop)
    content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
    attribute_orders = (
        (property_re, content_re),
        (content_re, property_re),
    )
    return [r'<meta[^>]+?%s[^>]+?%s' % order for order in attribute_orders]
|
||||
|
||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||
if name is None:
|
||||
name = 'OpenGraph %s' % prop
|
||||
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
|
||||
escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
|
||||
if escaped is None:
|
||||
return None
|
||||
return unescapeHTML(escaped)
|
||||
|
||||
def _og_search_thumbnail(self, html, **kargs):
|
||||
@ -313,10 +354,47 @@ class InfoExtractor(object):
|
||||
def _og_search_title(self, html, **kargs):
|
||||
return self._og_search_property('title', html, **kargs)
|
||||
|
||||
def _og_search_video_url(self, html, name='video url', **kargs):
|
||||
return self._html_search_regex([self._og_regex('video:secure_url'),
|
||||
self._og_regex('video')],
|
||||
html, name, **kargs)
|
||||
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
||||
regexes = self._og_regexes('video')
|
||||
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
||||
return self._html_search_regex(regexes, html, name, **kargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_regex(
|
||||
r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
|
||||
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
||||
html, display_name, fatal=False)
|
||||
|
||||
def _dc_search_uploader(self, html):
|
||||
return self._html_search_meta('dc.creator', html, 'uploader')
|
||||
|
||||
def _rta_search(self, html):
|
||||
# See http://www.rtalabel.org/index.php?content=howtofaq#single
|
||||
if re.search(r'(?ix)<meta\s+name="rating"\s+'
|
||||
r' content="RTA-5042-1996-1400-1577-RTA"',
|
||||
html):
|
||||
return 18
|
||||
return 0
|
||||
|
||||
def _media_rating_search(self, html):
|
||||
# See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
|
||||
rating = self._html_search_meta('rating', html)
|
||||
|
||||
if not rating:
|
||||
return None
|
||||
|
||||
RATING_TABLE = {
|
||||
'safe for kids': 0,
|
||||
'general': 8,
|
||||
'14 years': 14,
|
||||
'mature': 17,
|
||||
'restricted': 19,
|
||||
}
|
||||
return RATING_TABLE.get(rating.lower(), None)
|
||||
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
@ -355,7 +433,7 @@ class SearchInfoExtractor(InfoExtractor):
|
||||
|
||||
def _get_n_results(self, query, n):
|
||||
"""Get a specified number of results for a query"""
|
||||
raise NotImplementedError("This method must be implemented by sublclasses")
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
@property
|
||||
def SEARCH_KEY(self):
|
||||
|
22
youtube_dl/extractor/d8.py
Normal file
22
youtube_dl/extractor/d8.py
Normal file
@ -0,0 +1,22 @@
|
||||
# encoding: utf-8
|
||||
from .canalplus import CanalplusIE
|
||||
|
||||
|
||||
class D8IE(CanalplusIE):
    """Extractor for d8.tv pages; a CanalplusIE subclass with D8-specific settings."""

    IE_NAME = u'd8.tv'

    # URLs handled by this extractor; everything after the first path
    # component is captured as "path".
    _VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'

    # URL template used by the parent class; %s is filled in there.
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'

    _TEST = {
        u'url': u'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
        u'file': u'966289.flv',
        u'info_dict': {
            u'title': u'Campagne intime - Documentaire exceptionnel',
            u'description': u'md5:d2643b799fb190846ae09c61e59a859f',
            u'upload_date': u'20131108',
        },
        u'params': {
            # rtmp
            u'skip_download': True,
        },
    }
|
@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
"""Build a request with the family filter disabled"""
|
||||
request = compat_urllib_request.Request(url)
|
||||
# Both cookies go into a single header: a second add_header('Cookie', ...)
# call on the same request would replace the first value instead of adding to it.
request.add_header('Cookie', 'family_filter=off; ff=off')
|
||||
return request
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
@ -28,6 +29,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
|
||||
IE_NAME = u'dailymotion'
|
||||
|
||||
_FORMATS = [
|
||||
(u'stream_h264_ld_url', u'ld'),
|
||||
(u'stream_h264_url', u'standard'),
|
||||
(u'stream_h264_hq_url', u'hq'),
|
||||
(u'stream_h264_hd_url', u'hd'),
|
||||
(u'stream_h264_hd1080_url', u'hd180'),
|
||||
]
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
|
||||
@ -52,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
},
|
||||
u'skip': u'VEVO is only available in some countries',
|
||||
},
|
||||
# age-restricted video
|
||||
{
|
||||
u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
|
||||
u'file': u'xyh2zz.mp4',
|
||||
u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
|
||||
u'info_dict': {
|
||||
u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
|
||||
u'uploader': 'HotWaves1012',
|
||||
u'age_limit': 18,
|
||||
}
|
||||
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -60,7 +82,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
video_id = mobj.group(1).split('_')[0].split('?')[0]
|
||||
|
||||
video_extension = 'mp4'
|
||||
url = 'http://www.dailymotion.com/video/%s' % video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
@ -82,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
|
||||
# Looking for official user
|
||||
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
|
||||
webpage, 'video uploader')
|
||||
webpage, 'video uploader', fatal=False)
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
|
||||
@ -99,37 +121,43 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
# TODO: support choosing qualities
|
||||
|
||||
for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
|
||||
'stream_h264_hq_url','stream_h264_url',
|
||||
'stream_h264_ld_url']:
|
||||
if info.get(key):#key in info and info[key]:
|
||||
max_quality = key
|
||||
self.to_screen(u'Using %s' % key)
|
||||
break
|
||||
else:
|
||||
formats = []
|
||||
for (key, format_id) in self._FORMATS:
|
||||
video_url = info.get(key)
|
||||
if video_url is not None:
|
||||
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
||||
if m_size is not None:
|
||||
width, height = m_size.group(1), m_size.group(2)
|
||||
else:
|
||||
width, height = None, None
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': format_id,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
if not formats:
|
||||
raise ExtractorError(u'Unable to extract video URL')
|
||||
video_url = info[max_quality]
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id)
|
||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id)
|
||||
self._list_available_subtitles(video_id, webpage)
|
||||
return
|
||||
|
||||
return [{
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'title': self._og_search_title(webpage),
|
||||
'ext': video_extension,
|
||||
'subtitles': video_subtitles,
|
||||
'thumbnail': info['thumbnail_url']
|
||||
}]
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id):
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
sub_list = self._download_webpage(
|
||||
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
||||
@ -158,7 +186,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
webpage = self._download_webpage(request,
|
||||
id, u'Downloading page %s' % pagenum)
|
||||
|
||||
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
|
||||
playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
|
||||
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
|
@ -25,7 +25,7 @@ class DepositFilesIE(InfoExtractor):
|
||||
url = 'http://depositfiles.com/en/files/' + file_id
|
||||
|
||||
# Retrieve file webpage with 'Free download' button pressed
|
||||
free_download_indication = { 'gateway_result' : '1' }
|
||||
free_download_indication = {'gateway_result' : '1'}
|
||||
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
|
||||
try:
|
||||
self.report_download_webpage(file_id)
|
||||
|
@ -1,4 +1,3 @@
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
@ -101,7 +100,7 @@ class EightTracksIE(InfoExtractor):
|
||||
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||
next_url = first_url
|
||||
res = []
|
||||
for i in itertools.count():
|
||||
for i in range(track_count):
|
||||
api_json = self._download_webpage(next_url, playlist_id,
|
||||
note=u'Downloading song information %s/%s' % (str(i+1), track_count),
|
||||
errnote=u'Failed to download song information')
|
||||
@ -116,7 +115,5 @@ class EightTracksIE(InfoExtractor):
|
||||
'ext': 'm4a',
|
||||
}
|
||||
res.append(info)
|
||||
if api_data['set']['at_last_track']:
|
||||
break
|
||||
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
|
||||
return res
|
||||
|
37
youtube_dl/extractor/eitb.py
Normal file
37
youtube_dl/extractor/eitb.py
Normal file
@ -0,0 +1,37 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
    """Extractor for eitb.tv video pages; hands the embedded player over to BrightcoveIE."""

    IE_NAME = u'eitb.tv'
    _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'

    _TEST = {
        u'add_ie': ['Brightcove'],
        u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
        u'md5': u'edf4436247185adee3ea18ce64c47998',
        u'info_dict': {
            u'id': u'2743577154001',
            u'ext': u'mp4',
            u'title': u'60 minutos (Lasa y Zabala, 30 años)',
            # All videos from eitb has this description in the brightcove info
            u'description': u'.',
            u'uploader': u'Euskal Telebista',
        },
    }

    def _real_extract(self, url):
        """Find the Brightcove URL in the page and return it as a url result.

        Raises ExtractorError when no Brightcove URL is found.
        """
        chapter_id = re.match(self._VALID_URL, url).group('chapter_id')
        webpage = self._download_webpage(url, chapter_id)

        brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
        if brightcove_url is None:
            raise ExtractorError(u'Could not extract the Brightcove url')

        # The BrightcoveExperience object doesn't contain the video id, we set
        # it manually
        player_param = '&%40videoPlayer={0}'.format(chapter_id)
        return self.url_result(brightcove_url + player_param, BrightcoveIE.ie_key())
|
@ -11,11 +11,11 @@ from ..utils import (
|
||||
|
||||
|
||||
class EscapistIE(InfoExtractor):
|
||||
_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
||||
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
|
||||
_TEST = {
|
||||
u'url': u'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||
u'file': u'6618-Breaking-Down-Baldurs-Gate.mp4',
|
||||
u'md5': u'c6793dbda81388f4264c1ba18684a74d',
|
||||
u'md5': u'ab3a706c681efca53f0a35f1415cf0d1',
|
||||
u'info_dict': {
|
||||
u"description": u"Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||
u"uploader": u"the-escapist-presents",
|
||||
@ -25,50 +25,60 @@ class EscapistIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
    """Extract the info dict for an Escapist video page.

    The player URL found in the page carries a ``config=`` parameter
    pointing at a configuration file.  That file is fetched once as is
    ('normal' format) and once with ``hq=1`` appended ('hq' format).

    Raises ExtractorError if the normal-quality configuration is not
    valid JSON; a failure for the hq variant is ignored.
    """
    mobj = re.match(self._VALID_URL, url)
    showName = mobj.group('showname')
    videoId = mobj.group('episode')

    self.report_extraction(videoId)
    webpage = self._download_webpage(url, videoId)

    videoDesc = self._html_search_regex(
        r'<meta name="description" content="([^"]*)"',
        webpage, u'description', fatal=False)

    playerUrl = self._og_search_video_url(webpage, name=u'player URL')

    title = self._html_search_regex(
        r'<meta name="title" content="([^"]*)"',
        webpage, u'title').split(' : ')[-1]

    configUrl = self._search_regex('config=(.*)$', playerUrl, u'config URL')
    configUrl = compat_urllib_parse.unquote(configUrl)

    formats = []

    def _add_format(name, cfgurl):
        # Download one configuration file and append its media URL to formats.
        configJSON = self._download_webpage(
            cfgurl, videoId,
            u'Downloading ' + name + ' configuration',
            u'Unable to download ' + name + ' configuration')

        # Technically, it's JavaScript, not JSON
        configJSON = configJSON.replace("'", '"')

        try:
            config = json.loads(configJSON)
        except (ValueError,) as err:
            raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))
        playlist = config['playlist']
        # The media URL is taken from the second playlist entry.
        formats.append({
            'url': playlist[1]['url'],
            'format_id': name,
        })

    _add_format(u'normal', configUrl)
    # Append hq=1 as a further query parameter, or as the first one when the
    # config URL has no query string yet.
    hq_url = configUrl + ('&hq=1' if '?' in configUrl else '?hq=1')
    try:
        _add_format(u'hq', hq_url)
    except ExtractorError:
        pass  # That's fine, we'll just use normal quality

    return {
        'id': videoId,
        'formats': formats,
        'uploader': showName,
        'title': title,
        'thumbnail': self._og_search_thumbnail(webpage),
        'description': videoDesc,
        'player_url': playerUrl,
    }
|
||||
|
@ -11,16 +11,17 @@ class ExfmIE(InfoExtractor):
|
||||
_SOUNDCLOUD_URL = r'(?:http://)?(?:www\.)?api\.soundcloud.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://ex.fm/song/1bgtzg',
|
||||
u'file': u'95223130.mp3',
|
||||
u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
|
||||
u'url': u'http://ex.fm/song/eh359',
|
||||
u'file': u'44216187.mp3',
|
||||
u'md5': u'e45513df5631e6d760970b14cc0c11e7',
|
||||
u'info_dict': {
|
||||
u"title": u"We Can't Stop - Miley Cyrus",
|
||||
u"uploader": u"Miley Cyrus",
|
||||
u'upload_date': u'20130603',
|
||||
u'description': u'Download "We Can\'t Stop" \r\niTunes: http://smarturl.it/WeCantStop?IQid=SC\r\nAmazon: http://smarturl.it/WeCantStopAMZ?IQid=SC',
|
||||
u"title": u"Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive",
|
||||
u"uploader": u"deadjournalist",
|
||||
u'upload_date': u'20120424',
|
||||
u'description': u'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
|
||||
},
|
||||
u'note': u'Soundcloud song',
|
||||
u'skip': u'The site is down too often',
|
||||
},
|
||||
{
|
||||
u'url': u'http://ex.fm/song/wddt8',
|
||||
@ -30,6 +31,7 @@ class ExfmIE(InfoExtractor):
|
||||
u'title': u'Safe and Sound',
|
||||
u'uploader': u'Capital Cities',
|
||||
},
|
||||
u'skip': u'The site is down too often',
|
||||
},
|
||||
]
|
||||
|
||||
|
50
youtube_dl/extractor/extremetube.py
Normal file
50
youtube_dl/extractor/extremetube.py
Normal file
@ -0,0 +1,50 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
class ExtremeTubeIE(InfoExtractor):
    """Information extractor for extremetube.com video pages."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
    _TEST = {
        u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
        u'file': u'652431.mp4',
        u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
        u'info_dict': {
            u"title": u"Music Video 14 british euro brit european cumshots swallow",
            u"uploader": u"unknown",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Fetch the page with the age-verification cookie set and build the info dict."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('videoid')
        page_url = 'http://www.' + match.group('url')

        request = compat_urllib_request.Request(page_url)
        request.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(request, video_id)

        title = self._html_search_regex(
            r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
        uploader = self._html_search_regex(
            r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader',
            fatal=False)
        quoted_video_url = self._html_search_regex(
            r'video_url=(.+?)&', webpage, u'video_url')
        video_url = compat_urllib_parse.unquote(quoted_video_url)

        url_path = compat_urllib_parse_urlparse(video_url).path
        # Extension: the path suffix without its leading dot.
        ext = os.path.splitext(url_path)[1][1:]
        # Format label: the first two '_'-separated fields of the path
        # component at index 5, joined with '-'.
        format_label = "-".join(url_path.split('/')[5].split('_')[:2])

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'url': video_url,
            'ext': ext,
            'format': format_label,
            'format_id': format_label,
            'age_limit': 18,
        }
|
@ -1,5 +1,4 @@
|
||||
import json
|
||||
import netrc
|
||||
import re
|
||||
import socket
|
||||
|
||||
@ -19,7 +18,8 @@ class FacebookIE(InfoExtractor):
|
||||
"""Information Extractor for Facebook"""
|
||||
|
||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
|
||||
_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
|
||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||
_NETRC_MACHINE = 'facebook'
|
||||
IE_NAME = u'facebook'
|
||||
_TEST = {
|
||||
@ -36,50 +36,56 @@ class FacebookIE(InfoExtractor):
|
||||
"""Report attempt to log in."""
|
||||
self.to_screen(u'Logging in')
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._downloader is None:
|
||||
return
|
||||
|
||||
useremail = None
|
||||
password = None
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
# Attempt to use provided username and password or .netrc data
|
||||
if downloader_params.get('username', None) is not None:
|
||||
useremail = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
try:
|
||||
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
|
||||
if info is not None:
|
||||
useremail = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
def _login(self):
|
||||
(useremail, password) = self._get_login_info()
|
||||
if useremail is None:
|
||||
return
|
||||
|
||||
# Log in
|
||||
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||
self.report_login()
|
||||
login_page = self._download_webpage(login_page_req, None, note=False,
|
||||
errnote=u'Unable to download login page')
|
||||
lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd')
|
||||
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd')
|
||||
|
||||
login_form = {
|
||||
'email': useremail,
|
||||
'pass': password,
|
||||
'login': 'Log+In'
|
||||
'lsd': lsd,
|
||||
'lgnrnd': lgnrnd,
|
||||
'next': 'http://facebook.com/home.php',
|
||||
'default_persistent': '0',
|
||||
'legacy_return': '1',
|
||||
'timezone': '-60',
|
||||
'trynum': '1',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
try:
|
||||
self.report_login()
|
||||
login_results = compat_urllib_request.urlopen(request).read()
|
||||
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||
return
|
||||
|
||||
check_form = {
|
||||
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'),
|
||||
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'),
|
||||
'name_action_selected': 'dont_save',
|
||||
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'),
|
||||
}
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
check_response = compat_urllib_request.urlopen(check_req).read()
|
||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||
self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
@ -93,7 +99,13 @@ class FacebookIE(InfoExtractor):
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
||||
if not m:
|
||||
raise ExtractorError(u'Cannot parse data')
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
if m_msg is not None:
|
||||
raise ExtractorError(
|
||||
u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError(u'Cannot parse data')
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||
params = json.loads(params_raw)
|
||||
|
58
youtube_dl/extractor/faz.py
Normal file
58
youtube_dl/extractor/faz.py
Normal file
@ -0,0 +1,58 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class FazIE(InfoExtractor):
    """Information extractor for videos on faz.net."""

    IE_NAME = u'faz.net'
    # The dot before "html" is escaped so only a literal ".html" suffix matches.
    _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'

    _TEST = {
        u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
        u'file': u'12610585.mp4',
        u'info_dict': {
            u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
            u'description': u'md5:1453fbf9a0d041d985a47306192ea253',
        },
    }

    def _real_extract(self, url):
        """Extract title, description, thumbnail and formats for a faz.net video.

        The page references a config XML (the first argument of a
        ``writeFLV(...)`` call); its ENCODINGS element is read for the LOW,
        HIGH and HQ variants, in that order.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        self.to_screen(video_id)
        webpage = self._download_webpage(url, video_id)
        config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
            u'config xml url')
        config_xml = self._download_webpage(config_xml_url, video_id,
            u'Downloading config xml')
        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))

        encodings = config.find('ENCODINGS')
        formats = []
        for code in ['LOW', 'HIGH', 'HQ']:
            encoding = encodings.find(code)
            if encoding is None:
                # This variant is not present in the config.
                continue
            encoding_url = encoding.find('FILENAME').text
            formats.append({
                'url': encoding_url,
                'ext': determine_ext(encoding_url),
                'format_id': code.lower(),
            })

        descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description')
        info = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': descr,
            'thumbnail': config.find('STILL/STILL_BIG').text,
        }
        # TODO: Remove when #980 has been merged
        # Until then the last format's fields are copied to the top level.
        info.update(formats[-1])
        return info
|
@ -39,7 +39,6 @@ class FKTVIE(InfoExtractor):
|
||||
for i, _ in enumerate(files, 1):
|
||||
video_id = '%04d%d' % (episode, i)
|
||||
video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
|
||||
video_title = 'Fernsehkritik %d.%d' % (episode, i)
|
||||
videos.append({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
|
38
youtube_dl/extractor/gamekings.py
Normal file
38
youtube_dl/extractor/gamekings.py
Normal file
@ -0,0 +1,38 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class GamekingsIE(InfoExtractor):
    """Information extractor for gamekings.tv video pages."""

    # "https?" makes the "s" optional, so both http and https URLs match.
    _VALID_URL = r'https?://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
    _TEST = {
        u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
        u'file': u'20130811.mp4',
        # MD5 is flaky, seems to change regularly
        #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
        u'info_dict': {
            u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
            u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
        }
    }

    def _real_extract(self, url):
        """Build the info dict from the page's OpenGraph video URL, title and description."""
        mobj = re.match(self._VALID_URL, url)
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        video_url = self._og_search_video_url(webpage)

        # The first run of digits in the video URL is used as the video id.
        video = re.search(r'[0-9]+', video_url)
        video_id = video.group(0)

        # Todo: add medium format
        video_url = video_url.replace(video_id, 'large/' + video_id)

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
        }
|
@ -1,55 +1,59 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
unescapeHTML,
|
||||
get_meta_content,
|
||||
)
|
||||
|
||||
|
||||
class GameSpotIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
|
||||
_TEST = {
|
||||
u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
|
||||
u"file": u"6410818.mp4",
|
||||
u"file": u"gs-2300-6410818.mp4",
|
||||
u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
|
||||
u"info_dict": {
|
||||
u"title": u"Arma 3 - Community Guide: SITREP I",
|
||||
u"upload_date": u"20130627",
|
||||
u'description': u'Check out this video where some of the basics of Arma 3 is explained.',
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_id = mobj.group('page_id')
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
video_id = self._html_search_regex([r'"og:video" content=".*?\?id=(\d+)"',
|
||||
r'http://www\.gamespot\.com/videoembed/(\d+)'],
|
||||
webpage, 'video id')
|
||||
data = compat_urllib_parse.urlencode({'id': video_id, 'newplayer': '1'})
|
||||
info_url = 'http://www.gamespot.com/pages/video_player/xml.php?' + data
|
||||
info_xml = self._download_webpage(info_url, video_id)
|
||||
doc = xml.etree.ElementTree.fromstring(info_xml)
|
||||
clip_el = doc.find('./playList/clip')
|
||||
data_video_json = self._search_regex(r'data-video=\'(.*?)\'', webpage, u'data video')
|
||||
data_video = json.loads(unescapeHTML(data_video_json))
|
||||
|
||||
http_urls = [{'url': node.find('filePath').text,
|
||||
'rate': int(node.find('rate').text)}
|
||||
for node in clip_el.find('./httpURI')]
|
||||
best_quality = sorted(http_urls, key=lambda f: f['rate'])[-1]
|
||||
video_url = best_quality['url']
|
||||
title = clip_el.find('./title').text
|
||||
ext = video_url.rpartition('.')[2]
|
||||
thumbnail_url = clip_el.find('./screenGrabURI').text
|
||||
view_count = int(clip_el.find('./views').text)
|
||||
upload_date = unified_strdate(clip_el.find('./postDate').text)
|
||||
# Transform the manifest url to a link to the mp4 files
|
||||
# they are used in mobile devices.
|
||||
f4m_url = data_video['videoStreams']['f4m_stream']
|
||||
f4m_path = compat_urlparse.urlparse(f4m_url).path
|
||||
QUALITIES_RE = r'((,\d+)+,?)'
|
||||
qualities = self._search_regex(QUALITIES_RE, f4m_path, u'qualities').strip(',').split(',')
|
||||
http_path = f4m_path[1:].split('/', 1)[1]
|
||||
http_template = re.sub(QUALITIES_RE, r'%s', http_path)
|
||||
http_template = http_template.replace('.csmil/manifest.f4m', '')
|
||||
http_template = compat_urlparse.urljoin('http://video.gamespotcdn.com/', http_template)
|
||||
formats = []
|
||||
for q in qualities:
|
||||
formats.append({
|
||||
'url': http_template % q,
|
||||
'ext': 'mp4',
|
||||
'format_id': q,
|
||||
})
|
||||
|
||||
return [{
|
||||
'id' : video_id,
|
||||
'url' : video_url,
|
||||
'ext' : ext,
|
||||
'title' : title,
|
||||
'thumbnail' : thumbnail_url,
|
||||
'upload_date' : upload_date,
|
||||
'view_count' : view_count,
|
||||
}]
|
||||
info = {
|
||||
'id': data_video['guid'],
|
||||
'title': compat_urllib_parse.unquote(data_video['title']),
|
||||
'formats': formats,
|
||||
'description': get_meta_content('description', webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(formats[-1])
|
||||
return info
|
||||
|
@ -11,6 +11,8 @@ from ..utils import (
|
||||
compat_urlparse,
|
||||
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
|
||||
@ -23,12 +25,52 @@ class GenericIE(InfoExtractor):
|
||||
{
|
||||
u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
||||
u'file': u'13601338388002.mp4',
|
||||
u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
|
||||
u'md5': u'6e15c93721d7ec9e9ca3fdbf07982cfd',
|
||||
u'info_dict': {
|
||||
u"uploader": u"www.hodiho.fr",
|
||||
u"title": u"R\u00e9gis plante sa Jeep"
|
||||
}
|
||||
},
|
||||
# embedded vimeo video
|
||||
{
|
||||
u'add_ie': ['Vimeo'],
|
||||
u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
|
||||
u'file': u'22444065.mp4',
|
||||
u'md5': u'2903896e23df39722c33f015af0666e2',
|
||||
u'info_dict': {
|
||||
u'title': u'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
|
||||
u"uploader_id": u"skillsmatter",
|
||||
u"uploader": u"Skills Matter",
|
||||
}
|
||||
},
|
||||
# bandcamp page with custom domain
|
||||
{
|
||||
u'add_ie': ['Bandcamp'],
|
||||
u'url': u'http://bronyrock.com/track/the-pony-mash',
|
||||
u'file': u'3235767654.mp3',
|
||||
u'info_dict': {
|
||||
u'title': u'The Pony Mash',
|
||||
u'uploader': u'M_Pallante',
|
||||
},
|
||||
u'skip': u'There is a limit of 200 free downloads / month for the test song',
|
||||
},
|
||||
# embedded brightcove video
|
||||
# it also tests brightcove videos that need to set the 'Referer' in the
|
||||
# http requests
|
||||
{
|
||||
u'add_ie': ['Brightcove'],
|
||||
u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
|
||||
u'info_dict': {
|
||||
u'id': u'2765128793001',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Le cours de bourse : l’analyse technique',
|
||||
u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9',
|
||||
u'uploader': u'BFM BUSINESS',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@ -117,16 +159,49 @@ class GenericIE(InfoExtractor):
|
||||
except ValueError:
|
||||
# since this is the last-resort InfoExtractor, if
|
||||
# this error is thrown, it'll be thrown here
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError(u'Failed to download URL: %s' % url)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# it's tempting to parse this further, but you would
|
||||
# have to take into account all the variations like
|
||||
# Video Title - Site Name
|
||||
# Site Name | Video Title
|
||||
# Video Title - Tagline | Site Name
|
||||
# and so on and so forth; it's just not practical
|
||||
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
||||
|
||||
# Look for BrightCove:
|
||||
m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
|
||||
if m_brightcove is not None:
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
if bc_url is not None:
|
||||
self.to_screen(u'Brightcove video detected.')
|
||||
bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
|
||||
return self.url_result(bc_url, 'Brightcove')
|
||||
|
||||
# Look for embedded Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)
|
||||
if mobj:
|
||||
player_url = unescapeHTML(mobj.group(1))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl, 'Vimeo')
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
||||
for tuppl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Look for Bandcamp pages with custom domain
|
||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||
if mobj is not None:
|
||||
burl = unescapeHTML(mobj.group(1))
|
||||
# Don't set the extractor because it can be a track url or an album
|
||||
return self.url_result(burl)
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
@ -134,7 +209,7 @@ class GenericIE(InfoExtractor):
|
||||
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
# Broaden the search a little bit: JWPlayer JS loader
|
||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"&]*)', webpage)
|
||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
|
||||
if mobj is None:
|
||||
# Try to find twitter cards info
|
||||
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||
@ -149,39 +224,28 @@ class GenericIE(InfoExtractor):
|
||||
# HTML5 video
|
||||
mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError(u'Unsupported URL: %s' % url)
|
||||
|
||||
# It's possible that one of the regexes
|
||||
# matched, but returned an empty group:
|
||||
if mobj.group(1) is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError(u'Did not find a valid video URL at %s' % url)
|
||||
|
||||
video_url = mobj.group(1)
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
||||
|
||||
# here's a fun little line of code for you:
|
||||
video_extension = os.path.splitext(video_id)[1][1:]
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
# it's tempting to parse this further, but you would
|
||||
# have to take into account all the variations like
|
||||
# Video Title - Site Name
|
||||
# Site Name | Video Title
|
||||
# Video Title - Tagline | Site Name
|
||||
# and so on and so forth; it's just not practical
|
||||
video_title = self._html_search_regex(r'<title>(.*)</title>',
|
||||
webpage, u'video title', default=u'video', flags=re.DOTALL)
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(r'(?:https?://)?([^/]*)/.*',
|
||||
url, u'video uploader')
|
||||
|
||||
return [{
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': video_extension,
|
||||
}]
|
||||
}
|
||||
|
@ -41,9 +41,9 @@ class GooglePlusIE(InfoExtractor):
|
||||
|
||||
# Extract update date
|
||||
upload_date = self._html_search_regex(
|
||||
r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*>
|
||||
r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
|
||||
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
|
||||
webpage, u'upload date', fatal=False)
|
||||
webpage, u'upload date', fatal=False, flags=re.VERBOSE)
|
||||
if upload_date:
|
||||
# Convert timestring to a format suitable for filename
|
||||
upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d")
|
||||
|
@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
||||
_TEST = {
|
||||
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||
u'file': u'390161.mp4',
|
||||
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
|
||||
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
|
||||
u'info_dict': {
|
||||
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
||||
u"title": u"How to Tie a Square Knot Properly"
|
||||
|
@ -30,7 +30,7 @@ class HypemIE(InfoExtractor):
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
track_id = mobj.group(1)
|
||||
|
||||
data = { 'ax': 1, 'ts': time.time() }
|
||||
data = {'ax': 1, 'ts': time.time()}
|
||||
data_encoded = compat_urllib_parse.urlencode(data)
|
||||
complete_url = url + "?" + data_encoded
|
||||
request = compat_urllib_request.Request(complete_url)
|
||||
@ -68,4 +68,4 @@ class HypemIE(InfoExtractor):
|
||||
'ext': "mp3",
|
||||
'title': title,
|
||||
'artist': artist,
|
||||
}]
|
||||
}]
|
||||
|
@ -26,7 +26,7 @@ class InstagramIE(InfoExtractor):
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': self._og_search_video_url(webpage),
|
||||
'url': self._og_search_video_url(webpage, secure=False),
|
||||
'ext': 'mp4',
|
||||
'title': u'Video by %s' % uploader_id,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
|
84
youtube_dl/extractor/internetvideoarchive.py
Normal file
84
youtube_dl/extractor/internetvideoarchive.py
Normal file
@ -0,0 +1,84 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
xpath_with_ns,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for Internet Video Archive flash-player configuration URLs.

    The extraction is a two step process: the flash configuration XML names
    a playlist URL, which is rewritten to the MRSS variant and queried for
    the list of available renditions.
    """
    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'

    _TEST = {
        u'url': u'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
        u'file': u'452693.mp4',
        u'info_dict': {
            u'title': u'SKYFALL',
            u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
            u'duration': 153,
        },
    }

    @staticmethod
    def _build_url(query):
        """Return the flash configuration URL carrying the raw *query* string."""
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    @staticmethod
    def _clean_query(query):
        """Return a url-encoded query keeping only the arguments we need.

        Only ``publishedid`` and ``customerid`` are carried over (first value
        of each); ``playerid`` and ``videokbrate`` are always overridden.
        """
        NEEDED_ARGS = ['publishedid', 'customerid']
        query_dic = compat_urlparse.parse_qs(query)
        cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
        # Other player ids return m3u8 urls
        cleaned_dic['playerid'] = '247'
        cleaned_dic['videokbrate'] = '100000'
        return compat_urllib_parse.urlencode(cleaned_dic)

    def _real_extract(self, url):
        """Download the player configuration and build the info dictionary.

        Returns a dict with ``id``, ``title``, ``formats`` (sorted by
        ascending bitrate), ``thumbnail``, ``description`` and ``duration``.
        ``duration`` is None when the feed lists no media content.
        """
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_urlparse.parse_qs(query)
        video_id = query_dic['publishedid'][0]
        url = self._build_url(query)

        flashconfiguration_xml = self._download_webpage(
            url, video_id, u'Downloading flash configuration')
        flashconfiguration = xml.etree.ElementTree.fromstring(
            flashconfiguration_xml.encode('utf-8'))
        file_url = flashconfiguration.find('file').text
        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
        # Replace some of the parameters in the query to get the best quality
        # and http links (no m3u8 manifests)
        file_url = re.sub(
            r'(?<=\?)(.+)$',
            lambda m: self._clean_query(m.group()),
            file_url)
        info_xml = self._download_webpage(
            file_url, video_id, u'Downloading video info')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        item = info.find('channel/item')

        def _bp(p):
            # Expand the namespace prefixes used by the MRSS feed
            return xpath_with_ns(
                p,
                {'media': 'http://search.yahoo.com/mrss/',
                 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})

        formats = []
        # Attributes of the last <media:content> element; the duration is
        # read from it after the loop instead of relying on the loop variable
        # leaking out (which raised NameError for an empty feed).
        last_attr = None
        for content in item.findall(_bp('media:group/media:content')):
            attr = content.attrib
            f_url = attr['url']
            formats.append({
                'url': f_url,
                'ext': determine_ext(f_url),
                'width': int(attr['width']),
                'bitrate': int(attr['bitrate']),
            })
            last_attr = attr
        formats = sorted(formats, key=lambda f: f['bitrate'])

        if last_attr is not None:
            duration = int(last_attr['duration'])
        else:
            duration = None

        return {
            'id': video_id,
            'title': item.find('title').text,
            'formats': formats,
            'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],
            'description': item.find('description').text,
            'duration': duration,
        }
|
@ -6,6 +6,7 @@ import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class JeuxVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
|
||||
|
||||
@ -21,27 +22,31 @@ class JeuxVideoIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = re.match(self._VALID_URL, url).group(1)
|
||||
title = mobj.group(1)
|
||||
webpage = self._download_webpage(url, title)
|
||||
m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
|
||||
|
||||
xml_link = m_download.group(1)
|
||||
xml_link = self._html_search_regex(
|
||||
r'<param name="flashvars" value="config=(.*?)" />',
|
||||
webpage, u'config URL')
|
||||
|
||||
id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
|
||||
video_id = self._search_regex(
|
||||
r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
|
||||
xml_link, u'video ID')
|
||||
|
||||
xml_config = self._download_webpage(xml_link, title,
|
||||
'Downloading XML config')
|
||||
xml_config = self._download_webpage(
|
||||
xml_link, title, u'Downloading XML config')
|
||||
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
|
||||
info = re.search(r'<format\.json>(.*?)</format\.json>',
|
||||
xml_config, re.MULTILINE|re.DOTALL).group(1)
|
||||
info = json.loads(info)['versions'][0]
|
||||
info_json = self._search_regex(
|
||||
r'(?sm)<format\.json>(.*?)</format\.json>',
|
||||
xml_config, u'JSON information')
|
||||
info = json.loads(info_json)['versions'][0]
|
||||
|
||||
video_url = 'http://video720.jeuxvideo.com/' + info['file']
|
||||
|
||||
return {'id': id,
|
||||
'title' : config.find('titre_video').text,
|
||||
'ext' : 'mp4',
|
||||
'url' : video_url,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': config.find('image').text,
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': config.find('titre_video').text,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': config.find('image').text,
|
||||
}
|
||||
|
@ -1,8 +1,10 @@
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
class KankanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
|
||||
@ -30,7 +32,10 @@ class KankanIE(InfoExtractor):
|
||||
video_id, u'Downloading video url info')
|
||||
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
|
||||
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
|
||||
video_url = 'http://%s%s' % (ip, path)
|
||||
param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
|
||||
param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
|
||||
key = _md5('xl_mp43651' + param1 + param2)
|
||||
video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
|
||||
|
||||
return {'id': video_id,
|
||||
'title': title,
|
||||
|
61
youtube_dl/extractor/keezmovies.py
Normal file
61
youtube_dl/extractor/keezmovies.py
Normal file
@ -0,0 +1,61 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
|
||||
class KeezMoviesIE(InfoExtractor):
    """Extractor for keezmovies.com video pages."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>keezmovies\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
    _TEST = {
        u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
        u'file': u'1214711.mp4',
        u'md5': u'6e297b7e789329923fcf83abb67c9289',
        u'info_dict': {
            u"title": u"Petite Asian Lady Mai Playing In Bathtub",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for *url*, or a url_result for embedded videos."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('videoid')
        # Always request the http://www. form of the page
        page_url = 'http://www.' + url_match.group('url')

        request = compat_urllib_request.Request(page_url)
        request.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(request, video_id)

        # embedded video: delegate to whichever extractor handles that URL
        iframe_match = re.search(r'href="([^"]+)"></iframe>', webpage)
        if iframe_match:
            return self.url_result(iframe_match.group(1))

        video_title = self._html_search_regex(
            r'<h1 [^>]*>([^<]+)', webpage, u'title')
        quoted_url = self._html_search_regex(
            r'video_url=(.+?)&', webpage, u'video_url')
        video_url = compat_urllib_parse.unquote(quoted_url)
        if 'encrypted=true' in webpage:
            # The page's video_title value is used as the AES password
            password = self._html_search_regex(
                r'video_title=(.+?)&', webpage, u'password')
            decrypted = aes_decrypt_text(video_url, password, 32)
            video_url = decrypted.decode('utf-8')

        url_path = compat_urllib_parse_urlparse(video_url).path
        _, dotted_ext = os.path.splitext(url_path)
        extension = dotted_ext[1:]
        # Format name: first two '_'-separated fields of the fifth path piece
        name_fields = url_path.split('/')[4].split('_')
        format_name = "-".join(name_fields[:2])

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'ext': extension,
            'format': format_name,
            'format_id': format_name,
            'age_limit': age_limit,
        }
|
@ -1,16 +1,17 @@
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
get_meta_content,
|
||||
ExtractorError,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
class LivestreamIE(InfoExtractor):
|
||||
IE_NAME = u'livestream'
|
||||
_VALID_URL = r'http://new.livestream.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
||||
_TEST = {
|
||||
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
||||
@ -40,13 +41,9 @@ class LivestreamIE(InfoExtractor):
|
||||
|
||||
if video_id is None:
|
||||
# This is an event page:
|
||||
player = get_meta_content('twitter:player', webpage)
|
||||
if player is None:
|
||||
raise ExtractorError('Couldn\'t extract event api url')
|
||||
api_url = player.replace('/player', '')
|
||||
api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
|
||||
info = json.loads(self._download_webpage(api_url, event_name,
|
||||
u'Downloading event info'))
|
||||
config_json = self._search_regex(r'window.config = ({.*?});',
|
||||
webpage, u'window config')
|
||||
info = json.loads(config_json)['event']
|
||||
videos = [self._extract_video_info(video_data['data'])
|
||||
for video_data in info['feed']['data'] if video_data['type'] == u'video']
|
||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
||||
@ -58,3 +55,44 @@ class LivestreamIE(InfoExtractor):
|
||||
info = json.loads(self._download_webpage(api_url, video_id,
|
||||
u'Downloading video info'))
|
||||
return self._extract_video_info(info)
|
||||
|
||||
|
||||
# The original version of Livestream uses a different system
|
||||
class LivestreamOriginalIE(InfoExtractor):
    """Extractor for clips hosted on www.livestream.com (served over rtmp)."""
    IE_NAME = u'livestream:original'
    _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
    _TEST = {
        u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
        u'info_dict': {
            u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
            u'ext': u'flv',
            u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
        },
        u'params': {
            # rtmp
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Query the clip details API and return the rtmp info dict."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        user = url_match.group('user')
        api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)

        api_response = self._download_webpage(api_url, video_id)
        doc = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
        item = doc.find('channel').find('item')

        thumbnail_xpath = xpath_with_ns(
            'media:thumbnail', {'media': 'http://search.yahoo.com/mrss'})
        thumbnail_url = item.find(thumbnail_xpath).attrib['url']
        # The play path is derived from the thumbnail location: drop the
        # trailing number and extension (like 1.jpg)
        path = self._search_regex(
            r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')

        return {
            'id': video_id,
            'title': item.find('title').text,
            'url': 'rtmp://extondemand.livestream.com/ondemand',
            'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path),
            'ext': 'flv',
            'thumbnail': thumbnail_url,
        }
|
||||
|
@ -20,10 +20,12 @@ class MetacafeIE(InfoExtractor):
|
||||
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
||||
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
||||
IE_NAME = u'metacafe'
|
||||
_TESTS = [{
|
||||
_TESTS = [
|
||||
# Youtube video
|
||||
{
|
||||
u"add_ie": ["Youtube"],
|
||||
u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/",
|
||||
u"file": u"_aUehQsCQtM.flv",
|
||||
u"file": u"_aUehQsCQtM.mp4",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20090102",
|
||||
u"title": u"The Electric Company | \"Short I\" | PBS KIDS GO!",
|
||||
@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor):
|
||||
u"uploader_id": u"PBS"
|
||||
}
|
||||
},
|
||||
# Normal metacafe video
|
||||
{
|
||||
u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/',
|
||||
u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad',
|
||||
u'info_dict': {
|
||||
u'id': u'11121940',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4',
|
||||
u'uploader': u'ign',
|
||||
u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
|
||||
},
|
||||
},
|
||||
# AnyClip video
|
||||
{
|
||||
u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/",
|
||||
u"file": u"an-dVVXnuY7Jh77J.mp4",
|
||||
u"info_dict": {
|
||||
u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3",
|
||||
u"uploader": u"anyclip",
|
||||
u"description": u"md5:38c711dd98f5bb87acf973d573442e67"
|
||||
}
|
||||
}]
|
||||
u"description": u"md5:38c711dd98f5bb87acf973d573442e67",
|
||||
},
|
||||
},
|
||||
# age-restricted video
|
||||
{
|
||||
u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/',
|
||||
u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09',
|
||||
u'info_dict': {
|
||||
u'id': u'5186653',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.',
|
||||
u'uploader': u'Dwayne Pipe',
|
||||
u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b',
|
||||
u'age_limit': 18,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def report_disclaimer(self):
|
||||
@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor):
|
||||
'submit': "Continue - I'm over 18",
|
||||
}
|
||||
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
try:
|
||||
self.report_age_confirmation()
|
||||
compat_urllib_request.urlopen(request).read()
|
||||
@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor):
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||
req.headers['Cookie'] = 'flashVersion=0;'
|
||||
|
||||
# AnyClip videos require the flashversion cookie so that we get the link
|
||||
# to the mp4 file
|
||||
mobj_an = re.match(r'^an-(.*?)$', video_id)
|
||||
if mobj_an:
|
||||
req.headers['Cookie'] = 'flashVersion=0;'
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
# Extract URL, uploader and title from webpage
|
||||
@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor):
|
||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||
webpage, u'uploader nickname', fatal=False)
|
||||
|
||||
if re.search(r'"contentRating":"restricted"', webpage) is not None:
|
||||
age_limit = 18
|
||||
else:
|
||||
age_limit = 0
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor):
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': video_ext,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ class MixcloudIE(InfoExtractor):
|
||||
'title': info['name'],
|
||||
'url': final_song_url,
|
||||
'ext': 'mp3',
|
||||
'description': info['description'],
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info['pictures'].get('extra_large'),
|
||||
'uploader': info['user']['name'],
|
||||
'uploader_id': info['user']['username'],
|
||||
|
49
youtube_dl/extractor/mofosex.py
Normal file
49
youtube_dl/extractor/mofosex.py
Normal file
@ -0,0 +1,49 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
class MofosexIE(InfoExtractor):
    """Extractor for mofosex.com video pages."""
    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
    _TEST = {
        u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
        u'file': u'5018.mp4',
        u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
        u'info_dict': {
            u"title": u"Japanese Teen Music Video",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Download the page for *url* and return its info dict."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('videoid')
        # Always request the http://www. form of the page
        page_url = 'http://www.' + url_match.group('url')

        request = compat_urllib_request.Request(page_url)
        request.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(request, video_id)

        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
        quoted_url = self._html_search_regex(
            r'flashvars.video_url = \'([^\']+)', webpage, u'video_url')
        video_url = compat_urllib_parse.unquote(quoted_url)

        url_path = compat_urllib_parse_urlparse(video_url).path
        _, dotted_ext = os.path.splitext(url_path)
        extension = dotted_ext[1:]
        # Format name: first two '_'-separated fields of the sixth path piece
        name_fields = url_path.split('/')[5].split('_')
        format_name = "-".join(name_fields[:2])

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'title': video_title,
            'url': video_url,
            'ext': extension,
            'format': format_name,
            'format_id': format_name,
            'age_limit': age_limit,
        }
|
@ -26,6 +26,7 @@ class MTVIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
u'add_ie': ['Vevo'],
|
||||
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
|
||||
u'file': u'USCJY1331283.mp4',
|
||||
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
|
||||
@ -47,7 +48,7 @@ class MTVIE(InfoExtractor):
|
||||
def _transform_rtmp_url(rtmp_video_url):
|
||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
|
||||
if not m:
|
||||
raise ExtractorError(u'Cannot transform RTMP url')
|
||||
return rtmp_video_url
|
||||
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
||||
return base + m.group('finalid')
|
||||
|
||||
@ -58,7 +59,6 @@ class MTVIE(InfoExtractor):
|
||||
if '/error_country_block.swf' in metadataXml:
|
||||
raise ExtractorError(u'This video is not available from your country.', expected=True)
|
||||
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
|
||||
renditions = mdoc.findall('.//rendition')
|
||||
|
||||
formats = []
|
||||
for rendition in mdoc.findall('.//rendition'):
|
||||
@ -80,6 +80,8 @@ class MTVIE(InfoExtractor):
|
||||
video_id = self._id_from_uri(uri)
|
||||
self.report_extraction(video_id)
|
||||
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
|
||||
# Remove the templates, like &device={device}
|
||||
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url)
|
||||
if 'acceptMethods' not in mediagen_url:
|
||||
mediagen_url += '&acceptMethods=fms'
|
||||
mediagen_page = self._download_webpage(mediagen_url, video_id,
|
||||
@ -87,7 +89,7 @@ class MTVIE(InfoExtractor):
|
||||
|
||||
description_node = itemdoc.find('description')
|
||||
if description_node is not None:
|
||||
description = description_node.text
|
||||
description = description_node.text.strip()
|
||||
else:
|
||||
description = None
|
||||
|
||||
|
48
youtube_dl/extractor/myspace.py
Normal file
48
youtube_dl/extractor/myspace.py
Normal file
@ -0,0 +1,48 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class MySpaceIE(InfoExtractor):
    """Extractor for myspace.com video pages (streams are served over rtmp)."""
    _VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)'

    _TEST = {
        u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689',
        u'info_dict': {
            u'id': u'100008689',
            u'ext': u'flv',
            u'title': u'Viva La Vida',
            u'description': u'The official Viva La Vida video, directed by Hype Williams',
            u'uploader': u'Coldplay',
            u'uploader_id': u'coldplay',
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Parse the page's embedded ``context`` JSON into an info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        context_json = self._search_regex(
            r'context = ({.*?});', webpage, u'context')
        video = json.loads(context_json)['video']
        # streamUrl holds "<rtmp url>;<play path>"
        stream_parts = video['streamUrl'].split(';', 1)
        rtmp_url, play_path = stream_parts

        return {
            'id': compat_str(video['mediaId']),
            'title': video['title'],
            'url': rtmp_url,
            'play_path': play_path,
            'ext': 'flv',
            'description': video['description'],
            'thumbnail': video['imageUrl'],
            'uploader': video['artistName'],
            'uploader_id': video['artistUsername'],
        }
|
120
youtube_dl/extractor/nhl.py
Normal file
120
youtube_dl/extractor/nhl.py
Normal file
@ -0,0 +1,120 @@
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NHLBaseInfoExtractor(InfoExtractor):
    """Shared helpers for the nhl.com videocenter extractors."""

    @staticmethod
    def _fix_json(json_string):
        """Return *json_string* with every backslash-escaped single quote unescaped."""
        escaped_quote = '\\\''
        plain_quote = '\''
        return json_string.replace(escaped_quote, plain_quote)

    def _extract_video(self, info):
        """Build the info dict for one playlist entry.

        *info* is a dict from which ``id``, ``publishPoint``, ``name``,
        ``description``, ``duration``, ``bigImage`` and ``releaseDate``
        are read.
        """
        video_id = info['id']
        self.report_extraction(video_id)

        # Ask the servlet for the final location of the "_sd" variant
        sd_path = info['publishPoint'].replace('.mp4', '_sd.mp4')
        query = compat_urllib_parse.urlencode({
            'type': 'fvod',
            'path': sd_path,
        })
        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + query
        path_response = self._download_webpage(
            path_url, video_id, u'Downloading final video url')
        video_url = xml.etree.ElementTree.fromstring(path_response).find('path').text

        urljoin = compat_urlparse.urljoin
        return {
            'id': video_id,
            'title': info['name'],
            'url': video_url,
            'ext': determine_ext(video_url),
            'description': info['description'],
            'duration': int(info['duration']),
            # bigImage is resolved against the /u/ directory of the video host
            'thumbnail': urljoin(urljoin(video_url, '/u/'), info['bigImage']),
            # Only the part of releaseDate before the first '.' is parsed
            'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
        }
|
||||
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
    """Extractor for a single nhl.com videocenter video (URL carries ``id=``)."""

    IE_NAME = u'nhl.com'
    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'

    _TEST = {
        u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
        u'file': u'453614.mp4',
        u'info_dict': {
            u'title': u'Quick clip: Weise 4-3 goal vs Flames',
            u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.',
            u'duration': 18,
            u'upload_date': u'20131006',
        },
    }

    def _real_extract(self, url):
        """Fetch the one-element JSON playlist for the id and extract its entry."""
        video_id = re.match(self._VALID_URL, url).group('id')
        json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
        raw_json = self._download_webpage(
            json_url, video_id, u'Downloading info json')
        # The servlet output is passed through _fix_json before parsing.
        playlist = json.loads(self._fix_json(raw_json))
        return self._extract_video(playlist[0])
|
||||
|
||||
|
||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
    """Extractor that returns the videos of an NHL videocenter category as a playlist."""

    IE_NAME = u'nhl.com:videocenter'
    IE_DESC = u'NHL videocenter category'
    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'

    @classmethod
    def suitable(cls, url):
        """Return False for URLs NHLIE accepts, otherwise defer to the base check."""
        if NHLIE.suitable(url):
            return False
        return super(NHLVideocenterIE, cls).suitable(url)

    def _download_fixed_json(self, request_url, playlist_title):
        """Download request_url and return the body passed through _fix_json."""
        return self._fix_json(
            self._download_webpage(request_url, playlist_title))

    def _real_extract(self, url):
        """Return a playlist result built from the category's browse servlet.

        The category id and the playlist title are both scraped from the
        videocenter page; the video list itself comes from the JSON
        ``browse`` servlet on the same host.
        """
        mobj = re.match(self._VALID_URL, url)
        team = mobj.group('team')
        webpage = self._download_webpage(url, team)
        cat_id = self._search_regex(
            [r'var defaultCatId = "(.+?)";',
             r'{statusIndex:0,index:0,.*?id:(.*?),'],
            webpage, u'category id')
        playlist_title = self._html_search_regex(
            r'tab0"[^>]*?>(.*?)</td>',
            webpage, u'playlist title', flags=re.DOTALL).lower().capitalize()

        data = compat_urllib_parse.urlencode({
            'cid': cat_id,
            # This is the default value
            'count': 12,
            'ptrs': 3,
            'format': 'json',
        })
        path = '/videocenter/servlets/browse?' + data
        request_url = compat_urlparse.urljoin(url, path)

        response = self._download_fixed_json(request_url, playlist_title)
        if not response.strip():
            # An empty body is retried once with the extra "newvideos" flag.
            self._downloader.report_warning(u'Got an empty response, trying '
                                            u'adding the "newvideos" parameter')
            response = self._download_fixed_json(
                request_url + '&newvideos=true', playlist_title)
        videos = json.loads(response)

        return {
            '_type': 'playlist',
            'title': playlist_title,
            'id': cat_id,
            'entries': [self._extract_video(i) for i in videos],
        }
|
131
youtube_dl/extractor/niconico.py
Normal file
131
youtube_dl/extractor/niconico.py
Normal file
@ -0,0 +1,131 @@
|
||||
# encoding: utf-8
|
||||
|
||||
import re
|
||||
import socket
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NiconicoIE(InfoExtractor):
    """Extractor for nicovideo.jp watch pages; logs in before extracting."""

    IE_NAME = u'niconico'
    IE_DESC = u'ニコニコ動画'

    _TEST = {
        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
        u'file': u'sm22312215.mp4',
        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
        u'info_dict': {
            u'title': u'Big Buck Bunny',
            u'uploader': u'takuya0301',
            u'uploader_id': u'2698420',
            u'upload_date': u'20131123',
            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
        u'params': {
            u'username': u'ydl.niconico@gmail.com',
            u'password': u'youtube-dl',
        },
    }

    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
    _NETRC_MACHINE = 'niconico'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        """Log in once before any extraction takes place."""
        self._login()

    def _login(self):
        """Post the credentials to the login endpoint.

        Returns True when the login page shows no error, False when no
        credentials are available (and login is optional) or when the site
        reports a login error.

        Raises ExtractorError when no credentials are available and
        _LOGIN_REQUIRED is set.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        # Log in
        login_form_strs = {
            u'mail': username,
            u'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
            u'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
            request, u'', note=u'Logging in', errnote=u'Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _real_extract(self, url):
        """Collect metadata from the getthumbinfo XML and the url from getflv.

        The uploader's nickname is looked up best-effort: if that download
        fails with a network error, the uploader id is used as the name.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        # Get video webpage. We are not actually interested in it, but need
        # the cookies in order to be able to download the info webpage
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

        video_info_webpage = self._download_webpage(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note=u'Downloading video info page')

        # Get flv info
        flv_info_webpage = self._download_webpage(
            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
            video_id, u'Downloading flv info')
        # The getflv answer is parsed as a query string; 'url' is the media url.
        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]

        # Start extracting information
        video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
        video_title = video_info.find('.//title').text
        video_extension = video_info.find('.//movie_type').text
        video_format = video_extension.upper()
        video_thumbnail = video_info.find('.//thumbnail_url').text
        video_description = video_info.find('.//description').text
        video_uploader_id = video_info.find('.//user_id').text
        video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
        video_webpage_url = video_info.find('.//watch_url').text

        # Element.text is a string (or None); expose the counter as a number
        # and fall back to None when it cannot be parsed.
        view_counter_text = video_info.find('.//view_counter').text
        try:
            video_view_count = int(view_counter_text)
        except (TypeError, ValueError):
            video_view_count = None

        # uploader
        video_uploader = video_uploader_id
        # Use a dedicated name so the ``url`` argument is not overwritten.
        user_info_url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
        try:
            user_info_webpage = self._download_webpage(
                user_info_url, video_id, note=u'Downloading user information')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
        else:
            user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
            video_uploader = user_info.find('.//nickname').text

        return {
            'id': video_id,
            'url': video_real_url,
            'title': video_title,
            'ext': video_extension,
            'format': video_format,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'view_count': video_view_count,
            'webpage_url': video_webpage_url,
        }
|
46
youtube_dl/extractor/nowvideo.py
Normal file
46
youtube_dl/extractor/nowvideo.py
Normal file
@ -0,0 +1,46 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urlparse
|
||||
|
||||
|
||||
class NowVideoIE(InfoExtractor):
    """Extractor for nowvideo.ch video pages."""

    _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.ch/video/(?P<id>\w+)'
    _TEST = {
        u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
        u'file': u'0mw0yow7b6dxa.flv',
        u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817',
        u'info_dict': {
            u"title": u"youtubedl test video _BaW_jenozKc.mp4"
        }
    }

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for the video.

        The title comes from the watch page, the key from the embed page,
        and the final url from the player API, whose answer is parsed as a
        query string.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # Both pages are rebuilt from the id instead of reusing ``url``.
        watch_page = self._download_webpage(
            'http://www.nowvideo.ch/video/' + video_id, video_id)
        embed_page = self._download_webpage(
            'http://embed.nowvideo.ch/embed.php?v=' + video_id, video_id,
            u'Downloading embed page')

        self.report_extraction(video_id)

        video_title = self._html_search_regex(
            r'<h4>(.*)</h4>', watch_page, u'video title')
        video_key = self._search_regex(
            r'var fkzd="(.*)";', embed_page, u'video key')

        api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
        api_response = self._download_webpage(
            api_call, video_id, u'Downloading API page')
        video_url = compat_urlparse.parse_qs(api_response)[u'url'][0]

        entry = {
            'id': video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
        }
        return [entry]
|
68
youtube_dl/extractor/pornhub.py
Normal file
68
youtube_dl/extractor/pornhub.py
Normal file
@ -0,0 +1,68 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
|
||||
class PornHubIE(InfoExtractor):
    """Extractor for pornhub.com ``view_video.php`` pages."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9]+))'
    _TEST = {
        u'url': u'http://www.pornhub.com/view_video.php?viewkey=648719015',
        u'file': u'648719015.mp4',
        u'md5': u'882f488fa1f0026f023f33576004a2ed',
        u'info_dict': {
            u"uploader": u"BABES-COM",
            u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
            u"age_limit": 18
        }
    }

    def _real_extract(self, url):
        """Return an info dict whose 'formats' list is sorted by format name."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        # Normalise to the canonical http://www. form of the matched url.
        page_url = 'http://www.' + mobj.group('url')

        req = compat_urllib_request.Request(page_url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        video_title = self._html_search_regex(
            r'<h1 [^>]+>([^<]+)', webpage, u'title')
        video_uploader = self._html_search_regex(
            r'<b>From: </b>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader',
            fatal=False)
        thumbnail = self._html_search_regex(
            r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = compat_urllib_parse.unquote(thumbnail)

        video_urls = [
            compat_urllib_parse.unquote(quoted)
            for quoted in re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)]
        if '"encrypted":true' in webpage:
            # Encrypted urls are decrypted with the video title as password.
            password = self._html_search_regex(
                r'"video_title":"([^"]+)', webpage, u'password').replace('+', ' ')
            video_urls = [
                aes_decrypt_text(encrypted, password, 32).decode('utf-8')
                for encrypted in video_urls]

        formats = []
        for video_url in video_urls:
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            # The format name is the first two '_'-separated pieces of the
            # sixth path component, joined with '-'.
            format_name = "-".join(path.split('/')[5].split('_')[:2])
            formats.append({
                'url': video_url,
                'ext': extension,
                'format': format_name,
                'format_id': format_name,
            })

        def sort_key(entry):
            # Zero-pad every '-'-separated piece so they compare as strings.
            return [piece.zfill(6) for piece in entry['format'].split('-')]

        formats.sort(key=sort_key)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'title': video_title,
            'thumbnail': thumbnail,
            'formats': formats,
            'age_limit': 18,
        }
|
@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
|
||||
u'md5': u'374dd6dcedd24234453b295209aa69b6',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20090708",
|
||||
u"title": u"Marilyn-Monroe-Bathing"
|
||||
u"title": u"Marilyn-Monroe-Bathing",
|
||||
u"age_limit": 18
|
||||
}
|
||||
}
|
||||
|
||||
@ -38,6 +39,7 @@ class PornotubeIE(InfoExtractor):
|
||||
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
|
||||
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
|
||||
if upload_date: upload_date = unified_strdate(upload_date)
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
info = {'id': video_id,
|
||||
'url': video_url,
|
||||
@ -45,6 +47,7 @@ class PornotubeIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'format': 'flv'}
|
||||
'format': 'flv',
|
||||
'age_limit': age_limit}
|
||||
|
||||
return [info]
|
||||
|
@ -8,9 +8,12 @@ class RedTubeIE(InfoExtractor):
|
||||
_TEST = {
|
||||
u'url': u'http://www.redtube.com/66418',
|
||||
u'file': u'66418.mp4',
|
||||
u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
||||
# md5 varies from time to time, as in
|
||||
# https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
|
||||
#u'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
||||
u'info_dict': {
|
||||
u"title": u"Sucked on a toilet"
|
||||
u"title": u"Sucked on a toilet",
|
||||
u"age_limit": 18,
|
||||
}
|
||||
}
|
||||
|
||||
@ -30,9 +33,14 @@ class RedTubeIE(InfoExtractor):
|
||||
r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
|
||||
webpage, u'title')
|
||||
|
||||
# No self-labeling, but they describe themselves as
|
||||
# "Home of Videos Porno"
|
||||
age_limit = 18
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': video_extension,
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': video_extension,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
16
youtube_dl/extractor/rottentomatoes.py
Normal file
16
youtube_dl/extractor/rottentomatoes.py
Normal file
@ -0,0 +1,16 @@
|
||||
from .videodetective import VideoDetectiveIE
|
||||
|
||||
|
||||
# It just uses the same method as videodetective.com,
|
||||
# the internetvideoarchive.com is extracted from the og:video property
|
||||
class RottenTomatoesIE(VideoDetectiveIE):
    """Extractor for rottentomatoes.com trailer pages.

    Only the URL pattern and the test case are defined here; everything
    else is inherited from VideoDetectiveIE.
    """

    _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
        u'file': '613340.mp4',
        u'info_dict': {
            u'title': u'TOY STORY 3',
            u'description': u'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
        },
    }
|
@ -62,19 +62,6 @@ class RTLnowIE(InfoExtractor):
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1',
|
||||
u'file': u'127367.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20130926',
|
||||
u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...',
|
||||
u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin',
|
||||
u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg',
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
|
||||
u'file': u'124903.flv',
|
||||
|
58
youtube_dl/extractor/rutube.py
Normal file
58
youtube_dl/extractor/rutube.py
Normal file
@ -0,0 +1,58 @@
|
||||
# encoding: utf-8
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class RutubeIE(InfoExtractor):
    """Extractor for rutube.ru video pages (returns the m3u8 manifest url)."""

    # The dot of the host name is escaped so that it only matches a literal
    # '.', not an arbitrary character.
    _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'

    _TEST = {
        u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
        u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4',
        u'info_dict': {
            u'title': u'Раненный кенгуру забежал в аптеку',
            u'uploader': u'NTDRussian',
            u'uploader_id': u'29790',
        },
        u'params': {
            # It requires ffmpeg (m3u8 download)
            u'skip_download': True,
        },
    }

    def _get_api_response(self, short_id, subpath):
        """Download ``/api/play/<subpath>/<short_id>/`` and return the parsed JSON."""
        api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)
        response_json = self._download_webpage(api_url, short_id,
                                               u'Downloading %s json' % subpath)
        return json.loads(response_json)

    def _real_extract(self, url):
        """Return the info dict for the video.

        The short id used by the play API is taken from the path of the
        page's og:video url.

        Raises ExtractorError when the track info has no m3u8 manifest url.
        """
        mobj = re.match(self._VALID_URL, url)
        long_id = mobj.group('long_id')
        webpage = self._download_webpage(url, long_id)
        og_video = self._og_search_video_url(webpage)
        # Drop the leading '/' of the path to obtain the short id.
        short_id = compat_urlparse.urlparse(og_video).path[1:]
        options = self._get_api_response(short_id, 'options')
        trackinfo = self._get_api_response(short_id, 'trackinfo')
        # Some videos don't have the author field
        author = trackinfo.get('author') or {}
        m3u8_url = trackinfo['video_balancer'].get('m3u8')
        if m3u8_url is None:
            raise ExtractorError(u'Couldn\'t find m3u8 manifest url')

        return {
            'id': trackinfo['id'],
            'title': trackinfo['title'],
            'url': m3u8_url,
            'ext': 'mp4',
            'thumbnail': options['thumbnail_url'],
            'uploader': author.get('name'),
            'uploader_id': compat_str(author['id']) if author else None,
        }
|
@ -7,6 +7,7 @@ class SlashdotIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
u'add_ie': ['Ooyala'],
|
||||
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
|
||||
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
|
||||
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
|
||||
|
@ -29,19 +29,37 @@ class SoundcloudIE(InfoExtractor):
|
||||
)
|
||||
'''
|
||||
IE_NAME = u'soundcloud'
|
||||
_TEST = {
|
||||
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||
u'file': u'62986583.mp3',
|
||||
u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20121011",
|
||||
u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
|
||||
u"uploader": u"E.T. ExTerrestrial Music",
|
||||
u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
|
||||
}
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||
u'file': u'62986583.mp3',
|
||||
u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20121011",
|
||||
u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
|
||||
u"uploader": u"E.T. ExTerrestrial Music",
|
||||
u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
|
||||
}
|
||||
},
|
||||
# not streamable song
|
||||
{
|
||||
u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
|
||||
u'info_dict': {
|
||||
u'id': u'47127627',
|
||||
u'ext': u'mp3',
|
||||
u'title': u'Goldrushed',
|
||||
u'uploader': u'The Royal Concept',
|
||||
u'upload_date': u'20120521',
|
||||
},
|
||||
u'params': {
|
||||
# rtmp
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
@ -56,24 +74,82 @@ class SoundcloudIE(InfoExtractor):
|
||||
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
|
||||
|
||||
def _extract_info_dict(self, info, full_title=None, quiet=False):
|
||||
video_id = info['id']
|
||||
name = full_title or video_id
|
||||
if quiet == False:
|
||||
track_id = compat_str(info['id'])
|
||||
name = full_title or track_id
|
||||
if quiet:
|
||||
self.report_extraction(name)
|
||||
|
||||
thumbnail = info['artwork_url']
|
||||
if thumbnail is not None:
|
||||
thumbnail = thumbnail.replace('-large', '-t500x500')
|
||||
return {
|
||||
'id': info['id'],
|
||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
ext = info.get('original_format', u'mp3')
|
||||
result = {
|
||||
'id': track_id,
|
||||
'uploader': info['user']['username'],
|
||||
'upload_date': unified_strdate(info['created_at']),
|
||||
'title': info['title'],
|
||||
'ext': u'mp3',
|
||||
'title': info['title'],
|
||||
'description': info['description'],
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
if info.get('downloadable', False):
|
||||
# We can build a direct link to the song
|
||||
format_url = (
|
||||
u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
|
||||
track_id, self._CLIENT_ID))
|
||||
result['formats'] = [{
|
||||
'format_id': 'download',
|
||||
'ext': ext,
|
||||
'url': format_url,
|
||||
'vcodec': 'none',
|
||||
}]
|
||||
else:
|
||||
# We have to retrieve the url
|
||||
stream_json = self._download_webpage(
|
||||
'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._IPHONE_CLIENT_ID),
|
||||
track_id, u'Downloading track url')
|
||||
|
||||
formats = []
|
||||
format_dict = json.loads(stream_json)
|
||||
for key, stream_url in format_dict.items():
|
||||
if key.startswith(u'http'):
|
||||
formats.append({
|
||||
'format_id': key,
|
||||
'ext': ext,
|
||||
'url': stream_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
elif key.startswith(u'rtmp'):
|
||||
# The url doesn't have an rtmp app, we have to extract the playpath
|
||||
url, path = stream_url.split('mp3:', 1)
|
||||
formats.append({
|
||||
'format_id': key,
|
||||
'url': url,
|
||||
'play_path': 'mp3:' + path,
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
if not formats:
|
||||
# We fallback to the stream_url in the original info, this
|
||||
# cannot be always used, sometimes it can give an HTTP 404 error
|
||||
formats.append({
|
||||
'format_id': u'fallback',
|
||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
def format_pref(f):
|
||||
if f['format_id'].startswith('http'):
|
||||
return 2
|
||||
if f['format_id'].startswith('rtmp'):
|
||||
return 1
|
||||
return 0
|
||||
|
||||
formats.sort(key=format_pref)
|
||||
result['formats'] = formats
|
||||
|
||||
return result
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||
@ -106,70 +182,8 @@ class SoundcloudIE(InfoExtractor):
|
||||
class SoundcloudSetIE(SoundcloudIE):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
||||
IE_NAME = u'soundcloud:set'
|
||||
_TEST = {
|
||||
u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
|
||||
u"playlist": [
|
||||
{
|
||||
u"file":"30510138.mp3",
|
||||
u"md5":"f9136bf103901728f29e419d2c70f55d",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20111213",
|
||||
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
|
||||
u"uploader": u"The Royal Concept",
|
||||
u"title": u"D-D-Dance"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file":"47127625.mp3",
|
||||
u"md5":"09b6758a018470570f8fd423c9453dd8",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120521",
|
||||
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
|
||||
u"uploader": u"The Royal Concept",
|
||||
u"title": u"The Royal Concept - Gimme Twice"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file":"47127627.mp3",
|
||||
u"md5":"154abd4e418cea19c3b901f1e1306d9c",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120521",
|
||||
u"uploader": u"The Royal Concept",
|
||||
u"title": u"Goldrushed"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file":"47127629.mp3",
|
||||
u"md5":"2f5471edc79ad3f33a683153e96a79c1",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120521",
|
||||
u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
|
||||
u"uploader": u"The Royal Concept",
|
||||
u"title": u"In the End"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file":"47127631.mp3",
|
||||
u"md5":"f9ba87aa940af7213f98949254f1c6e2",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120521",
|
||||
u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
|
||||
u"uploader": u"The Royal Concept",
|
||||
u"title": u"Knocked Up"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"file":"75206121.mp3",
|
||||
u"md5":"f9d1fe9406717e302980c30de4af9353",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20130116",
|
||||
u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ",
|
||||
u"uploader": u"The Royal Concept",
|
||||
u"title": u"World On Fire"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
# it's in tests/test_playlists.py
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -188,7 +202,6 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
resolv_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(resolv_url, full_title)
|
||||
|
||||
videos = []
|
||||
info = json.loads(info_json)
|
||||
if 'errors' in info:
|
||||
for err in info['errors']:
|
||||
@ -208,7 +221,7 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
IE_NAME = u'soundcloud:user'
|
||||
|
||||
# it's in tests/test_playlists.py
|
||||
_TEST = None
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@ -5,21 +5,19 @@ from .mtv import MTVIE, _media_xml_tag
|
||||
|
||||
class SouthParkStudiosIE(MTVIE):
|
||||
IE_NAME = u'southparkstudios.com'
|
||||
_VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)'
|
||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||
|
||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||
|
||||
_TEST = {
|
||||
# Overwrite MTVIE properties we don't want
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
||||
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'Bat Daded',
|
||||
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||
},
|
||||
}
|
||||
|
||||
# Overwrite MTVIE properties we don't want
|
||||
_TESTS = []
|
||||
}]
|
||||
|
||||
def _get_thumbnail_url(self, uri, itemdoc):
|
||||
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||
@ -31,8 +29,23 @@ class SouthParkStudiosIE(MTVIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
url = u'http://www.' + mobj.group(u'url')
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
|
||||
webpage, u'mgid')
|
||||
return self._get_videos_info(mgid)
|
||||
|
||||
class SouthparkDeIE(SouthParkStudiosIE):
    """Extractor for southpark.de; only class attributes differ from the parent."""

    IE_NAME = u'southpark.de'
    _VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
    _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'

    _TESTS = [
        {
            u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
            u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
            u'info_dict': {
                u'title': u'The Government Won\'t Respect My Privacy',
                u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
            },
        },
    ]
|
||||
|
35
youtube_dl/extractor/space.py
Normal file
35
youtube_dl/extractor/space.py
Normal file
@ -0,0 +1,35 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveIE
|
||||
from ..utils import RegexNotFoundError, ExtractorError
|
||||
|
||||
|
||||
class SpaceIE(InfoExtractor):
    """Extractor for space.com video pages; delegates to the Brightcove extractor."""

    # The dot before 'html' is escaped so that it only matches a literal '.'.
    _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
    _TEST = {
        u'add_ie': ['Brightcove'],
        u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
        u'info_dict': {
            u'id': u'2780937028001',
            u'ext': u'mp4',
            u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
            u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61',
            u'uploader': u'TechMedia Networks',
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the page's Brightcove video.

        Raises ExtractorError (expected) when neither the og:video property
        nor a Brightcove object yields a url.
        """
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        try:
            # Some videos require the playerKey field, which isn't defined in
            # the BrightcoveExperience object
            brightcove_url = self._og_search_video_url(webpage)
        except RegexNotFoundError:
            # Other videos work fine with the info from the object
            brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
        if brightcove_url is None:
            raise ExtractorError(u'The webpage does not contain a video', expected=True)
        return self.url_result(brightcove_url, BrightcoveIE.ie_key())
|
74
youtube_dl/extractor/spankwire.py
Normal file
74
youtube_dl/extractor/spankwire.py
Normal file
@ -0,0 +1,74 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
|
||||
class SpankwireIE(InfoExtractor):
    """Extractor for spankwire.com video pages.

    Every ``flashvars.quality_XXXp`` URL found in the page becomes one
    format. When the page flags the URLs as encrypted they are decrypted
    with ``aes_decrypt_text`` using the page's ``flashvars.video_title``
    value as the password.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
    _TEST = {
        u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
        u'file': u'103545.mp4',
        u'md5': u'1b3f55e345500552dbc252a3e9c1af43',
        u'info_dict': {
            u"uploader": u"oreusz",
            u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
            u"description": u"Crazy Bitch X rated music video.",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page and return its info dict with all formats."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        # Rebuild the URL in its http://www. form regardless of the input.
        url = 'http://www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
        video_uploader = self._html_search_regex(
            r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
        thumbnail = self._html_search_regex(
            r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
        description = self._html_search_regex(
            r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)

        video_urls = [
            compat_urllib_parse.unquote(quoted_url)
            for quoted_url in re.findall(
                r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)
        ]
        # Plain substring test. The needle must contain a bare "." here: a
        # regex-style "\." would be searched for literally (backslash
        # included) and could never match the page text.
        if 'flashvars.encrypted = "true"' in webpage:
            password = self._html_search_regex(
                r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
            video_urls = [
                aes_decrypt_text(encrypted_url, password, 32).decode('utf-8')
                for encrypted_url in video_urls
            ]

        formats = []
        for video_url in video_urls:
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            # The format name is built from the first two "_"-separated
            # pieces of the fifth path component.
            format_id = "-".join(path.split('/')[4].split('_')[:2])
            formats.append({
                'url': video_url,
                'ext': extension,
                'format': format_id,
                'format_id': format_id,
            })
        # Zero-pad every piece so that e.g. "480p" sorts before "1080p".
        formats.sort(key=lambda f: [piece.zfill(6) for piece in f['format'].split('-')])

        age_limit = self._rta_search(webpage)

        return {
            'id': video_id,
            'uploader': video_uploader,
            'title': video_title,
            'thumbnail': thumbnail,
            'description': description,
            'formats': formats,
            'age_limit': age_limit,
        }
|
@ -6,14 +6,22 @@ from .common import InfoExtractor
|
||||
|
||||
class SpiegelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||
u'file': u'1259285.mp4',
|
||||
u'md5': u'2c2754212136f35fb4b19767d242f66e',
|
||||
u'info_dict': {
|
||||
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||
u'file': u'1309159.mp4',
|
||||
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
|
||||
u'info_dict': {
|
||||
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
@ -21,25 +29,38 @@ class SpiegelIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<div class="module-title">(.*?)</div>',
|
||||
webpage, u'title')
|
||||
video_title = self._html_search_regex(
|
||||
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
||||
|
||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
||||
xml_code = self._download_webpage(xml_url, video_id,
|
||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
||||
xml_code = self._download_webpage(
|
||||
xml_url, video_id,
|
||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
||||
|
||||
idoc = xml.etree.ElementTree.fromstring(xml_code)
|
||||
last_type = idoc[-1]
|
||||
filename = last_type.findall('./filename')[0].text
|
||||
duration = float(last_type.findall('./duration')[0].text)
|
||||
|
||||
video_url = 'http://video2.spiegel.de/flash/' + filename
|
||||
video_ext = filename.rpartition('.')[2]
|
||||
formats = [
|
||||
{
|
||||
'format_id': n.tag.rpartition('type')[2],
|
||||
'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
||||
'width': int(n.find('./width').text),
|
||||
'height': int(n.find('./height').text),
|
||||
'abr': int(n.find('./audiobitrate').text),
|
||||
'vbr': int(n.find('./videobitrate').text),
|
||||
'vcodec': n.find('./codec').text,
|
||||
'acodec': 'MP4A',
|
||||
}
|
||||
for n in list(idoc)
|
||||
# Blacklist type 6, it's extremely LQ and not available on the same server
|
||||
if n.tag.startswith('type') and n.tag != 'type6'
|
||||
]
|
||||
formats.sort(key=lambda f: f['vbr'])
|
||||
duration = float(idoc[0].findall('./duration')[0].text)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': video_ext,
|
||||
'title': video_title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
return [info]
|
||||
return info
|
||||
|
66
youtube_dl/extractor/streamcloud.py
Normal file
66
youtube_dl/extractor/streamcloud.py
Normal file
@ -0,0 +1,66 @@
|
||||
# coding: utf-8
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class StreamcloudIE(InfoExtractor):
    """Extractor for streamcloud.eu.

    The landing page only contains a form; its hidden/submit inputs are
    posted back to the same URL after a 12 second wait to obtain the page
    that carries the actual video data.
    """

    IE_NAME = u'streamcloud.eu'
    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'

    _TEST = {
        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
        u'file': u'skp9j99s4bpz.mp4',
        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
        u'info_dict': {
            u'title': u'youtube-dl test video \'/\\ ä ↭',
            u'duration': 9,
        },
        u'skip': u'Only available from the EU'
    }

    def _real_extract(self, url):
        """Submit the landing page form and scrape the resulting player page."""
        video_id = re.match(self._VALID_URL, url).group('id')

        landing_page = self._download_webpage(url, video_id)

        # (name, value) pairs of every hidden/submit input on the page.
        form_inputs = re.findall(r'''(?x)<input\s+
            type="(?:hidden|submit)"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', landing_page)
        post = compat_urllib_parse.urlencode(form_inputs)

        self.to_screen('%s: Waiting for timeout' % video_id)
        time.sleep(12)
        req = compat_urllib_request.Request(
            url, post,
            {b'Content-Type': b'application/x-www-form-urlencoded'})

        webpage = self._download_webpage(
            req, video_id, note=u'Downloading video page ...')
        title = self._html_search_regex(
            r'<h1[^>]*>([^<]+)<', webpage, u'title')
        video_url = self._search_regex(
            r'file:\s*"([^"]+)"', webpage, u'video URL')
        duration_str = self._search_regex(
            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
        if duration_str is None:
            duration = None
        else:
            duration = int(duration_str)
        thumbnail = self._search_regex(
            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'duration': duration,
            'thumbnail': thumbnail,
        }
|
@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
return any([self._downloader.params.get('writesubtitles', False),
|
||||
self._downloader.params.get('writeautomaticsub')])
|
||||
|
||||
def _list_available_subtitles(self, video_id, webpage=None):
|
||||
def _list_available_subtitles(self, video_id, webpage):
|
||||
""" outputs the available subtitles for the video """
|
||||
sub_lang_list = self._get_available_subtitles(video_id)
|
||||
sub_lang_list = self._get_available_subtitles(video_id, webpage)
|
||||
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
|
||||
sub_lang = ",".join(list(sub_lang_list.keys()))
|
||||
self.to_screen(u'%s: Available subtitles for video: %s' %
|
||||
@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
self.to_screen(u'%s: Available automatic captions for video: %s' %
|
||||
(video_id, auto_lang))
|
||||
|
||||
def extract_subtitles(self, video_id, video_webpage=None):
|
||||
def extract_subtitles(self, video_id, webpage):
|
||||
"""
|
||||
returns {sub_lang: sub} ,{} if subtitles not found or None if the
|
||||
subtitles aren't requested.
|
||||
@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
return None
|
||||
available_subs_list = {}
|
||||
if self._downloader.params.get('writeautomaticsub', False):
|
||||
available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
|
||||
available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
|
||||
if self._downloader.params.get('writesubtitles', False):
|
||||
available_subs_list.update(self._get_available_subtitles(video_id))
|
||||
available_subs_list.update(self._get_available_subtitles(video_id, webpage))
|
||||
|
||||
if not available_subs_list: # error, it didn't get the available subtitles
|
||||
return {}
|
||||
@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
return
|
||||
return sub
|
||||
|
||||
def _get_available_subtitles(self, video_id):
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
"""
|
||||
returns {sub_lang: url} or {} if not available
|
||||
Must be redefined by the subclasses
|
||||
|
45
youtube_dl/extractor/sztvhu.py
Normal file
45
youtube_dl/extractor/sztvhu.py
Normal file
@ -0,0 +1,45 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class SztvHuIE(InfoExtractor):
    """Extractor for sztv.hu / tvszombathely.hu article pages."""

    _VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
    _TEST = {
        u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
        u'file': u'20130909.mp4',
        u'md5': u'a6df607b11fb07d0e9f2ad94613375cb',
        u'info_dict': {
            u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
            u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
        },
        u'skip': u'Service temporarily disabled as of 2013-11-20'
    }

    def _real_extract(self, url):
        """Scrape the page for the media file name and the meta information."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The player config holds 'file: "<3 chars>:<name>"'; only the part
        # after the colon is appended to the media server base URL.
        video_file = self._search_regex(
            r'file: "...:(.*?)",', webpage, 'video file')
        video_url = 'http://media.sztv.hu/vod/' + video_file

        title = self._html_search_regex(
            r'<meta name="title" content="([^"]*?) - [^-]*? - [^-]*?"',
            webpage, 'video title')
        description = self._html_search_regex(
            r'<meta name="description" content="([^"]*)"/>',
            webpage, 'video description', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
        }
        info['ext'] = determine_ext(video_url)
        info['description'] = description
        info['thumbnail'] = thumbnail
        return info
|
@ -1,4 +1,5 @@
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@ -11,7 +12,7 @@ class TeamcocoIE(InfoExtractor):
|
||||
_TEST = {
|
||||
u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
||||
u'file': u'19705.mp4',
|
||||
u'md5': u'27b6f7527da5acf534b15f21b032656e',
|
||||
u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a',
|
||||
u'info_dict': {
|
||||
u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
||||
u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
|
||||
@ -31,16 +32,40 @@ class TeamcocoIE(InfoExtractor):
|
||||
self.report_extraction(video_id)
|
||||
|
||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
||||
data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
|
||||
data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
|
||||
|
||||
video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>',
|
||||
data, u'video URL')
|
||||
|
||||
return [{
|
||||
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
||||
formats = []
|
||||
for file in data.findall('files/file'):
|
||||
if file.attrib.get('playmode') == 'all':
|
||||
# it just duplicates one of the entries
|
||||
break
|
||||
file_url = file.text
|
||||
m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
|
||||
if m_format is not None:
|
||||
format_id = m_format.group(1)
|
||||
else:
|
||||
format_id = file.attrib['bitrate']
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': format_id,
|
||||
})
|
||||
def sort_key(f):
|
||||
try:
|
||||
return qualities.index(f['format_id'])
|
||||
except ValueError:
|
||||
return -1
|
||||
formats.sort(key=sort_key)
|
||||
if not formats:
|
||||
raise ExtractorError(u'Unable to extract video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'formats': formats,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
}]
|
||||
}
|
||||
|
65
youtube_dl/extractor/techtalks.py
Normal file
65
youtube_dl/extractor/techtalks.py
Normal file
@ -0,0 +1,65 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_attribute,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class TechTalksIE(InfoExtractor):
    """Extractor for techtalks.tv talks.

    A talk yields the presenter video and, when the page links one, a second
    "slides" video served from the same RTMP connection URL.
    """

    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'

    _TEST = {
        u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
        u'playlist': [
            {
                u'file': u'57758.flv',
                u'info_dict': {
                    u'title': u'Learning Topic Models --- Going beyond SVD',
                },
            },
            {
                u'file': u'57758-slides.flv',
                u'info_dict': {
                    u'title': u'Learning Topic Models --- Going beyond SVD',
                },
            },
        ],
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return one info dict, or a list of two when a slides video exists."""
        talk_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, talk_id)
        rtmp_url = self._search_regex(
            r'netConnectionUrl: \'(.*?)\'', webpage, u'rtmp url')
        play_path = self._search_regex(
            r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
            webpage, u'presenter play path')
        title = clean_html(get_element_by_attribute('class', 'title', webpage))

        presenter_video = {
            'id': talk_id,
            'title': title,
            'url': rtmp_url,
            'play_path': play_path,
            'ext': 'flv',
        }

        slides_match = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
        if slides_match is None:
            return presenter_video

        # The slides video shares everything but the id and the play path.
        slides_video = dict(presenter_video)
        slides_video['id'] = talk_id + '-slides'
        slides_video['play_path'] = slides_match.group(1)
        return [presenter_video, slides_video]
|
@ -1,10 +1,13 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
RegexNotFoundError,
|
||||
)
|
||||
|
||||
class TEDIE(InfoExtractor):
|
||||
class TEDIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL=r'''http://www\.ted\.com/
|
||||
(
|
||||
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
|
||||
@ -32,33 +35,32 @@ class TEDIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
m=re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
if m.group('type_talk'):
|
||||
return [self._talk_info(url)]
|
||||
return self._talk_info(url)
|
||||
else :
|
||||
playlist_id=m.group('playlist_id')
|
||||
name=m.group('name')
|
||||
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
|
||||
return [self._playlist_videos_info(url,name,playlist_id)]
|
||||
|
||||
def _playlist_videos_info(self,url,name,playlist_id=0):
|
||||
|
||||
def _playlist_videos_info(self, url, name, playlist_id):
|
||||
'''Returns the videos of the playlist'''
|
||||
video_RE=r'''
|
||||
<li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)"
|
||||
([.\s]*?)data-playlist_item_id="(\d+)"
|
||||
([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
|
||||
'''
|
||||
video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
|
||||
webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
|
||||
m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
|
||||
m_names=re.finditer(video_name_RE,webpage)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, u'Downloading playlist webpage')
|
||||
matches = re.finditer(
|
||||
r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
|
||||
webpage)
|
||||
|
||||
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
|
||||
webpage, 'playlist title')
|
||||
|
||||
playlist_entries = []
|
||||
for m_video, m_name in zip(m_videos,m_names):
|
||||
talk_url='http://www.ted.com%s' % m_name.group('talk_url')
|
||||
playlist_entries.append(self.url_result(talk_url, 'TED'))
|
||||
return self.playlist_result(playlist_entries, playlist_id = playlist_id, playlist_title = playlist_title)
|
||||
playlist_entries = [
|
||||
self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
|
||||
for m in matches
|
||||
]
|
||||
return self.playlist_result(
|
||||
playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
|
||||
|
||||
def _talk_info(self, url, video_id=0):
|
||||
"""Return the video for the talk in the url"""
|
||||
@ -81,16 +83,35 @@ class TEDIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'url': stream['file'],
|
||||
'format': stream['id']
|
||||
} for stream in info['htmlStreams']]
|
||||
info = {
|
||||
'id': info['id'],
|
||||
} for stream in info['htmlStreams']]
|
||||
|
||||
video_id = info['id']
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, webpage)
|
||||
return
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': desc,
|
||||
'subtitles': video_subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
# TODO: Remove when #980 has been merged
|
||||
info.update(info['formats'][-1])
|
||||
|
||||
return info
|
||||
def _get_available_subtitles(self, video_id, webpage):
    """Return {language: srt_url} for the languages offered in the page.

    The languages are the option values of the subtitles_language_select
    element; a warning is reported and {} returned when that element is
    missing from the webpage.
    """
    try:
        options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
    except RegexNotFoundError:
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}
    # dict() over a generator rather than a dict comprehension, so no newer
    # syntax is introduced; no options found simply yields {}.
    return dict(
        (lang, 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, lang))
        for lang in re.findall(r'(?:<option value=")(\S+)"', options))
|
||||
|
74
youtube_dl/extractor/toutv.py
Normal file
74
youtube_dl/extractor/toutv.py
Normal file
@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class TouTvIE(InfoExtractor):
    """Extractor for tou.tv episode pages.

    The page provides the media id; the stream URL is then taken from the
    XML returned by release.theplatform.com for that id.
    """

    IE_NAME = u'tou.tv'
    _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'

    _TEST = {
        u'url': u'http://www.tou.tv/30-vies/S04E41',
        u'file': u'30-vies_S04E41.mp4',
        u'info_dict': {
            u'title': u'30 vies Saison 4 / Épisode 41',
            u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
            u'age_limit': 8,
            u'uploader': u'Groupe des Nouveaux Médias',
            u'duration': 1296,
            u'upload_date': u'20131118',
            u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
        },
        u'params': {
            u'skip_download': True,  # Requires rtmpdump
        },
        u'skip': 'Only available in Canada'
    }

    def _real_extract(self, url):
        """Return the info dict for the episode at *url*.

        Raises ExtractorError when the stream list has no usable URL, and an
        expected ExtractorError when the service answers with its
        "Unavailable.flv" placeholder.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        media_id = self._search_regex(
            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')

        streams_url = u'http://release.theplatform.com/content.select?pid=' + media_id
        streams_webpage = self._download_webpage(
            streams_url, video_id, note=u'Downloading stream list')

        streams_doc = xml.etree.ElementTree.fromstring(
            streams_webpage.encode('utf-8'))
        # First URL that is not an ad. The default keeps a bare StopIteration
        # from escaping, and empty <url> elements (text None) are skipped.
        video_url = next(
            (n.text
             for n in streams_doc.findall('.//choice/url')
             if n.text and u'//ad.doubleclick' not in n.text),
            None)
        if video_url is None:
            raise ExtractorError(u'Unable to find a video URL in the stream list')
        if video_url.endswith('/Unavailable.flv'):
            raise ExtractorError(
                u'Access to this video is blocked from outside of Canada',
                expected=True)

        duration_str = self._html_search_meta(
            'video:duration', webpage, u'duration')
        duration = int(duration_str) if duration_str else None
        upload_date_str = self._html_search_meta(
            'video:release_date', webpage, u'upload date')
        upload_date = unified_strdate(upload_date_str) if upload_date_str else None

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': video_url,
            'description': self._og_search_description(webpage),
            'uploader': self._dc_search_uploader(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'age_limit': self._media_rating_search(webpage),
            'duration': duration,
            'upload_date': upload_date,
            'ext': 'mp4',
        }
|
63
youtube_dl/extractor/tube8.py
Normal file
63
youtube_dl/extractor/tube8.py
Normal file
@ -0,0 +1,63 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
|
||||
class Tube8IE(InfoExtractor):
    """Extractor for tube8.com video pages.

    The video URL is read from the page's JSON-like player data and is
    decrypted with the "video_title" value when flagged as encrypted.
    """

    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
    _TEST = {
        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
        u'file': u'229795.mp4',
        u'md5': u'e9e0b0c86734e5e3766e653509475db0',
        u'info_dict': {
            u"description": u"hot teen Kasia grinding",
            u"uploader": u"unknown",
            u"title": u"Kasia music video",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Download the page (with the age cookie set) and build the info dict."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('videoid')
        url = 'http://www.' + match.group('url')

        request = compat_urllib_request.Request(url)
        request.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(request, video_id)

        video_title = self._html_search_regex(
            r'videotitle ="([^"]+)', webpage, u'title')
        video_description = self._html_search_regex(
            r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
        video_uploader = self._html_search_regex(
            r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
        thumbnail = self._html_search_regex(
            r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
        if thumbnail:
            # Undo the "\/" escaping of slashes in the embedded data.
            thumbnail = thumbnail.replace('\\/', '/')

        video_url = self._html_search_regex(
            r'"video_url":"([^"]+)', webpage, u'video_url')
        if '"encrypted":true' in webpage:
            password = self._html_search_regex(
                r'"video_title":"([^"]+)', webpage, u'password')
            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')

        url_path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(url_path)[1][1:]
        # First two "_"-separated pieces of the fifth path component.
        format_name = "-".join(url_path.split('/')[4].split('_')[:2])

        return {
            'id': video_id,
            'uploader': video_uploader,
            'title': video_title,
            'thumbnail': thumbnail,
            'description': video_description,
            'url': video_url,
            'ext': extension,
            'format': format_name,
            'format_id': format_name,
            'age_limit': 18,
        }
|
@ -7,15 +7,25 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class TudouIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
|
||||
_TEST = {
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
|
||||
u'file': u'159448201.f4v',
|
||||
u'md5': u'140a49ed444bd22f93330985d8475fcb',
|
||||
u'info_dict': {
|
||||
u"title": u"卡马乔国足开大脚长传冲吊集锦"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
|
||||
u'file': u'todo.mp4',
|
||||
u'md5': u'todo.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'todo.mp4',
|
||||
},
|
||||
u'add_ie': [u'Youku'],
|
||||
u'skip': u'Only works from China'
|
||||
}]
|
||||
|
||||
def _url_for_id(self, id, quality = None):
|
||||
info_url = "http://v2.tudou.com/f?id="+str(id)
|
||||
@ -29,14 +39,19 @@ class TudouIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(2)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = re.search(",kw:\"(.+)\"",webpage)
|
||||
if title is None:
|
||||
title = re.search(",kw: \'(.+)\'",webpage)
|
||||
title = title.group(1)
|
||||
thumbnail_url = re.search(",pic: \'(.+?)\'",webpage)
|
||||
if thumbnail_url is None:
|
||||
thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
|
||||
thumbnail_url = thumbnail_url.group(1)
|
||||
|
||||
m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
|
||||
if m and m.group(1):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': u'youku:' + m.group(1),
|
||||
'ie_key': 'Youku'
|
||||
}
|
||||
|
||||
title = self._search_regex(
|
||||
r",kw:\s*['\"](.+?)[\"']", webpage, u'title')
|
||||
thumbnail_url = self._search_regex(
|
||||
r",pic:\s*[\"'](.+?)[\"']", webpage, u'thumbnail URL', fatal=False)
|
||||
|
||||
segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
|
||||
segments = json.loads(segs_json)
|
||||
|
42
youtube_dl/extractor/tvp.py
Normal file
42
youtube_dl/extractor/tvp.py
Normal file
@ -0,0 +1,42 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TvpIE(InfoExtractor):
    """Extractor for tvp.pl video pages.

    Title, description and thumbnail come from the page's Open Graph tags;
    the video URL comes from the videofileinfo JSON endpoint.
    """

    IE_NAME = u'tvp.pl'
    _VALID_URL = r'https?://www\.tvp\.pl/.*?wideo/(?P<date>\d+)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.tvp.pl/warszawa/magazyny/campusnews/wideo/31102013/12878238',
        u'md5': u'148408967a6a468953c0a75cbdaf0d7a',
        u'file': u'12878238.wmv',
        u'info_dict': {
            u'title': u'31.10.2013 - Odcinek 2',
            u'description': u'31.10.2013 - Odcinek 2',
        },
        u'skip': u'Download has to use same server IP as extraction. Therefore, a good (load-balancing) DNS resolver will make the download fail.'
    }

    def _real_extract(self, url):
        """Fetch the page and the metadata JSON, then assemble the info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)
        metadata_json = self._download_webpage(
            'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
            video_id, u"Downloading video metadata")

        metadata = json.loads(metadata_json)
        self.report_extraction(video_id)
        video_url = metadata['video_url']

        info = {
            'id': video_id,
            'title': self._og_search_title(webpage, fatal=True),
            'ext': 'wmv',
            'url': video_url,
        }
        info['description'] = self._og_search_description(webpage)
        info['thumbnail'] = self._og_search_thumbnail(webpage)
        return info
|
@ -1,53 +1,128 @@
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_HTTPError,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class VevoIE(InfoExtractor):
|
||||
"""
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE)
|
||||
"""
|
||||
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
u'file': u'GB1101300280.mp4',
|
||||
u'md5': u'06bea460acb744eab74a9d7dcb4bfd61',
|
||||
u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20130624",
|
||||
u"uploader": u"Hurts",
|
||||
u"title": u"Somebody to Die For"
|
||||
u"title": u"Somebody to Die For",
|
||||
u"duration": 230,
|
||||
u"width": 1920,
|
||||
u"height": 1080,
|
||||
}
|
||||
}
|
||||
}]
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||
|
||||
def _formats_from_json(self, video_info):
    """Build the formats list from the newest HTTP version in *video_info*.

    Only entries of video_info['videoVersions'] with sourceType 2 are
    considered; the one with the highest 'version' wins. Its 'data' field
    is parsed as XML and every <rendition> element becomes one format.
    Raises ExtractorError when no such version exists.
    """
    newest = {'version': -1}
    for candidate in video_info['videoVersions']:
        # These are the HTTP downloads, other types are for different manifests
        if candidate['sourceType'] != 2:
            continue
        if candidate['version'] > newest['version']:
            newest = candidate
    if newest['version'] == -1:
        raise ExtractorError(u'Unable to extract last version of the video')

    renditions = xml.etree.ElementTree.fromstring(newest['data'])
    formats = []
    # Already sorted from worst to best quality
    for rendition in renditions.findall('rendition'):
        attrs = rendition.attrib
        note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attrs
        formats.append(dict(
            url=attrs['url'],
            format_id=attrs['name'],
            format_note=note,
            # The two attribute names really differ in capitalisation here.
            height=int(attrs['frameheight']),
            width=int(attrs['frameWidth']),
        ))
    return formats
|
||||
|
||||
def _formats_from_smil(self, smil_xml):
|
||||
formats = []
|
||||
smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
|
||||
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
for el in els:
|
||||
src = el.attrib['src']
|
||||
m = re.match(r'''(?xi)
|
||||
(?P<ext>[a-z0-9]+):
|
||||
(?P<path>
|
||||
[/a-z0-9]+ # The directory and main part of the URL
|
||||
_(?P<cbr>[0-9]+)k
|
||||
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
_(?P<vcodec>[a-z0-9]+)
|
||||
_(?P<vbr>[0-9]+)
|
||||
_(?P<acodec>[a-z0-9]+)
|
||||
_(?P<abr>[0-9]+)
|
||||
\.[a-z0-9]+ # File extension
|
||||
)''', src)
|
||||
if not m:
|
||||
continue
|
||||
|
||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': u'SMIL_' + m.group('cbr'),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'acodec': m.group('acodec'),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'abr': int(m.group('abr')),
|
||||
'ext': m.group('ext'),
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
json_url = 'http://www.vevo.com/data/video/%s' % video_id
|
||||
base_url = 'http://smil.lvl3.vevo.com'
|
||||
videos_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (base_url, video_id, video_id.lower())
|
||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
|
||||
links_webpage = self._download_webpage(videos_url, video_id, u'Downloading videos urls')
|
||||
video_info = json.loads(info_json)['video']
|
||||
|
||||
self.report_extraction(video_id)
|
||||
video_info = json.loads(info_json)
|
||||
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage))
|
||||
if m_urls is None or len(m_urls) == 0:
|
||||
raise ExtractorError(u'Unable to extract video url')
|
||||
# They are sorted from worst to best quality
|
||||
m_url = m_urls[-1]
|
||||
video_url = base_url + '/' + m_url.group('url')
|
||||
ext = m_url.group('ext')
|
||||
formats = self._formats_from_json(video_info)
|
||||
try:
|
||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||
smil_xml = self._download_webpage(smil_url, video_id,
|
||||
u'Downloading SMIL info')
|
||||
formats.extend(self._formats_from_smil(smil_xml))
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError):
|
||||
raise
|
||||
self._downloader.report_warning(
|
||||
u'Cannot download SMIL information, falling back to JSON ..')
|
||||
|
||||
return {'url': video_url,
|
||||
'ext': ext,
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'thumbnail': video_info['img'],
|
||||
'upload_date': video_info['launchDate'].replace('/',''),
|
||||
'uploader': video_info['Artists'][0]['title'],
|
||||
}
|
||||
timestamp_ms = int(self._search_regex(
|
||||
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
|
||||
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'formats': formats,
|
||||
'thumbnail': video_info['imageUrl'],
|
||||
'upload_date': upload_date.strftime('%Y%m%d'),
|
||||
'uploader': video_info['mainArtists'][0]['artistName'],
|
||||
'duration': video_info['duration'],
|
||||
}
|
||||
|
||||
return info
|
||||
|
@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ViddlerIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://www.viddler.com/v/43903784",
|
||||
u'file': u'43903784.mp4',
|
||||
|
30
youtube_dl/extractor/videodetective.py
Normal file
30
youtube_dl/extractor/videodetective.py
Normal file
@ -0,0 +1,30 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class VideoDetectiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
|
||||
u'file': u'194487.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'KICK-ASS 2',
|
||||
u'description': u'md5:65ba37ad619165afac7d432eaded6013',
|
||||
u'duration': 135,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
og_video = self._og_search_video_url(webpage)
|
||||
query = compat_urlparse.urlparse(og_video).query
|
||||
return self.url_result(InternetVideoArchiveIE._build_url(query),
|
||||
ie=InternetVideoArchiveIE.ie_key())
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user