Compare commits: 2015.02.02...2015.02.24 (323 commits)

| SHA1 |
| --- |
| 4f3b21e1c7 |
| 54233c9080 |
| db8e13ef71 |
| 5a42414b9c |
| 9c665ab72e |
| b665ba6aa6 |
| ec5913b5cd |
| 25ac63ed71 |
| 99209c2916 |
| 1fbaa0a521 |
| 3037b91e05 |
| ffdf972b91 |
| 459e5fbd5f |
| bfc993cc91 |
| 4432db35d9 |
| 591ab1dff9 |
| 5bca2424bc |
| bd61a9e770 |
| 3438e7acd2 |
| 09c200acf2 |
| 716889cab1 |
| 409693984f |
| 04e8c11080 |
| 80af2b73ab |
| 3cc57f9645 |
| a65d4e7f14 |
| b531cfc019 |
| 543ec2136b |
| 93b5071f73 |
| ddc369f073 |
| fcc3e6138b |
| 9fe6ef7ab2 |
| c010af6f19 |
| 35b7982303 |
| f311cfa231 |
| 80970e531b |
| b7bb76df05 |
| 98c70d6fc7 |
| ab84349b16 |
| 03091e372f |
| 4d17184817 |
| e086e0eb6c |
| 314368c822 |
| c5181ab410 |
| ea5152cae1 |
| 255fca5eea |
| 4aeccadf4e |
| 93540ee10e |
| 8fb3ac3649 |
| 77b2986b5b |
| 62b013df0d |
| fad6768bd1 |
| a78125f925 |
| a00a8bcc8a |
| 1e9a9e167d |
| 3da0db62e6 |
| e14ced7918 |
| ab9d02f53b |
| a461a11989 |
| 1bd838608f |
| 365577f567 |
| 50efb383f0 |
| 5da6bd0083 |
| 5e9a033e6e |
| fb7cb6823e |
| dd0a58f5f0 |
| a21420389e |
| 6140baf4e1 |
| 8fc642eb5b |
| e66e1a0046 |
| d5c69f1da4 |
| f13b1e7d7f |
| 5c8a3f862a |
| 8807f1277f |
| a3b9157f49 |
| b88ba05356 |
| b74d505577 |
| 9e2d7dca87 |
| d236b37ac9 |
| e880c66bd8 |
| 383456aa29 |
| 1a13940c8d |
| 3d54788495 |
| 71d53ace2f |
| f37e3f99f0 |
| bd03ffc16e |
| 1ac1af9b47 |
| 3bf5705316 |
| 1c2528c8a3 |
| 7bd15b1a03 |
| 6b961a85fd |
| 7707004043 |
| a025d3c5a5 |
| c460bdd56b |
| b81a359eb6 |
| d61aefb24c |
| d305dd73a3 |
| 93a16ba238 |
| 4f7cea6c53 |
| afbdd3acc3 |
| 85d5866177 |
| 9789d7535d |
| d8443cd3f7 |
| d47c26e168 |
| 01561da142 |
| 0af25f784b |
| b9b42f2ea0 |
| 311c393838 |
| 18c1c42405 |
| 37dd5d4629 |
| 81975f4693 |
| b8b928d5cb |
| 3eff81fbf7 |
| 785521bf4f |
| 6d1a55a521 |
| 9cad27008b |
| 11e611a7fa |
| 72c1f8de06 |
| 6e99868e4c |
| 4d278fde64 |
| f21e915fb9 |
| 6f53c63df6 |
| 1def5f359e |
| 15ec669374 |
| a3fa5da496 |
| 30965ac66a |
| 09ab40b7d1 |
| edab9dbf4d |
| 9868ea4936 |
| 85920dd01d |
| fa15607773 |
| a91a2c1a83 |
| 16e7711e22 |
| 5cda4eda72 |
| 98f000409f |
| bd7fe0cf66 |
| 48246541da |
| 4a8d4a53b1 |
| 4cd95bcbc3 |
| be24c8697f |
| 0d93378887 |
| 4069766c52 |
| 7010577720 |
| 8ac27a68e6 |
| 46312e0b46 |
| f9216ed6ad |
| 65bf37ef83 |
| f740fae2a4 |
| fbc503d696 |
| 662435f728 |
| 163d966707 |
| 85729c51af |
| 360e1ca5cc |
| a1f2a06b34 |
| c84dd8a90d |
| 65469a7f8b |
| 6b597516c1 |
| b5857f62e2 |
| a504ced097 |
| 1db5fbcfe3 |
| 59b8ab5834 |
| a568180441 |
| 85e80f71cd |
| bfa6bdcd8b |
| 03cd72b007 |
| 5bfd430f81 |
| 73fac4e911 |
| 8fb474fb17 |
| f813928e4b |
| b9c7a97318 |
| 9fb2f1cd6d |
| 6ca7732d5e |
| b0ab0fac49 |
| a294bce82f |
| 76d1466b08 |
| 1888d3f7b3 |
| c2787701cc |
| 52e1d0ccc4 |
| 10e3c4c221 |
| 68f2d273bf |
| 7c86c21662 |
| ae1580d790 |
| 3215c50f25 |
| 36f73e8044 |
| a4f3d779db |
| d9aa2b784d |
| cffcbc02de |
| 9347fddbfc |
| 037e9437e4 |
| 36e7a4ca2e |
| ae6423d704 |
| 7105440cec |
| c80b9cd280 |
| 171ca612af |
| c3d64fc1b3 |
| 7c24ce225d |
| 08b38d5401 |
| 024c53694d |
| 7e6011101f |
| c40feaba77 |
| 5277f09dfc |
| 2d30521ab9 |
| 050fa43561 |
| f36f92f4da |
| 124f3bc67d |
| d304209a85 |
| 8367d3f3cb |
| c56d7d899d |
| ea5db8469e |
| 3811c567e7 |
| 8708d76425 |
| 054fe3cc40 |
| af0d11f244 |
| 9650885be9 |
| 596ac6e31f |
| 612ee37365 |
| 442c37b7a9 |
| 04bbe41330 |
| 8f84f57183 |
| 6a78740211 |
| c0e1a415fd |
| bf8f082a90 |
| 2f543a2142 |
| 7e5db8c930 |
| f7a211dcc8 |
| 845734773d |
| 347de4931c |
| 8829650513 |
| c73fae1e2e |
| 834bf069d2 |
| c06a9fa34f |
| 753fad4adc |
| 34814eb66e |
| 3a5bcd0326 |
| 99c2398bc6 |
| 28f1272870 |
| f18e3a2fc0 |
| c4c5dc27cb |
| 2caf182f37 |
| 43f244b6d5 |
| 1309b396d0 |
| ba61796458 |
| 3255fe7141 |
| e98b8e79ea |
| 196121c51b |
| 5269028951 |
| f7bc056b5a |
| a0f7198544 |
| dd8930684e |
| bdb186f3b0 |
| 64f9baa084 |
| b29231c040 |
| 6128bf07a9 |
| 2ec19e9558 |
| 9ddb6925bf |
| 12931e1c6e |
| 41c23b0da5 |
| 2578ab19e4 |
| d87ec897e9 |
| 3bd4bffb1c |
| c36b09a502 |
| 641eb10d34 |
| 955c5505e7 |
| 69319969de |
| a14292e848 |
| 5d678df64a |
| 8ca8cbe2bd |
| ba322d8209 |
| 2f38289b79 |
| f23a3ca699 |
| 77d2b106cc |
| c0e46412e9 |
| 0161353d7d |
| 2b4ecde2c8 |
| b3a286d69d |
| 467d3c9a0c |
| ad5747bad1 |
| d6eb66ed3c |
| 7f2a9f1b49 |
| 1e1896f2de |
| c831973366 |
| 1a2548d9e9 |
| 3900eec27c |
| a02d212638 |
| 9c91a8fa70 |
| 41469f335e |
| 67ce4f8820 |
| bc63d56cca |
| c893d70805 |
| 3ee6e02564 |
| e3aaace400 |
| 300753a069 |
| f13b88c616 |
| 60ca389c64 |
| 1b0f3919c1 |
| 6a348cf7d5 |
| 9e91449c8d |
| 25e5ebf382 |
| 7dfc356625 |
| 58ba6c0160 |
| f076b63821 |
| 12f0454cd6 |
| cd7342755f |
| 9bb8e0a3f9 |
| 1a6373ef39 |
| f6c24009be |
| d862042301 |
| 23d9ded655 |
| 4c1a017e69 |
| ee623d9247 |
| 330537d08a |
| 2cf0ecac7b |
| d200b11c7e |
| d0eca21021 |
| c1147c05e1 |
| 55898ad2cf |
| a465808592 |
| 5c4862bad4 |
| 995029a142 |
| a57b562cff |
| 531572578e |
| 3a4cca687f |
| 7d3d06a16c |
AUTHORS (4 changes)

```diff
@@ -108,3 +108,7 @@ Enam Mijbah Noor
 David Luhmer
 Shaya Goldberg
 Paul Hartmann
+Frans de Jonge
+Robin de Rooij
+Ryan Schmidt
+Leslie P. Polzer
```
CONTRIBUTING.md

```diff
@@ -1,4 +1,6 @@
-Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
+**Please include the full output of youtube-dl when run with `-v`**.
+
+The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
 
 Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
 
@@ -122,7 +124,7 @@
 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
-8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
+8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
 
         $ git add youtube_dl/extractor/__init__.py
```
Makefile (7 changes)

```diff
@@ -1,10 +1,7 @@
 all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
 
 clean:
-	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
+	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
-
-cleanall: clean
-	rm -f youtube-dl youtube-dl.exe
 
 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
@@ -46,7 +43,7 @@ test:
 ot: offlinetest
 
 offlinetest: codetest
-	nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
+	nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
 
 tar: youtube-dl.tar.gz
 
```
README.md (73 changes)

```diff
@@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like.
                                      on Windows)
     --flat-playlist                  Do not extract the videos of a playlist,
                                      only list them.
+    --no-color                       Do not emit color codes in output.
 
 ## Network Options:
     --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in
```
```diff
@@ -119,6 +120,23 @@ which means you can modify it, redistribute it or use it however you like.
                                      COUNT views
     --max-views COUNT                Do not download any videos with more than
                                      COUNT views
+    --match-filter FILTER            (Experimental) Generic video filter.
+                                     Specify any key (see help for -o for a list
+                                     of available keys) to match if the key is
+                                     present, !key to check if the key is not
+                                     present,key > NUMBER (like "comment_count >
+                                     12", also works with >=, <, <=, !=, =) to
+                                     compare against a number, and & to require
+                                     multiple matches. Values which are not
+                                     known are excluded unless you put a
+                                     question mark (?) after the operator.For
+                                     example, to only match videos that have
+                                     been liked more than 100 times and disliked
+                                     less than 50 times (or the dislike
+                                     functionality is not available at the given
+                                     service), but who also have a description,
+                                     use --match-filter "like_count > 100 &
+                                     dislike_count <? 50 & description" .
     --no-playlist                    If the URL refers to a video and a
                                      playlist, download only the video.
     --age-limit YEARS                download only videos suitable for the given
```
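The new `--match-filter` help text is dense, so here is a minimal Python sketch of how such a filter string is interpreted. This is an illustration only, not youtube-dl's implementation; `matches` and the `video` dict are invented for the example.

```python
import operator
import re

# Comparison operators accepted by the filter syntax.
OPS = {'<': operator.lt, '<=': operator.le, '>': operator.gt,
       '>=': operator.ge, '=': operator.eq, '!=': operator.ne}


def matches(info, filter_str):
    """Illustrative re-implementation of the --match-filter semantics."""
    for clause in filter_str.split('&'):
        clause = clause.strip()
        m = re.match(
            r'(?P<key>\w+)\s*(?P<op><=|>=|!=|<|>|=)(?P<opt>\?)?\s*(?P<val>\d+)$',
            clause)
        if m:
            actual = info.get(m.group('key'))
            if actual is None:
                # A '?' after the operator lets unknown values pass.
                if m.group('opt'):
                    continue
                return False
            if not OPS[m.group('op')](actual, int(m.group('val'))):
                return False
        elif clause.startswith('!'):
            # !key: the key must be absent.
            if info.get(clause[1:]) is not None:
                return False
        else:
            # Bare key: the key must be present.
            if info.get(clause) is None:
                return False
    return True


# The README's own example filter:
video = {'like_count': 150, 'description': 'a video with a description'}
print(matches(video, 'like_count > 100 & dislike_count <? 50 & description'))
# -> True
```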
```diff
@@ -143,6 +161,8 @@ which means you can modify it, redistribute it or use it however you like.
     --playlist-reverse               Download playlist videos in reverse order
     --xattr-set-filesize             (experimental) set file xattribute
                                      ytdl.filesize with expected filesize
+    --hls-prefer-native              (experimental) Use the native HLS
+                                     downloader instead of ffmpeg.
     --external-downloader COMMAND    (experimental) Use the specified external
                                      downloader. Currently supports
                                      aria2c,curl,wget
```
```diff
@@ -292,18 +312,20 @@ which means you can modify it, redistribute it or use it however you like.
                                      video results by putting a condition in
                                      brackets, as in -f "best[height=720]" (or
                                      -f "[filesize>10M]"). This works for
-                                     filesize, height, width, tbr, abr, vbr, and
-                                     fps and the comparisons <, <=, >, >=, =, !=
-                                     . Formats for which the value is not known
-                                     are excluded unless you put a question mark
-                                     (?) after the operator. You can combine
-                                     format filters, so -f "[height <=?
-                                     720][tbr>500]" selects up to 720p videos
-                                     (or videos where the height is not known)
-                                     with a bitrate of at least 500 KBit/s. By
-                                     default, youtube-dl will pick the best
-                                     quality. Use commas to download multiple
-                                     audio formats, such as -f
+                                     filesize, height, width, tbr, abr, vbr,
+                                     asr, and fps and the comparisons <, <=, >,
+                                     >=, =, != and for ext, acodec, vcodec,
+                                     container, and protocol and the comparisons
+                                     =, != . Formats for which the value is not
+                                     known are excluded unless you put a
+                                     question mark (?) after the operator. You
+                                     can combine format filters, so -f "[height
+                                     <=? 720][tbr>500]" selects up to 720p
+                                     videos (or videos where the height is not
+                                     known) with a bitrate of at least 500
+                                     KBit/s. By default, youtube-dl will pick
+                                     the best quality. Use commas to download
+                                     multiple audio formats, such as -f
                                      136/137/mp4/bestvideo,140/m4a/bestaudio.
                                      You can merge the video and audio of two
                                      formats into a single file using -f <video-
```
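For embedding users, the extended format filtering is driven entirely by the `format` option of the Python API. A minimal sketch, assuming youtube-dl is importable and using the README's own selector:

```python
import youtube_dl

# Same selector as the README text: up to 720p (or unknown height) with a
# bitrate of at least 500 KBit/s; fall back to plain 'best' otherwise.
ydl_opts = {'format': '[height <=? 720][tbr>500]/best'}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```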
```diff
@@ -329,8 +351,8 @@ which means you can modify it, redistribute it or use it however you like.
     --all-subs                       downloads all the available subtitles of
                                      the video
     --list-subs                      lists all available subtitles for the video
-    --sub-format FORMAT              subtitle format (default=srt) ([sbv/vtt]
-                                     youtube only)
+    --sub-format FORMAT              subtitle format, accepts formats
+                                     preference, for example: "ass/srt/best"
     --sub-lang LANGS                 languages of the subtitles to download
                                      (optional) separated by commas, use IETF
                                      language tags like 'en,pt'
```
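The new `--sub-format` value is a preference list tried left to right, with `best` as a catch-all. A tiny illustrative sketch of that resolution logic (not the actual youtube-dl code; `pick_sub_format` is hypothetical):

```python
def pick_sub_format(available_exts, preference='ass/srt/best'):
    """Illustrative resolution of a subtitle format preference list."""
    for fmt in preference.split('/'):
        if fmt == 'best':
            # 'best' accepts whatever the site lists first.
            return available_exts[0]
        if fmt in available_exts:
            return fmt
    return None


# 'ass' is unavailable, so the second preference wins:
print(pick_sub_format(['vtt', 'srt']))  # -> 'srt'
```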
```diff
@@ -377,6 +399,9 @@ which means you can modify it, redistribute it or use it however you like.
                                      postprocessors (default)
     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
                                      postprocessors
+    --ffmpeg-location PATH           Location of the ffmpeg/avconv binary;
+                                     either the path to the binary or its
+                                     containing directory.
     --exec CMD                       Execute a command on the file after
                                      downloading, similar to find's -exec
                                      syntax. Example: --exec 'adb push {}
```
```diff
@@ -490,11 +515,15 @@
 
 ### ERROR: no fmt_url_map or conn information found in video info
 
-youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
+YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
 
 ### ERROR: unable to download video ###
 
-youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
+YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
+
+### ExtractorError: Could not find JS function u'OF'
+
+In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
 
 ### SyntaxError: Non-ASCII character ###
 
```
```diff
@@ -532,9 +561,17 @@
     youtube-dl -- -wNyEUrxzFU
     youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
 
+### Can you add support for this anime video site, or site which shows current movies for free?
+
+As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
+
+A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included into youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization.
+
+Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
+
 ### How can I detect whether a given URL is supported by youtube-dl?
 
-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
 
 It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
 
```
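The FAQ's point can be approximated programmatically: walk the extractor registry and ignore the generic extractor. A sketch using the public `gen_extractors()` helper (the `is_specifically_supported` wrapper is hypothetical):

```python
from youtube_dl.extractor import gen_extractors


def is_specifically_supported(url):
    """True if some extractor other than the generic one claims this URL."""
    return any(ie.suitable(url) and ie.IE_NAME != 'generic'
               for ie in gen_extractors())


print(is_specifically_supported('http://www.youtube.com/watch?v=BaW_jenozKc'))
```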
```diff
@@ -728,7 +765,7 @@
 
 ### Is anyone going to need the feature?
 
-Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
 
 ### Is your question about youtube-dl?
 
```
devscripts/check-porn.py

```diff
@@ -45,12 +45,12 @@ for test in get_testcases():
 
     RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
 
-    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
-                   or test['info_dict']['age_limit'] != 18):
+    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
+                   test['info_dict']['age_limit'] != 18):
         print('\nPotential missing age_limit check: {0}'.format(test['name']))
 
-    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
-                         and test['info_dict']['age_limit'] == 18):
+    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
+                         test['info_dict']['age_limit'] == 18):
         print('\nPotential false negative: {0}'.format(test['name']))
 
     else:
```
devscripts/release.sh

```diff
@@ -35,7 +35,7 @@
 if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
 
 /bin/echo -e "\n### First of all, testing..."
-make cleanall
+make clean
 if $skip_tests ; then
     echo 'SKIPPING TESTS'
 else
@@ -45,9 +45,9 @@ fi
 /bin/echo -e "\n### Changing version in version.py..."
 sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
 
-/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
-make README.md
-git add README.md youtube_dl/version.py
+/bin/echo -e "\n### Committing documentation and youtube_dl/version.py..."
+make README.md CONTRIBUTING.md supportedsites
+git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py
 git commit -m "release $version"
 
 /bin/echo -e "\n### Now tagging, signing and pushing..."
```
docs/supportedsites.md

```diff
@@ -1,4 +1,5 @@
 # Supported sites
+- **1tv**: Первый канал
 - **1up.com**
 - **220.ro**
 - **24video**
@@ -9,16 +10,21 @@
 - **8tracks**
 - **9gag**
 - **abc.net.au**
+- **Abc7News**
 - **AcademicEarth:Course**
 - **AddAnime**
 - **AdobeTV**
 - **AdultSwim**
+- **Aftenposten**
 - **Aftonbladet**
 - **AlJazeera**
 - **Allocine**
+- **AlphaPorno**
 - **anitube.se**
 - **AnySex**
 - **Aparat**
+- **AppleDailyAnimationNews**
+- **AppleDailyRealtimeNews**
 - **AppleTrailers**
 - **archive.org**: archive.org videos
 - **ARD**
@@ -30,8 +36,10 @@
 - **arte.tv:ddc**
 - **arte.tv:embed**
 - **arte.tv:future**
+- **AtresPlayer**
+- **ATTTechChannel**
 - **audiomack**
-- **AUEngine**
+- **audiomack:album**
 - **Azubu**
 - **bambuser**
 - **bambuser:channel**
@@ -53,14 +61,19 @@
 - **Brightcove**
 - **BuzzFeed**
 - **BYUtv**
+- **Camdemy**
+- **CamdemyFolder**
 - **Canal13cl**
 - **canalc2.tv**
 - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
 - **CBS**
 - **CBSNews**: CBS News
+- **CBSSports**
 - **CeskaTelevize**
 - **channel9**: Channel 9
 - **Chilloutzone**
+- **chirbit**
+- **chirbit:profile**
 - **Cinchcast**
 - **Cinemassacre**
 - **clipfish**
@@ -71,8 +84,10 @@
 - **cmt.com**
 - **CNET**
 - **CNN**
+- **CNNArticle**
 - **CNNBlogs**
 - **CollegeHumor**
+- **CollegeRama**
 - **ComCarCoff**
 - **ComedyCentral**
 - **ComedyCentralShows**: The Daily Show / The Colbert Report
@@ -82,32 +97,38 @@
 - **Crunchyroll**
 - **crunchyroll:playlist**
 - **CSpan**: C-SPAN
+- **CtsNews**
 - **culturebox.francetvinfo.fr**
 - **dailymotion**
 - **dailymotion:playlist**
 - **dailymotion:user**
 - **daum.net**
 - **DBTV**
+- **DctpTv**
 - **DeezerPlaylist**
 - **defense.gouv.fr**
 - **Discovery**
 - **divxstage**: DivxStage
 - **Dotsub**
+- **DRBonanza**
 - **Dropbox**
 - **DrTuber**
 - **DRTV**
 - **Dump**
 - **dvtv**: http://video.aktualne.cz/
 - **EbaumsWorld**
+- **EchoMsk**
 - **eHow**
 - **Einthusan**
 - **eitb.tv**
 - **EllenTV**
 - **EllenTV:clips**
 - **ElPais**: El País
+- **Embedly**
 - **EMPFlix**
 - **Engadget**
 - **Eporner**
+- **EroProfile**
 - **Escapist**
 - **EveryonesMixtape**
 - **exfm**: ex.fm
@@ -120,7 +141,6 @@
 - **fernsehkritik.tv:postecke**
 - **Firedrive**
 - **Firstpost**
-- **firsttv**: Видеоархив - Первый канал
 - **Flickr**
 - **Folketinget**: Folketinget (ft.dk; Danish parliament)
 - **Foxgay**
@@ -143,6 +163,7 @@
 - **GDCVault**
 - **generic**: Generic downloader that works on some sites
 - **GiantBomb**
+- **Giga**
 - **Glide**: Glide mobile video messages (glide.me)
 - **Globo**
 - **GodTube**
@@ -153,9 +174,15 @@
 - **Grooveshark**
 - **Groupon**
 - **Hark**
+- **HearThisAt**
 - **Heise**
+- **HellPorno**
 - **Helsinki**: helsinki.fi
 - **HentaiStigma**
+- **HistoricFilms**
+- **History**
+- **hitbox**
+- **hitbox:live**
 - **HornBunny**
 - **HostingBulk**
 - **HotNewHipHop**
@@ -167,6 +194,7 @@
 - **ign.com**
 - **imdb**: Internet Movie Database trailers
 - **imdb:list**: Internet Movie Database lists
+- **Imgur**
 - **Ina**
 - **InfoQ**
 - **Instagram**
@@ -182,6 +210,7 @@
 - **jpopsuki.tv**
 - **Jukebox**
 - **Kankan**
+- **Karaoketv**
 - **keek**
 - **KeezMovies**
 - **KhanAcademy**
@@ -195,6 +224,7 @@
 - **LiveLeak**
 - **livestream**
 - **livestream:original**
+- **LnkGo**
 - **lrt.lt**
 - **lynda**: lynda.com videos
 - **lynda:course**: lynda.com online courses
@@ -203,6 +233,7 @@
 - **mailru**: Видео@Mail.Ru
 - **Malemotion**
 - **MDR**
+- **media.ccc.de**
 - **metacafe**
 - **Metacritic**
 - **Mgoon**
@@ -235,6 +266,8 @@
 - **MySpass**
 - **myvideo**
 - **MyVidster**
+- **n-tv.de**
+- **NationalGeographic**
 - **Naver**
 - **NBA**
 - **NBC**
@@ -242,11 +275,16 @@
 - **ndr**: NDR.de - Mediathek
 - **NDTV**
 - **NerdCubedFeed**
+- **Nerdist**
+- **Netzkino**
 - **Newgrounds**
 - **Newstube**
+- **NextMedia**
+- **NextMediaActionNews**
 - **nfb**: National Film Board of Canada
 - **nfl.com**
 - **nhl.com**
+- **nhl.com:news**: NHL news
 - **nhl.com:videocenter**: NHL videocenter category
 - **niconico**: ニコニコ動画
 - **NiconicoPlaylist**
@@ -257,18 +295,22 @@
 - **Nowness**
 - **nowvideo**: NowVideo
 - **npo.nl**
+- **npo.nl:live**
+- **npo.nl:radio**
+- **npo.nl:radio:fragment**
 - **NRK**
 - **NRKTV**
-- **NTV**
+- **ntv.ru**
 - **Nuvid**
 - **NYTimes**
 - **ocw.mit.edu**
 - **OktoberfestTV**
 - **on.aol.com**
 - **Ooyala**
+- **OpenFilm**
+- **orf:fm4**: radio FM4
 - **orf:oe1**: Radio Österreich 1
 - **orf:tvthek**: ORF TVthek
-- **ORFFM4**: radio FM4
 - **parliamentlive.tv**: UK parliament videos
 - **Patreon**
 - **PBS**
@@ -283,13 +325,16 @@
 - **podomatic**
 - **PornHd**
 - **PornHub**
+- **PornHubPlaylist**
 - **Pornotube**
 - **PornoXO**
 - **PromptFile**
 - **prosiebensat1**: ProSiebenSat.1 Digital
 - **Pyvideo**
 - **QuickVid**
+- **R7**
 - **radio.de**
+- **radiobremen**
 - **radiofrance**
 - **Rai**
 - **RBMARadio**
@@ -300,8 +345,10 @@
 - **RottenTomatoes**
 - **Roxwel**
 - **RTBF**
+- **Rte**
+- **rtl.nl**: rtl.nl and rtlxl.nl
+- **RTL2**
 - **RTLnow**
-- **rtlxl.nl**
 - **RTP**
 - **RTS**: RTS.ch
 - **rtve.es:alacarta**: RTVE a la carta
@@ -309,9 +356,11 @@
 - **RUHD**
 - **rutube**: Rutube videos
 - **rutube:channel**: Rutube channels
+- **rutube:embed**: Rutube embedded videos
 - **rutube:movie**: Rutube movies
 - **rutube:person**: Rutube person videos
 - **RUTV**: RUTV.RU
+- **Sandia**: Sandia National Laboratories
 - **Sapo**: SAPO Vídeos
 - **savefrom.net**
 - **SBS**: sbs.com.au
@@ -339,7 +388,8 @@
 - **soundcloud:playlist**
 - **soundcloud:set**
 - **soundcloud:user**
-- **Soundgasm**
+- **soundgasm**
+- **soundgasm:profile**
 - **southpark.cc.com**
 - **southpark.de**
 - **Space**
@@ -351,12 +401,14 @@
 - **Sport5**
 - **SportBox**
 - **SportDeutschland**
-- **SRMediathek**: Süddeutscher Rundfunk
+- **SRMediathek**: Saarländischer Rundfunk
 - **stanfordoc**: Stanford Open ClassRoom
 - **Steam**
 - **streamcloud.eu**
 - **StreamCZ**
+- **StreetVoice**
 - **SunPorno**
+- **SVTPlay**
 - **SWRMediathek**
 - **Syfy**
 - **SztvHu**
@@ -375,7 +427,9 @@
 - **TeleBruxelles**
 - **telecinco.es**
 - **TeleMB**
+- **TeleTask**
 - **TenPlay**
+- **TestTube**
 - **TF1**
 - **TheOnion**
 - **ThePlatform**
@@ -401,10 +455,19 @@
 - **Turbo**
 - **Tutv**
 - **tv.dfb.de**
+- **TV4**: tv4.se and tv4play.se
 - **tvigle**: Интернет-телевидение Tvigle.ru
 - **tvp.pl**
+- **tvp.pl:Series**
 - **TVPlay**: TV3Play and related services
-- **Twitch**
+- **Tweakers**
+- **twitch:bookmarks**
+- **twitch:chapter**
+- **twitch:past_broadcasts**
+- **twitch:profile**
+- **twitch:stream**
+- **twitch:video**
+- **twitch:vod**
 - **Ubu**
 - **udemy**
 - **udemy:course**
@@ -433,6 +496,8 @@
 - **videoweed**: VideoWeed
 - **Vidme**
 - **Vidzi**
+- **vier**
+- **vier:videos**
 - **viki**
 - **vimeo**
 - **vimeo:album**
@@ -460,11 +525,13 @@
 - **WDR**
 - **wdr:mobile**
 - **WDRMaus**: Sendung mit der Maus
+- **WebOfStories**
 - **Weibo**
 - **Wimp**
 - **Wistia**
 - **WorldStarHipHop**
 - **wrzuta.pl**
+- **WSJ**: Wall Street Journal
 - **XBef**
 - **XboxClips**
 - **XHamster**
@@ -472,8 +539,11 @@
 - **XNXX**
 - **XTube**
 - **XTubeUser**: XTube user profile
+- **Xuite**
 - **XVideos**
+- **XXXYMovies**
 - **Yahoo**: Yahoo screen and movies
+- **Yam**
 - **YesJapan**
 - **Ynet**
 - **YouJizz**
@@ -491,9 +561,9 @@
 - **youtube:search_url**: YouTube.com search URLs
 - **youtube:show**: YouTube.com (multi-season) shows
 - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
-- **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
 - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
 - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+- **Zapiks**
 - **ZDF**
 - **ZDFChannel**
 - **zingmp3:album**: mp3.zing.vn albums
```
setup.cfg

```diff
@@ -3,4 +3,4 @@ universal = True
 
 [flake8]
 exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
-ignore = E501
+ignore = E402,E501,E731
```
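For reference, E402 is "module level import not at top of file", E501 is "line too long", and E731 is "do not assign a lambda expression". A minimal illustration of the E731 pattern that the new tests rely on (the `Reporter` class is hypothetical):

```python
class Reporter(object):
    def report_warning(self, msg):
        print('WARNING: %s' % msg)


r = Reporter()
# flake8's E731 flags assigning a lambda to a name; here the assignment is a
# deliberate test-style monkey-patch, which is why the check is ignored in
# setup.cfg rather than "fixed" in the code.
r.report_warning = lambda *args, **kwargs: None
r.report_warning('this is now suppressed')  # prints nothing
```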
test/helper.py

```diff
@@ -103,6 +103,26 @@ def expect_info_dict(self, got_dict, expected_dict):
             self.assertTrue(
                 match_rex.match(got),
                 'field %s (value: %r) should match %r' % (info_field, got, match_str))
+        elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
+            got = got_dict.get(info_field)
+            start_str = expected[len('startswith:'):]
+            self.assertTrue(
+                isinstance(got, compat_str),
+                'Expected a %s object, but got %s for field %s' % (
+                    compat_str.__name__, type(got).__name__, info_field))
+            self.assertTrue(
+                got.startswith(start_str),
+                'field %s (value: %r) should start with %r' % (info_field, got, start_str))
+        elif isinstance(expected, compat_str) and expected.startswith('contains:'):
+            got = got_dict.get(info_field)
+            contains_str = expected[len('contains:'):]
+            self.assertTrue(
+                isinstance(got, compat_str),
+                'Expected a %s object, but got %s for field %s' % (
+                    compat_str.__name__, type(got).__name__, info_field))
+            self.assertTrue(
+                contains_str in got,
+                'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
         elif isinstance(expected, type):
             got = got_dict.get(info_field)
             self.assertTrue(isinstance(got, expected),
```
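With this change, string expectations in test definitions understand `startswith:` and `contains:` prefixes in addition to the existing `re:` (and `md5:`) forms. A hypothetical test definition showing the intended usage (all field values invented):

```python
# Hypothetical extractor test definition illustrating the matcher prefixes.
_TEST = {
    'url': 'http://example.com/video/1234567',
    'info_dict': {
        'id': '1234567',
        'ext': 'mp4',
        'title': 're:^Example video #[0-9]+$',   # regular-expression match
        'description': 'startswith:An example',  # new in this change
        'uploader': 'contains:Example',          # new in this change
    },
}
```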
```diff
@@ -153,12 +173,14 @@ def expect_info_dict(self, got_dict, expected_dict):
         info_dict_str += ''.join(
             ' %s: %s,\n' % (_repr(k), _repr(v))
             for k, v in test_info_dict.items() if k not in missing_keys)
-        info_dict_str += '\n'
+
+        if info_dict_str:
+            info_dict_str += '\n'
         info_dict_str += ''.join(
             ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
             for k in missing_keys)
         write_string(
-            '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
+            '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
         self.assertFalse(
             missing_keys,
             'Missing keys in test definition: %s' % (
```
test/parameters.json

```diff
@@ -28,7 +28,7 @@
     "retries": 10,
     "simulate": false,
     "subtitleslang": null,
-    "subtitlesformat": "srt",
+    "subtitlesformat": "best",
     "test": true,
     "updatetime": true,
     "usenetrc": false,
@@ -39,5 +39,6 @@
     "writesubtitles": false,
     "allsubtitles": false,
     "listssubtitles": false,
-    "socket_timeout": 20
+    "socket_timeout": 20,
+    "fixup": "never"
 }
```
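The new `fixup` key mirrors the `--fixup` post-download policy; to my knowledge the accepted values are `never`, `warn` and `detect_or_warn` (the default), and the tests pin it to `never` so file fix-ups cannot interfere with test runs. In embedding code the same setting would look roughly like this (a sketch; the URL is the usual test video):

```python
import youtube_dl

ydl_opts = {
    'fixup': 'never',      # same value the test parameters pin above
    'socket_timeout': 20,
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```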
test/test_YoutubeDL.py

```diff
@@ -13,6 +13,7 @@ import copy
 from test.helper import FakeYDL, assertRegexpMatches
 from youtube_dl import YoutubeDL
 from youtube_dl.extractor import YoutubeIE
+from youtube_dl.postprocessor.common import PostProcessor
 
 
 class YDL(FakeYDL):
```
|
|||||||
downloaded = ydl.downloaded_info_dicts[0]
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
self.assertEqual(downloaded['format_id'], 'G')
|
self.assertEqual(downloaded['format_id'], 'G')
|
||||||
|
|
||||||
|
def test_subtitles(self):
|
||||||
|
def s_formats(lang, autocaption=False):
|
||||||
|
return [{
|
||||||
|
'ext': ext,
|
||||||
|
'url': 'http://localhost/video.%s.%s' % (lang, ext),
|
||||||
|
'_auto': autocaption,
|
||||||
|
} for ext in ['vtt', 'srt', 'ass']]
|
||||||
|
subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
|
||||||
|
auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
|
||||||
|
info_dict = {
|
||||||
|
'id': 'test',
|
||||||
|
'title': 'Test',
|
||||||
|
'url': 'http://localhost/video.mp4',
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'automatic_captions': auto_captions,
|
||||||
|
'extractor': 'TEST',
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_info(params={}):
|
||||||
|
params.setdefault('simulate', True)
|
||||||
|
ydl = YDL(params)
|
||||||
|
ydl.report_warning = lambda *args, **kargs: None
|
||||||
|
return ydl.process_video_result(info_dict, download=False)
|
||||||
|
|
||||||
|
result = get_info()
|
||||||
|
self.assertFalse(result.get('requested_subtitles'))
|
||||||
|
self.assertEqual(result['subtitles'], subtitles)
|
||||||
|
self.assertEqual(result['automatic_captions'], auto_captions)
|
||||||
|
|
||||||
|
result = get_info({'writesubtitles': True})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertTrue(subs)
|
||||||
|
self.assertEqual(set(subs.keys()), set(['en']))
|
||||||
|
self.assertTrue(subs['en'].get('data') is None)
|
||||||
|
self.assertEqual(subs['en']['ext'], 'ass')
|
||||||
|
|
||||||
|
result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertEqual(subs['en']['ext'], 'srt')
|
||||||
|
|
||||||
|
result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertTrue(subs)
|
||||||
|
self.assertEqual(set(subs.keys()), set(['es', 'fr']))
|
||||||
|
|
||||||
|
result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertTrue(subs)
|
||||||
|
self.assertEqual(set(subs.keys()), set(['es', 'pt']))
|
||||||
|
self.assertFalse(subs['es']['_auto'])
|
||||||
|
self.assertTrue(subs['pt']['_auto'])
|
||||||
|
|
||||||
|
result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
|
||||||
|
subs = result['requested_subtitles']
|
||||||
|
self.assertTrue(subs)
|
||||||
|
self.assertEqual(set(subs.keys()), set(['es', 'pt']))
|
||||||
|
self.assertTrue(subs['es']['_auto'])
|
||||||
|
self.assertTrue(subs['pt']['_auto'])
|
||||||
|
|
||||||
def test_add_extra_info(self):
|
def test_add_extra_info(self):
|
||||||
test_dict = {
|
test_dict = {
|
||||||
'extractor': 'Foo',
|
'extractor': 'Foo',
|
||||||
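The parameters exercised by this test are the same ones an embedding application would pass. A minimal sketch requesting Spanish and Portuguese subtitles with automatic-caption fallback (option names taken from the test above; the URL is the usual test video):

```python
import youtube_dl

ydl_opts = {
    'writesubtitles': True,      # prefer real subtitles...
    'writeautomaticsub': True,   # ...but accept automatic captions
    'subtitleslangs': ['es', 'pt'],
    'subtitlesformat': 'ass/srt/best',
    'skip_download': True,       # only fetch the subtitle files
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```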
```diff
@@ -370,5 +430,35 @@ class TestFormatSelection(unittest.TestCase):
             'vbr': 10,
         }), '^\s*10k$')
 
+    def test_postprocessors(self):
+        filename = 'post-processor-testfile.mp4'
+        audiofile = filename + '.mp3'
+
+        class SimplePP(PostProcessor):
+            def run(self, info):
+                with open(audiofile, 'wt') as f:
+                    f.write('EXAMPLE')
+                info['filepath']
+                return False, info
+
+        def run_pp(params):
+            with open(filename, 'wt') as f:
+                f.write('EXAMPLE')
+            ydl = YoutubeDL(params)
+            ydl.add_post_processor(SimplePP())
+            ydl.post_process(filename, {'filepath': filename})
+
+        run_pp({'keepvideo': True})
+        self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+        self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+        os.unlink(filename)
+        os.unlink(audiofile)
+
+        run_pp({'keepvideo': False})
+        self.assertFalse(os.path.exists(filename), '%s exists' % filename)
+        self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+        os.unlink(audiofile)
+
+
 if __name__ == '__main__':
     unittest.main()
```
test/test_jsinterp.py

```diff
@@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase):
         self.assertEqual(jsi.call_function('f'), -11)
 
     def test_comments(self):
+        'Skipping: Not yet fully implemented'
+        return
         jsi = JSInterpreter('''
         function x() {
             var x = /* 1 + */ 2;
@@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 52)
 
+        jsi = JSInterpreter('''
+        function f() {
+            var x = "/*";
+            var y = 1 /* comment */ + 2;
+            return y;
+        }
+        ''')
+        self.assertEqual(jsi.call_function('f'), 3)
+
     def test_precedence(self):
         jsi = JSInterpreter('''
         function x() {
```
test/test_subtitles.py

```diff
@@ -18,6 +18,14 @@ from youtube_dl.extractor import (
     VimeoIE,
     WallaIE,
     CeskaTelevizeIE,
+    LyndaIE,
+    NPOIE,
+    ComedyCentralIE,
+    NRKTVIE,
+    RaiIE,
+    VikiIE,
+    ThePlatformIE,
+    RTVEALaCartaIE,
 )
 
 
```
```diff
@@ -27,42 +35,38 @@ class BaseTestSubtitles(unittest.TestCase):
 
     def setUp(self):
         self.DL = FakeYDL()
-        self.ie = self.IE(self.DL)
+        self.ie = self.IE()
+        self.DL.add_info_extractor(self.ie)
 
     def getInfoDict(self):
-        info_dict = self.ie.extract(self.url)
+        info_dict = self.DL.extract_info(self.url, download=False)
         return info_dict
 
     def getSubtitles(self):
         info_dict = self.getInfoDict()
-        return info_dict['subtitles']
+        subtitles = info_dict['requested_subtitles']
+        if not subtitles:
+            return subtitles
+        for sub_info in subtitles.values():
+            if sub_info.get('data') is None:
+                uf = self.DL.urlopen(sub_info['url'])
+                sub_info['data'] = uf.read().decode('utf-8')
+        return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
 
 
 class TestYoutubeSubtitles(BaseTestSubtitles):
     url = 'QRS8MkLhQmM'
     IE = YoutubeIE
 
-    def test_youtube_no_writesubtitles(self):
-        self.DL.params['writesubtitles'] = False
-        subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
-
-    def test_youtube_subtitles(self):
-        self.DL.params['writesubtitles'] = True
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
-
-    def test_youtube_subtitles_lang(self):
-        self.DL.params['writesubtitles'] = True
-        self.DL.params['subtitleslangs'] = ['it']
-        subtitles = self.getSubtitles()
-        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
-
     def test_youtube_allsubtitles(self):
         self.DL.params['writesubtitles'] = True
         self.DL.params['allsubtitles'] = True
         subtitles = self.getSubtitles()
         self.assertEqual(len(subtitles.keys()), 13)
+        self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+        self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
+        for lang in ['it', 'fr', 'de']:
+            self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
 
     def test_youtube_subtitles_sbv_format(self):
         self.DL.params['writesubtitles'] = True
```
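The reworked helper reflects the new subtitle data model: `subtitles` maps each language to a *list* of available formats, while `requested_subtitles` maps each language to the *single* chosen format, whose `data` field may be filled lazily from its URL. Roughly (shapes inferred from the tests above; values invented):

```python
# All formats the site offers, per language:
subtitles = {
    'en': [
        {'ext': 'vtt', 'url': 'http://localhost/video.en.vtt'},
        {'ext': 'srt', 'url': 'http://localhost/video.en.srt'},
    ],
}

# What youtube-dl actually selected for download, per language;
# 'data' is only filled in once the URL has been fetched.
requested_subtitles = {
    'en': {'ext': 'srt', 'url': 'http://localhost/video.en.srt'},
}
```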
```diff
@@ -76,12 +80,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
         subtitles = self.getSubtitles()
         self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
 
-    def test_youtube_list_subtitles(self):
-        self.DL.expect_warning('Video doesn\'t have automatic captions')
-        self.DL.params['listsubtitles'] = True
-        info_dict = self.getInfoDict()
-        self.assertEqual(info_dict, None)
-
     def test_youtube_automatic_captions(self):
         self.url = '8YoUxe5ncPo'
         self.DL.params['writeautomaticsub'] = True
```
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
def test_youtube_multiple_langs(self):
|
|
||||||
self.url = 'QRS8MkLhQmM'
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['it', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
|
|
||||||
class TestDailymotionSubtitles(BaseTestSubtitles):
|
class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.dailymotion.com/video/xczg00'
|
url = 'http://www.dailymotion.com/video/xczg00'
|
||||||
IE = DailymotionIE
|
IE = DailymotionIE
|
||||||
|
|
||||||
def test_no_writesubtitles(self):
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
|
|
||||||
def test_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
|
||||||
|
|
||||||
def test_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['fr']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles.keys()), 5)
|
self.assertTrue(len(subtitles.keys()) >= 6)
|
||||||
|
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
|
||||||
def test_list_subtitles(self):
|
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
for lang in ['es', 'fr', 'de']:
|
||||||
self.DL.params['listsubtitles'] = True
|
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_automatic_captions(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslang'] = ['en']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(len(subtitles.keys()) == 0)
|
|
||||||
|
|
||||||
def test_nosubtitles(self):
|
def test_nosubtitles(self):
|
||||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||||
@ -159,61 +124,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
def test_multiple_langs(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['es', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
|
|
||||||
class TestTedSubtitles(BaseTestSubtitles):
|
class TestTedSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
|
||||||
IE = TEDIE
|
IE = TEDIE
|
||||||
|
|
||||||
def test_no_writesubtitles(self):
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
|
|
||||||
def test_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
|
|
||||||
|
|
||||||
def test_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['fr']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertTrue(len(subtitles.keys()) >= 28)
|
self.assertTrue(len(subtitles.keys()) >= 28)
|
||||||
|
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
|
||||||
def test_list_subtitles(self):
|
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
for lang in ['es', 'fr', 'de']:
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_automatic_captions(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslang'] = ['en']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(len(subtitles.keys()) == 0)
|
|
||||||
|
|
||||||
def test_multiple_langs(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['es', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
@ -221,14 +146,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
|
|||||||
url = 'http://blip.tv/a/a-6603250'
|
url = 'http://blip.tv/a/a-6603250'
|
||||||
IE = BlipTVIE
|
IE = BlipTVIE
|
||||||
|
|
||||||
def test_list_subtitles(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
@ -240,39 +158,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
|||||||
url = 'http://vimeo.com/76979871'
|
url = 'http://vimeo.com/76979871'
|
||||||
IE = VimeoIE
|
IE = VimeoIE
|
||||||
|
|
||||||
def test_no_writesubtitles(self):
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(subtitles, None)
|
|
||||||
|
|
||||||
def test_subtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
|
|
||||||
|
|
||||||
def test_subtitles_lang(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['subtitleslangs'] = ['fr']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||||
def test_list_subtitles(self):
|
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_automatic_captions(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['writeautomaticsub'] = True
|
|
||||||
self.DL.params['subtitleslang'] = ['en']
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertTrue(len(subtitles.keys()) == 0)
|
|
||||||
|
|
||||||
def test_nosubtitles(self):
|
def test_nosubtitles(self):
|
||||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||||
@ -280,27 +172,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
def test_multiple_langs(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
langs = ['es', 'fr', 'de']
|
|
||||||
self.DL.params['subtitleslangs'] = langs
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
for lang in langs:
|
|
||||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
|
||||||
|
|
||||||
|
|
||||||
class TestWallaSubtitles(BaseTestSubtitles):
|
class TestWallaSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
|
||||||
IE = WallaIE
|
IE = WallaIE
|
||||||
|
|
||||||
def test_list_subtitles(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
@ -315,26 +193,20 @@ class TestWallaSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
|
|
||||||
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
|
||||||
IE = CeskaTelevizeIE
|
IE = CeskaTelevizeIE
|
||||||
|
|
||||||
def test_list_subtitles(self):
|
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
|
||||||
self.DL.params['listsubtitles'] = True
|
|
||||||
info_dict = self.getInfoDict()
|
|
||||||
self.assertEqual(info_dict, None)
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(set(subtitles.keys()), set(['cs']))
|
self.assertEqual(set(subtitles.keys()), set(['cs']))
|
||||||
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
|
self.assertTrue(len(subtitles['cs']) > 20000)
|
||||||
|
|
||||||
def test_nosubtitles(self):
|
def test_nosubtitles(self):
|
||||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||||
@ -342,7 +214,110 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles), 0)
|
self.assertFalse(subtitles)
|
||||||
|
|
||||||
|
|
||||||
|
class TestLyndaSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
|
||||||
|
IE = LyndaIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
|
||||||
|
|
||||||
|
|
||||||
|
class TestNPOSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
|
||||||
|
IE = NPOIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['nl']))
|
||||||
|
self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
|
||||||
|
|
||||||
|
|
||||||
|
class TestMTVSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
|
||||||
|
IE = ComedyCentralIE
|
||||||
|
|
||||||
|
def getInfoDict(self):
|
||||||
|
return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
|
||||||
|
|
||||||
|
|
||||||
|
class TestNRKSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
|
||||||
|
IE = NRKTVIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['no']))
|
||||||
|
self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
|
||||||
|
|
||||||
|
|
||||||
|
class TestRaiSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||||
|
IE = RaiIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['it']))
|
||||||
|
self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
|
||||||
|
|
||||||
|
|
||||||
|
class TestVikiSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
|
||||||
|
IE = VikiIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
|
||||||
|
|
||||||
|
|
||||||
|
class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||||
|
# from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
|
||||||
|
# (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
|
||||||
|
url = 'theplatform:JFUjUE1_ehvq'
|
||||||
|
IE = ThePlatformIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||||
|
|
||||||
|
|
||||||
|
class TestRtveSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
|
||||||
|
IE = RTVEALaCartaIE
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
print('Skipping, only available from Spain')
|
||||||
|
return
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['es']))
|
||||||
|
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
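The rewritten getSubtitles() helper at the top of this file's diff reads the new requested_subtitles field instead of the old subtitles dict. A hypothetical sketch of the shape it expects, with field names taken from the helper itself and from the process_info() changes further down; the languages, URL and payload here are made up:

# Each requested language maps to a dict carrying at least 'ext' plus
# either 'url' (fetched lazily) or 'data' (an inline payload).
requested_subtitles = {
    'en': {
        'ext': 'vtt',
        'url': 'https://example.com/subs/en.vtt',  # downloaded when 'data' is absent
    },
    'it': {
        'ext': 'srt',
        'data': '1\n00:00:00,000 --> 00:00:01,000\nCiao\n',  # used as-is
    },
}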
@@ -34,8 +34,8 @@ def _make_testfunc(testfile):
     def test_func(self):
         as_file = os.path.join(TEST_DIR, testfile)
         swf_file = os.path.join(TEST_DIR, test_id + '.swf')
-        if ((not os.path.exists(swf_file))
-                or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
+        if ((not os.path.exists(swf_file)) or
+                os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
             # Recompile
             try:
                 subprocess.check_call([
@@ -53,6 +53,7 @@ from youtube_dl.utils import (
     version_tuple,
     xpath_with_ns,
     render_table,
+    match_str,
 )


@@ -84,6 +85,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             sanitize_filename('New World record at 0:12:34'),
             'New World record at 0_12_34')
+        self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+        self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
 
         forbidden = '"\0\\/'
         for fc in forbidden:
@@ -156,6 +159,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(
             unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
             '20141126')
+        self.assertEqual(
+            unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
+            '20150202')
 
     def test_find_xpath_attr(self):
         testxml = '''<root>
@@ -366,6 +372,10 @@ class TestUtil(unittest.TestCase):
             "playlist":[{"controls":{"all":null}}]
         }''')
 
+        inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
+        json_code = js_to_json(inp)
+        self.assertEqual(json.loads(json_code), json.loads(inp))
+
     def test_js_to_json_edgecases(self):
         on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
         self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
@@ -456,6 +466,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
             '123  4\n'
             '9999 51')
 
+    def test_match_str(self):
+        self.assertRaises(ValueError, match_str, 'xy>foobar', {})
+        self.assertFalse(match_str('xy', {'x': 1200}))
+        self.assertTrue(match_str('!xy', {'x': 1200}))
+        self.assertTrue(match_str('x', {'x': 1200}))
+        self.assertFalse(match_str('!x', {'x': 1200}))
+        self.assertTrue(match_str('x', {'x': 0}))
+        self.assertFalse(match_str('x>0', {'x': 0}))
+        self.assertFalse(match_str('x>0', {}))
+        self.assertTrue(match_str('x>?0', {}))
+        self.assertTrue(match_str('x>1K', {'x': 1200}))
+        self.assertFalse(match_str('x>2K', {'x': 1200}))
+        self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
+        self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
+        self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
+        self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
+        self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
+        self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
+        self.assertFalse(match_str(
+            'like_count > 100 & dislike_count <? 50 & description',
+            {'like_count': 90, 'description': 'foo'}))
+        self.assertTrue(match_str(
+            'like_count > 100 & dislike_count <? 50 & description',
+            {'like_count': 190, 'description': 'foo'}))
+        self.assertFalse(match_str(
+            'like_count > 100 & dislike_count <? 50 & description',
+            {'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
+        self.assertFalse(match_str(
+            'like_count > 100 & dislike_count <? 50 & description',
+            {'like_count': 190, 'dislike_count': 10}))
+
 
 if __name__ == '__main__':
     unittest.main()
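The test_match_str() cases above double as a reference for the filter mini-language: a bare key tests for presence, '!' negates, numeric comparisons accept K/M-style suffixes, and a '?' after an operator lets a missing field pass. A minimal usage sketch built from one of the asserted cases:

from youtube_dl.utils import match_str

# True: like_count clears 100, dislike_count is absent but tolerated by '<?',
# and 'description' is present.
print(match_str('like_count > 100 & dislike_count <? 50 & description',
                {'like_count': 190, 'description': 'foo'}))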
@@ -8,11 +8,11 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import io
 import re
 import string
 
+from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.compat import compat_str, compat_urlretrieve
 
@@ -64,6 +64,12 @@ _TESTS = [
         'js',
         '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
         '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
+    ),
+    (
+        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
+        'js',
+        '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
+        '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
     )
 ]
 
@@ -88,7 +94,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
         if not os.path.exists(fn):
             compat_urlretrieve(url, fn)
 
-        ie = YoutubeIE()
+        ydl = FakeYDL()
+        ie = YoutubeIE(ydl)
         if stype == 'js':
             with io.open(fn, encoding='utf-8') as testf:
                 jscode = testf.read()
@@ -154,7 +154,7 @@ class YoutubeDL(object):
     allsubtitles:      Downloads all the subtitles of the video
                        (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
-    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
+    subtitlesformat:   The format code for subtitles
     subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
@@ -199,18 +199,25 @@ class YoutubeDL(object):
                        postprocessor.
     progress_hooks:    A list of functions that get called on download
                        progress, with a dictionary with the entries
-                       * status: One of "downloading" and "finished".
+                       * status: One of "downloading", "error", or "finished".
                          Check this first and ignore unknown values.
 
-                       If status is one of "downloading" or "finished", the
+                       If status is one of "downloading", or "finished", the
                        following properties may also be present:
                        * filename: The final filename (always present)
+                       * tmpfilename: The filename we're currently writing to
                        * downloaded_bytes: Bytes on disk
                        * total_bytes: Size of the whole file, None if unknown
-                       * tmpfilename: The filename we're currently writing to
+                       * total_bytes_estimate: Guess of the eventual file size,
+                         None if unavailable.
+                       * elapsed: The number of seconds since download started.
                        * eta: The estimated time in seconds, None if unknown
                        * speed: The download speed in bytes/second, None if
                          unknown
+                       * fragment_index: The counter of the currently
+                         downloaded video fragment.
+                       * fragment_count: The number of fragments (= individual
+                         files that will be merged)
 
                        Progress hooks are guaranteed to be called at least once
                        (with status "finished") if the download is successful.
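The dictionary documented above is what every registered progress hook receives. A minimal sketch of a consuming hook; the printing logic is illustrative only and not part of this changeset:

def progress_hook(s):
    # Per the docstring above, check 'status' first and ignore unknown values.
    if s['status'] == 'downloading':
        total = s.get('total_bytes') or s.get('total_bytes_estimate')
        print('%s: %s of %s bytes' % (
            s.get('filename'), s.get('downloaded_bytes'), total))
    elif s['status'] == 'finished':
        print('finished: %s' % s.get('filename'))

# Registered via the option documented here:
# ydl = YoutubeDL({'progress_hooks': [progress_hook]})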
@@ -225,10 +232,19 @@ class YoutubeDL(object):
     call_home:         Boolean, true iff we are allowed to contact the
                        youtube-dl servers for debugging.
     sleep_interval:    Number of seconds to sleep before each download.
-    external_downloader:  Executable of the external downloader to call.
     listformats:       Print an overview of available video formats and exit.
     list_thumbnails:   Print a table of all thumbnails and exit.
+    match_filter:      A function that gets called with the info_dict of
+                       every video.
+                       If it returns a message, the video is ignored.
+                       If it returns None, the video is downloaded.
+                       match_filter_func in utils.py is one example for this.
+    no_color:          Do not emit color codes in output.
+
+    The following options determine which downloader is picked:
+    external_downloader: Executable of the external downloader to call.
+                       None or unset for standard (built-in) downloader.
+    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 
     The following parameters are not used by YoutubeDL itself, they are used by
     the FileDownloader:
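A hedged sketch of wiring the new match_filter option programmatically; match_filter_func, imported from youtube_dl.utils later in this diff, turns a filter expression into the callable described above:

from youtube_dl import YoutubeDL
from youtube_dl.utils import match_filter_func

ydl = YoutubeDL({
    # Skip videos unless they have >100 likes and, when known, <50 dislikes.
    'match_filter': match_filter_func('like_count > 100 & dislike_count <? 50'),
})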
@@ -292,8 +308,8 @@ class YoutubeDL(object):
                 raise
 
         if (sys.version_info >= (3,) and sys.platform != 'win32' and
-                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
-                and not params.get('restrictfilenames', False)):
+                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+                not params.get('restrictfilenames', False)):
             # On Python 3, the Unicode filesystem API will throw errors (#1474)
             self.report_warning(
                 'Assuming --restrict-filenames since file system encoding '
@@ -485,7 +501,7 @@ class YoutubeDL(object):
         else:
             if self.params.get('no_warnings'):
                 return
-            if self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
                 _msg_header = '\033[0;33mWARNING:\033[0m'
             else:
                 _msg_header = 'WARNING:'
@@ -497,7 +513,7 @@ class YoutubeDL(object):
         Do the same as trouble, but prefixes the message with 'ERROR:', colored
         in red if stderr is a tty file.
         '''
-        if self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
             _msg_header = '\033[0;31mERROR:\033[0m'
         else:
             _msg_header = 'ERROR:'
@@ -554,7 +570,7 @@ class YoutubeDL(object):
             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
             return None
 
-    def _match_entry(self, info_dict):
+    def _match_entry(self, info_dict, incomplete):
         """ Returns None iff the file should be downloaded """
 
         video_title = info_dict.get('title', info_dict.get('id', 'video'))
@@ -583,9 +599,17 @@ class YoutubeDL(object):
             if max_views is not None and view_count > max_views:
                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
-            return 'Skipping "%s" because it is age restricted' % title
+            return 'Skipping "%s" because it is age restricted' % video_title
         if self.in_download_archive(info_dict):
             return '%s has already been recorded in archive' % video_title
+
+        if not incomplete:
+            match_filter = self.params.get('match_filter')
+            if match_filter is not None:
+                ret = match_filter(info_dict)
+                if ret is not None:
+                    return ret
+
         return None
 
     @staticmethod
@@ -779,7 +803,7 @@ class YoutubeDL(object):
                     'extractor_key': ie_result['extractor_key'],
                 }
 
-                reason = self._match_entry(entry)
+                reason = self._match_entry(entry, incomplete=True)
                 if reason is not None:
                     self.to_screen('[download] ' + reason)
                     continue
@@ -826,27 +850,44 @@ class YoutubeDL(object):
             '!=': operator.ne,
         }
         operator_rex = re.compile(r'''(?x)\s*\[
-            (?P<key>width|height|tbr|abr|vbr|filesize|fps)
+            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
             \]$
             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
         m = operator_rex.search(format_spec)
+        if m:
+            try:
+                comparison_value = int(m.group('value'))
+            except ValueError:
+                comparison_value = parse_filesize(m.group('value'))
+                if comparison_value is None:
+                    comparison_value = parse_filesize(m.group('value') + 'B')
+                if comparison_value is None:
+                    raise ValueError(
+                        'Invalid value %r in format specification %r' % (
+                            m.group('value'), format_spec))
+            op = OPERATORS[m.group('op')]
+
+        if not m:
+            STR_OPERATORS = {
+                '=': operator.eq,
+                '!=': operator.ne,
+            }
+            str_operator_rex = re.compile(r'''(?x)\s*\[
+                \s*(?P<key>ext|acodec|vcodec|container|protocol)
+                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+                \s*(?P<value>[a-zA-Z0-9_-]+)
+                \s*\]$
+                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+            m = str_operator_rex.search(format_spec)
+            if m:
+                comparison_value = m.group('value')
+                op = STR_OPERATORS[m.group('op')]
+
         if not m:
             raise ValueError('Invalid format specification %r' % format_spec)
 
-        try:
-            comparison_value = int(m.group('value'))
-        except ValueError:
-            comparison_value = parse_filesize(m.group('value'))
-            if comparison_value is None:
-                comparison_value = parse_filesize(m.group('value') + 'B')
-            if comparison_value is None:
-                raise ValueError(
-                    'Invalid value %r in format specification %r' % (
-                        m.group('value'), format_spec))
-        op = OPERATORS[m.group('op')]
-
         def _filter(f):
             actual_value = f.get(m.group('key'))
             if actual_value is None:
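The STR_OPERATORS block above extends format filtering from purely numeric keys to ext, acodec, vcodec, container and protocol. A sketch of what a format spec can now express (assumed 'format' parameter syntax, not taken verbatim from this diff):

from youtube_dl import YoutubeDL

# Pick the best format whose container extension is mp4.
ydl = YoutubeDL({'format': 'best[ext=mp4]'})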
@@ -920,27 +961,9 @@ class YoutubeDL(object):
         return res
 
     def _calc_cookies(self, info_dict):
-        class _PseudoRequest(object):
-            def __init__(self, url):
-                self.url = url
-                self.headers = {}
-                self.unverifiable = False
-
-            def add_unredirected_header(self, k, v):
-                self.headers[k] = v
-
-            def get_full_url(self):
-                return self.url
-
-            def is_unverifiable(self):
-                return self.unverifiable
-
-            def has_header(self, h):
-                return h in self.headers
-
-        pr = _PseudoRequest(info_dict['url'])
+        pr = compat_urllib_request.Request(info_dict['url'])
         self.cookiejar.add_cookie_header(pr)
-        return pr.headers.get('Cookie')
+        return pr.get_header('Cookie')
 
     def process_video_result(self, info_dict, download=True):
         assert info_dict.get('_type', 'video') == 'video'
@@ -964,9 +987,11 @@ class YoutubeDL(object):
         thumbnails.sort(key=lambda t: (
             t.get('preference'), t.get('width'), t.get('height'),
             t.get('id'), t.get('url')))
-        for t in thumbnails:
+        for i, t in enumerate(thumbnails):
             if 'width' in t and 'height' in t:
                 t['resolution'] = '%dx%d' % (t['width'], t['height'])
+            if t.get('id') is None:
+                t['id'] = '%d' % i
 
         if thumbnails and 'thumbnail' not in info_dict:
             info_dict['thumbnail'] = thumbnails[-1]['url']
@@ -983,6 +1008,15 @@ class YoutubeDL(object):
                 info_dict['timestamp'])
             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 
+        if self.params.get('listsubtitles', False):
+            if 'automatic_captions' in info_dict:
+                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+            return
+        info_dict['requested_subtitles'] = self.process_subtitles(
+            info_dict['id'], info_dict.get('subtitles'),
+            info_dict.get('automatic_captions'))
+
         # This extractors handle format selection themselves
         if info_dict['extractor'] in ['Youku']:
             if download:
@@ -1074,7 +1108,8 @@ class YoutubeDL(object):
                                    else self.params['merge_output_format'])
                 selected_format = {
                     'requested_formats': formats_info,
-                    'format': rf,
+                    'format': '%s+%s' % (formats_info[0].get('format'),
+                                         formats_info[1].get('format')),
                     'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                             formats_info[1].get('format_id')),
                     'width': formats_info[0].get('width'),
@@ -1110,6 +1145,55 @@ class YoutubeDL(object):
         info_dict.update(formats_to_download[-1])
         return info_dict
 
+    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+        """Select the requested subtitles and their format"""
+        available_subs = {}
+        if normal_subtitles and self.params.get('writesubtitles'):
+            available_subs.update(normal_subtitles)
+        if automatic_captions and self.params.get('writeautomaticsub'):
+            for lang, cap_info in automatic_captions.items():
+                if lang not in available_subs:
+                    available_subs[lang] = cap_info
+
+        if (not self.params.get('writesubtitles') and not
+                self.params.get('writeautomaticsub') or not
+                available_subs):
+            return None
+
+        if self.params.get('allsubtitles', False):
+            requested_langs = available_subs.keys()
+        else:
+            if self.params.get('subtitleslangs', False):
+                requested_langs = self.params.get('subtitleslangs')
+            elif 'en' in available_subs:
+                requested_langs = ['en']
+            else:
+                requested_langs = [list(available_subs.keys())[0]]
+
+        formats_query = self.params.get('subtitlesformat', 'best')
+        formats_preference = formats_query.split('/') if formats_query else []
+        subs = {}
+        for lang in requested_langs:
+            formats = available_subs.get(lang)
+            if formats is None:
+                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+                continue
+            for ext in formats_preference:
+                if ext == 'best':
+                    f = formats[-1]
+                    break
+                matches = list(filter(lambda f: f['ext'] == ext, formats))
+                if matches:
+                    f = matches[-1]
+                    break
+            else:
+                f = formats[-1]
+                self.report_warning(
+                    'No subtitle format found matching "%s" for language %s, '
+                    'using %s' % (formats_query, lang, f['ext']))
+            subs[lang] = f
+        return subs
+
     def process_info(self, info_dict):
         """Process a single resolved IE result."""
 
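A hedged sketch of the parameters process_subtitles() consumes: subtitlesformat is now a '/'-separated preference list tried left to right, with 'best' meaning the extractor's last-listed format:

from youtube_dl import YoutubeDL

ydl = YoutubeDL({
    'writesubtitles': True,
    'subtitleslangs': ['en', 'it'],  # falls back to 'en', then any language, when unset
    'subtitlesformat': 'srt/best',   # prefer srt, otherwise take the best available
})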
@@ -1130,7 +1214,7 @@ class YoutubeDL(object):
         if 'format' not in info_dict:
             info_dict['format'] = info_dict['ext']
 
-        reason = self._match_entry(info_dict)
+        reason = self._match_entry(info_dict, incomplete=False)
         if reason is not None:
             self.to_screen('[download] ' + reason)
             return
@@ -1212,15 +1296,22 @@ class YoutubeDL(object):
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                        self.params.get('writeautomaticsub')])
 
-        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+        if subtitles_are_requested and info_dict.get('requested_subtitles'):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
-            subtitles = info_dict['subtitles']
-            sub_format = self.params.get('subtitlesformat', 'srt')
-            for sub_lang in subtitles.keys():
-                sub = subtitles[sub_lang]
-                if sub is None:
-                    continue
+            subtitles = info_dict['requested_subtitles']
+            for sub_lang, sub_info in subtitles.items():
+                sub_format = sub_info['ext']
+                if sub_info.get('data') is not None:
+                    sub_data = sub_info['data']
+                else:
+                    try:
+                        uf = self.urlopen(sub_info['url'])
+                        sub_data = uf.read().decode('utf-8')
+                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                        self.report_warning('Unable to download subtitle for "%s": %s' %
+                                            (sub_lang, compat_str(err)))
+                        continue
                 try:
                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
@@ -1228,7 +1319,7 @@ class YoutubeDL(object):
                     else:
                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                            subfile.write(sub)
+                            subfile.write(sub_data)
                 except (OSError, IOError):
                     self.report_error('Cannot write subtitles file ' + sub_filename)
                     return
@@ -1261,7 +1352,7 @@ class YoutubeDL(object):
                 downloaded = []
                 success = True
                 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
-                if not merger._executable:
+                if not merger.available:
                     postprocessors = []
                     self.report_warning('You have requested multiple '
                                         'formats but ffmpeg or avconv are not installed.'
@@ -1340,8 +1431,8 @@ class YoutubeDL(object):
         """Download a given list of URLs."""
         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
         if (len(url_list) > 1 and
-                '%' not in outtmpl
-                and self.params.get('max_downloads') != 1):
+                '%' not in outtmpl and
+                self.params.get('max_downloads') != 1):
             raise SameFileError(outtmpl)
 
         for url in url_list:
@@ -1508,30 +1599,18 @@ class YoutubeDL(object):
         return res
 
     def list_formats(self, info_dict):
-        def line(format, idlen=20):
-            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
-                format['format_id'],
-                format['ext'],
-                self.format_resolution(format),
-                self._format_note(format),
-            ))
-
         formats = info_dict.get('formats', [info_dict])
-        idlen = max(len('format code'),
-                    max(len(f['format_id']) for f in formats))
-        formats_s = [
-            line(f, idlen) for f in formats
+        table = [
+            [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
+            for f in formats
             if f.get('preference') is None or f['preference'] >= -1000]
         if len(formats) > 1:
-            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
-            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
+            table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
 
-        header_line = line({
-            'format_id': 'format code', 'ext': 'extension',
-            'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
+        header_line = ['format code', 'extension', 'resolution', 'note']
         self.to_screen(
-            '[info] Available formats for %s:\n%s\n%s' %
-            (info_dict['id'], header_line, '\n'.join(formats_s)))
+            '[info] Available formats for %s:\n%s' %
+            (info_dict['id'], render_table(header_line, table)))
 
     def list_thumbnails(self, info_dict):
         thumbnails = info_dict.get('thumbnails')
@@ -1550,6 +1629,17 @@ class YoutubeDL(object):
             ['ID', 'width', 'height', 'URL'],
             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
 
+    def list_subtitles(self, video_id, subtitles, name='subtitles'):
+        if not subtitles:
+            self.to_screen('%s has no %s' % (video_id, name))
+            return
+        self.to_screen(
+            'Available %s for %s:' % (name, video_id))
+        self.to_screen(render_table(
+            ['Language', 'formats'],
+            [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+                for lang, formats in subtitles.items()]))
+
     def urlopen(self, req):
         """ Start an HTTP download """
 
@@ -1611,7 +1701,7 @@ class YoutubeDL(object):
         self._write_string('[debug] Python version %s - %s\n' % (
             platform.python_version(), platform_name()))
 
-        exe_versions = FFmpegPostProcessor.get_versions()
+        exe_versions = FFmpegPostProcessor.get_versions(self)
         exe_versions['rtmpdump'] = rtmpdump_version()
         exe_str = ', '.join(
             '%s %s' % (exe, v)
@@ -23,9 +23,10 @@ from .compat import (
 )
 from .utils import (
     DateRange,
-    DEFAULT_OUTTMPL,
     decodeOption,
+    DEFAULT_OUTTMPL,
     DownloadError,
+    match_filter_func,
     MaxDownloadsReached,
     preferredencoding,
     read_batch_urls,
@@ -188,14 +189,14 @@ def _real_main(argv=None):
    # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
     if opts.outtmpl is not None:
         opts.outtmpl = opts.outtmpl.decode(preferredencoding())
-    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
-               or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
-               or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
-               or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
-               or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
-               or (opts.useid and '%(id)s.%(ext)s')
-               or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
-               or DEFAULT_OUTTMPL)
+    outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
+               (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
+               (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
+               (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
+               (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
+               (opts.useid and '%(id)s.%(ext)s') or
+               (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
+               DEFAULT_OUTTMPL)
     if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
         parser.error('Cannot download a video and extract audio into the same'
                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
@@ -225,7 +226,6 @@ def _real_main(argv=None):
     if opts.embedsubtitles:
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
-            'subtitlesformat': opts.subtitlesformat,
         })
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})
@@ -247,6 +247,9 @@ def _real_main(argv=None):
             xattr  # Confuse flake8
         except ImportError:
             parser.error('setting filesize xattr requested but python-xattr is not available')
+    match_filter = (
+        None if opts.match_filter is None
+        else match_filter_func(opts.match_filter))
 
     ydl_opts = {
         'usenetrc': opts.usenetrc,
@@ -344,6 +347,10 @@ def _real_main(argv=None):
         'list_thumbnails': opts.list_thumbnails,
         'playlist_items': opts.playlist_items,
         'xattr_set_filesize': opts.xattr_set_filesize,
+        'match_filter': match_filter,
+        'no_color': opts.no_color,
+        'ffmpeg_location': opts.ffmpeg_location,
+        'hls_prefer_native': opts.hls_prefer_native,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
-
 import base64
 from math import ceil
 
@@ -329,3 +327,5 @@ def inc(data):
             data[i] = data[i] + 1
             break
     return data
+
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
@@ -34,6 +34,9 @@ def get_suitable_downloader(info_dict, params={}):
         if ed.supports(info_dict):
             return ed
 
+    if protocol == 'm3u8' and params.get('hls_prefer_native'):
+        return NativeHlsFD
+
     return PROTOCOL_MAP.get(protocol, HttpFD)
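A one-line usage sketch for the switch added above; NativeHlsFD is the pure-Python HLS downloader that already ships in youtube_dl.downloader:

from youtube_dl import YoutubeDL

# m3u8 downloads now bypass ffmpeg/avconv when this is set.
ydl = YoutubeDL({'hls_prefer_native': True})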
youtube_dl/downloader/common.py
@@ -1,4 +1,4 @@
-from __future__ import unicode_literals
+from __future__ import division, unicode_literals

 import os
 import re
@@ -54,6 +54,7 @@ class FileDownloader(object):
         self.ydl = ydl
         self._progress_hooks = []
         self.params = params
+        self.add_progress_hook(self.report_progress)

     @staticmethod
     def format_seconds(seconds):
@@ -226,42 +227,64 @@ class FileDownloader(object):
         self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
         self.to_console_title('youtube-dl ' + msg)

-    def report_progress(self, percent, data_len_str, speed, eta):
-        """Report download progress."""
-        if self.params.get('noprogress', False):
-            return
-        if eta is not None:
-            eta_str = self.format_eta(eta)
-        else:
-            eta_str = 'Unknown ETA'
-        if percent is not None:
-            percent_str = self.format_percent(percent)
-        else:
-            percent_str = 'Unknown %'
-        speed_str = self.format_speed(speed)
-
-        msg = ('%s of %s at %s ETA %s' %
-               (percent_str, data_len_str, speed_str, eta_str))
-        self._report_progress_status(msg)
-
-    def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
-        if self.params.get('noprogress', False):
-            return
-        downloaded_str = format_bytes(downloaded_data_len)
-        speed_str = self.format_speed(speed)
-        elapsed_str = FileDownloader.format_seconds(elapsed)
-        msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
-        self._report_progress_status(msg)
-
-    def report_finish(self, data_len_str, tot_time):
-        """Report download finished."""
-        if self.params.get('noprogress', False):
-            self.to_screen('[download] Download completed')
-        else:
-            self._report_progress_status(
-                ('100%% of %s in %s' %
-                 (data_len_str, self.format_seconds(tot_time))),
-                is_last_line=True)
+    def report_progress(self, s):
+        if s['status'] == 'finished':
+            if self.params.get('noprogress', False):
+                self.to_screen('[download] Download completed')
+            else:
+                s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+                if s.get('elapsed') is not None:
+                    s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+                    msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
+                else:
+                    msg_template = '100%% of %(_total_bytes_str)s'
+                self._report_progress_status(
+                    msg_template % s, is_last_line=True)
+
+        if self.params.get('noprogress'):
+            return
+
+        if s['status'] != 'downloading':
+            return
+
+        if s.get('eta') is not None:
+            s['_eta_str'] = self.format_eta(s['eta'])
+        else:
+            s['_eta_str'] = 'Unknown ETA'
+
+        if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
+            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
+        elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
+            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
+        else:
+            if s.get('downloaded_bytes') == 0:
+                s['_percent_str'] = self.format_percent(0)
+            else:
+                s['_percent_str'] = 'Unknown %'
+
+        if s.get('speed') is not None:
+            s['_speed_str'] = self.format_speed(s['speed'])
+        else:
+            s['_speed_str'] = 'Unknown speed'
+
+        if s.get('total_bytes') is not None:
+            s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+            msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
+        elif s.get('total_bytes_estimate') is not None:
+            s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
+            msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
+        else:
+            if s.get('downloaded_bytes') is not None:
+                s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
+                if s.get('elapsed'):
+                    s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
+                else:
+                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
+            else:
+                msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
+
+        self._report_progress_status(msg_template % s)

     def report_resuming_byte(self, resume_len):
         """Report attempt to resume at given byte."""
@@ -288,14 +311,14 @@ class FileDownloader(object):
         """

         nooverwrites_and_exists = (
-            self.params.get('nooverwrites', False)
-            and os.path.exists(encodeFilename(filename))
+            self.params.get('nooverwrites', False) and
+            os.path.exists(encodeFilename(filename))
         )

         continuedl_and_exists = (
-            self.params.get('continuedl', False)
-            and os.path.isfile(encodeFilename(filename))
-            and not self.params.get('nopart', False)
+            self.params.get('continuedl', False) and
+            os.path.isfile(encodeFilename(filename)) and
+            not self.params.get('nopart', False)
         )

         # Check file already present
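Note: report_progress is now itself a progress hook (registered in __init__ above), so user-supplied hooks receive the same status dicts it renders. A sketch of a consumer, assuming only the dict shapes visible in these hunks ('downloading'/'finished' status, total_bytes or total_bytes_estimate, eta, speed, elapsed):

def progress_hook(s):
    if s['status'] == 'downloading':
        total = s.get('total_bytes') or s.get('total_bytes_estimate')
        if total and s.get('downloaded_bytes') is not None:
            print('%.1f%% of %s bytes' % (
                100.0 * s['downloaded_bytes'] / total, total))
    elif s['status'] == 'finished':
        print('finished %s in %s seconds' % (s['filename'], s.get('elapsed')))

# Hooks are registered as before, e.g. YoutubeDL({'progress_hooks': [progress_hook]}).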
youtube_dl/downloader/external.py
@@ -75,7 +75,7 @@ class ExternalFD(FileDownloader):

 class CurlFD(ExternalFD):
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-o', tmpfilename]
+        cmd = [self.exe, '--location', '-o', tmpfilename]
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
         cmd += self._source_address('--interface')
youtube_dl/downloader/f4m.py
@@ -1,4 +1,4 @@
-from __future__ import unicode_literals
+from __future__ import division, unicode_literals

 import base64
 import io
@@ -15,7 +15,6 @@ from ..compat import (
 from ..utils import (
     struct_pack,
     struct_unpack,
-    format_bytes,
     encodeFilename,
     sanitize_open,
     xpath_text,
@@ -252,17 +251,6 @@ class F4mFD(FileDownloader):
         requested_bitrate = info_dict.get('tbr')
         self.to_screen('[download] Downloading f4m manifest')
         manifest = self.ydl.urlopen(man_url).read()
-        self.report_destination(filename)
-        http_dl = HttpQuietDownloader(
-            self.ydl,
-            {
-                'continuedl': True,
-                'quiet': True,
-                'noprogress': True,
-                'ratelimit': self.params.get('ratelimit', None),
-                'test': self.params.get('test', False),
-            }
-        )

         doc = etree.fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f)
@@ -298,39 +286,65 @@ class F4mFD(FileDownloader):
         # For some akamai manifests we'll need to add a query to the fragment url
         akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))

+        self.report_destination(filename)
+        http_dl = HttpQuietDownloader(
+            self.ydl,
+            {
+                'continuedl': True,
+                'quiet': True,
+                'noprogress': True,
+                'ratelimit': self.params.get('ratelimit', None),
+                'test': self.params.get('test', False),
+            }
+        )
         tmpfilename = self.temp_name(filename)
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')

         write_flv_header(dest_stream)
         write_metadata_tag(dest_stream, metadata)

         # This dict stores the download progress, it's updated by the progress
         # hook
         state = {
+            'status': 'downloading',
             'downloaded_bytes': 0,
-            'frag_counter': 0,
+            'frag_index': 0,
+            'frag_count': total_frags,
+            'filename': filename,
+            'tmpfilename': tmpfilename,
         }
         start = time.time()

-        def frag_progress_hook(status):
-            frag_total_bytes = status.get('total_bytes', 0)
-            estimated_size = (state['downloaded_bytes'] +
-                (total_frags - state['frag_counter']) * frag_total_bytes)
-            if status['status'] == 'finished':
+        def frag_progress_hook(s):
+            if s['status'] not in ('downloading', 'finished'):
+                return
+
+            frag_total_bytes = s.get('total_bytes', 0)
+            if s['status'] == 'finished':
                 state['downloaded_bytes'] += frag_total_bytes
-                state['frag_counter'] += 1
-                progress = self.calc_percent(state['frag_counter'], total_frags)
-                byte_counter = state['downloaded_bytes']
+                state['frag_index'] += 1
+
+            estimated_size = (
+                (state['downloaded_bytes'] + frag_total_bytes) /
+                (state['frag_index'] + 1) * total_frags)
+            time_now = time.time()
+            state['total_bytes_estimate'] = estimated_size
+            state['elapsed'] = time_now - start
+
+            if s['status'] == 'finished':
+                progress = self.calc_percent(state['frag_index'], total_frags)
             else:
-                frag_downloaded_bytes = status['downloaded_bytes']
-                byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
+                frag_downloaded_bytes = s['downloaded_bytes']
                 frag_progress = self.calc_percent(frag_downloaded_bytes,
                                                   frag_total_bytes)
-                progress = self.calc_percent(state['frag_counter'], total_frags)
+                progress = self.calc_percent(state['frag_index'], total_frags)
                 progress += frag_progress / float(total_frags)

-            eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
-            self.report_progress(progress, format_bytes(estimated_size),
-                status.get('speed'), eta)
+                state['eta'] = self.calc_eta(
+                    start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
+                state['speed'] = s.get('speed')
+            self._hook_progress(state)

         http_dl.add_progress_hook(frag_progress_hook)

         frags_filenames = []
@@ -354,8 +368,8 @@ class F4mFD(FileDownloader):
             frags_filenames.append(frag_filename)

         dest_stream.close()
-        self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

+        elapsed = time.time() - start
         self.try_rename(tmpfilename, filename)
         for frag_file in frags_filenames:
             os.remove(frag_file)
@@ -366,6 +380,7 @@ class F4mFD(FileDownloader):
             'total_bytes': fsize,
             'filename': filename,
             'status': 'finished',
+            'elapsed': elapsed,
         })

         return True
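Note: the new division import matters for the estimate above; with Python 2 floor division the extrapolation would truncate. A worked instance of the total_bytes_estimate formula from frag_progress_hook:

# Three 512 KiB fragments finished, a fourth of the same size in flight,
# 40 fragments in total.
downloaded_bytes = 3 * 512 * 1024
frag_total_bytes = 512 * 1024
frag_index = 3
total_frags = 40

estimated_size = (
    (downloaded_bytes + frag_total_bytes) /
    (frag_index + 1) * total_frags)
print(estimated_size)  # 20971520.0, i.e. 40 fragments of ~512 KiB each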
youtube_dl/downloader/hls.py
@@ -23,15 +23,14 @@ class HlsFD(FileDownloader):
         tmpfilename = self.temp_name(filename)

         ffpp = FFmpegPostProcessor(downloader=self)
-        program = ffpp._executable
-        if program is None:
+        if not ffpp.available:
             self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
             return False
         ffpp.check_version()

         args = [
             encodeArgument(opt)
-            for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
+            for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
         args.append(encodeFilename(tmpfilename, True))

         retval = subprocess.call(args)
@@ -48,7 +47,7 @@ class HlsFD(FileDownloader):
             return True
         else:
             self.to_stderr('\n')
-            self.report_error('%s exited with code %d' % (program, retval))
+            self.report_error('%s exited with code %d' % (ffpp.basename, retval))
             return False
youtube_dl/downloader/http.py
@@ -1,10 +1,9 @@
 from __future__ import unicode_literals

-import os
-import time
-
-from socket import error as SocketError
 import errno
+import os
+import socket
+import time

 from .common import FileDownloader
 from ..compat import (
@@ -15,7 +14,6 @@ from ..utils import (
     ContentTooShortError,
     encodeFilename,
     sanitize_open,
-    format_bytes,
 )
@@ -102,7 +100,7 @@ class HttpFD(FileDownloader):
                     resume_len = 0
                     open_mode = 'wb'
                     break
-            except SocketError as e:
+            except socket.error as e:
                 if e.errno != errno.ECONNRESET:
                     # Connection reset is no problem, just retry
                     raise
@@ -137,7 +135,6 @@ class HttpFD(FileDownloader):
                 self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                 return False

-        data_len_str = format_bytes(data_len)
         byte_counter = 0 + resume_len
         block_size = self.params.get('buffersize', 1024)
         start = time.time()
@@ -196,20 +193,19 @@ class HttpFD(FileDownloader):
             # Progress message
             speed = self.calc_speed(start, now, byte_counter - resume_len)
             if data_len is None:
-                eta = percent = None
+                eta = None
             else:
-                percent = self.calc_percent(byte_counter, data_len)
                 eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
-            self.report_progress(percent, data_len_str, speed, eta)

             self._hook_progress({
+                'status': 'downloading',
                 'downloaded_bytes': byte_counter,
                 'total_bytes': data_len,
                 'tmpfilename': tmpfilename,
                 'filename': filename,
-                'status': 'downloading',
                 'eta': eta,
                 'speed': speed,
+                'elapsed': now - start,
             })

             if is_test and byte_counter == data_len:
@@ -221,7 +217,13 @@ class HttpFD(FileDownloader):
             return False
         if tmpfilename != '-':
             stream.close()
-        self.report_finish(data_len_str, (time.time() - start))
+
         if data_len is not None and byte_counter != data_len:
+            self._hook_progress({
+                'downloaded_bytes': byte_counter,
+                'total_bytes': data_len,
+                'tmpfilename': tmpfilename,
+                'status': 'error',
+            })
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)
@@ -235,6 +237,7 @@ class HttpFD(FileDownloader):
             'total_bytes': byte_counter,
             'filename': filename,
             'status': 'finished',
+            'elapsed': time.time() - start,
         })

         return True
youtube_dl/downloader/rtmp.py
@@ -11,7 +11,6 @@ from ..compat import compat_str
 from ..utils import (
     check_executable,
     encodeFilename,
-    format_bytes,
     get_exe_version,
 )

@@ -51,23 +50,23 @@ class RtmpFD(FileDownloader):
                     if not resume_percent:
                         resume_percent = percent
                         resume_downloaded_data_len = downloaded_data_len
-                    eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
-                    speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
+                    time_now = time.time()
+                    eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
+                    speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
                     data_len = None
                     if percent > 0:
                         data_len = int(downloaded_data_len * 100 / percent)
-                    data_len_str = '~' + format_bytes(data_len)
-                    self.report_progress(percent, data_len_str, speed, eta)
-                    cursor_in_new_line = False
                     self._hook_progress({
+                        'status': 'downloading',
                         'downloaded_bytes': downloaded_data_len,
-                        'total_bytes': data_len,
+                        'total_bytes_estimate': data_len,
                         'tmpfilename': tmpfilename,
                         'filename': filename,
-                        'status': 'downloading',
                         'eta': eta,
+                        'elapsed': time_now - start,
                         'speed': speed,
                     })
+                    cursor_in_new_line = False
                 else:
                     # no percent for live streams
                     mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
@@ -75,15 +74,15 @@ class RtmpFD(FileDownloader):
                         downloaded_data_len = int(float(mobj.group(1)) * 1024)
                         time_now = time.time()
                         speed = self.calc_speed(start, time_now, downloaded_data_len)
-                        self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
-                        cursor_in_new_line = False
                         self._hook_progress({
                             'downloaded_bytes': downloaded_data_len,
                             'tmpfilename': tmpfilename,
                             'filename': filename,
                             'status': 'downloading',
+                            'elapsed': time_now - start,
                             'speed': speed,
                         })
+                        cursor_in_new_line = False
                     elif self.params.get('verbose', False):
                         if not cursor_in_new_line:
                             self.to_screen('')
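Note: rtmpdump only reports a percentage, so the total size is back-computed from it and, after this change, published as total_bytes_estimate rather than total_bytes. The arithmetic, worked through:

# 2.5 MiB downloaded at 12.5% implies a ~20 MiB download.
downloaded_data_len = int(2.5 * 1024 * 1024)
percent = 12.5
data_len = int(downloaded_data_len * 100 / percent)
print(data_len)  # 20971520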
youtube_dl/extractor/__init__.py
@@ -6,6 +6,7 @@ from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .adobetv import AdobeTVIE
 from .adultswim import AdultSwimIE
+from .aftenposten import AftenpostenIE
 from .aftonbladet import AftonbladetIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
@@ -48,14 +49,24 @@ from .brightcove import BrightcoveIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
+from .camdemy import (
+    CamdemyIE,
+    CamdemyFolderIE
+)
 from .canal13cl import Canal13clIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
 from .cbsnews import CBSNewsIE
+from .cbssports import CBSSportsIE
+from .ccc import CCCIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
+from .chirbit import (
+    ChirbitIE,
+    ChirbitProfileIE,
+)
 from .cinchcast import CinchcastIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
@@ -73,7 +84,7 @@ from .collegehumor import CollegeHumorIE
 from .collegerama import CollegeRamaIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
-from .commonmistakes import CommonMistakesIE
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
@@ -115,6 +126,7 @@ from .ellentv import (
     EllenTVClipsIE,
 )
 from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
 from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
@@ -183,6 +195,7 @@ from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .historicfilms import HistoricFilmsIE
+from .history import HistoryIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
@@ -197,6 +210,7 @@ from .imdb import (
     ImdbIE,
     ImdbListIE
 )
+from .imgur import ImgurIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE, InstagramUserIE
@@ -275,6 +289,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .myvidster import MyVidsterIE
+from .nationalgeographic import NationalGeographicIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
@@ -285,6 +300,7 @@ from .ndr import NDRIE
 from .ndtv import NDTVIE
 from .netzkino import NetzkinoIE
 from .nerdcubed import NerdCubedFeedIE
+from .nerdist import NerdistIE
 from .newgrounds import NewgroundsIE
 from .newstube import NewstubeIE
 from .nextmedia import (
@@ -311,6 +327,8 @@ from .nowvideo import NowVideoIE
 from .npo import (
     NPOIE,
     NPOLiveIE,
+    NPORadioIE,
+    NPORadioFragmentIE,
     TegenlichtVproIE,
 )
 from .nrk import (
@@ -340,13 +358,17 @@ from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
-from .pornhub import PornHubIE
+from .pornhub import (
+    PornHubIE,
+    PornHubPlaylistIE,
+)
 from .pornotube import PornotubeIE
 from .pornoxo import PornoXOIE
 from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
 from .quickvid import QuickVidIE
+from .r7 import R7IE
 from .radiode import RadioDeIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
@@ -361,7 +383,7 @@ from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
 from .rte import RteIE
-from .rtlnl import RtlXlIE
+from .rtlnl import RtlNlIE
 from .rtlnow import RTLnowIE
 from .rtl2 import RTL2IE
 from .rtp import RTPIE
@@ -376,6 +398,7 @@ from .rutube import (
     RutubePersonIE,
 )
 from .rutv import RUTVIE
+from .sandia import SandiaIE
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
@@ -406,7 +429,10 @@ from .soundcloud import (
     SoundcloudUserIE,
     SoundcloudPlaylistIE
 )
-from .soundgasm import SoundgasmIE
+from .soundgasm import (
+    SoundgasmIE,
+    SoundgasmProfileIE
+)
 from .southpark import (
     SouthParkIE,
     SouthparkDeIE,
@@ -426,6 +452,7 @@ from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streetvoice import StreetVoiceIE
 from .sunporno import SunPornoIE
+from .svtplay import SVTPlayIE
 from .swrmediathek import SWRMediathekIE
 from .syfy import SyfyIE
 from .sztvhu import SztvHuIE
@@ -471,9 +498,11 @@ from .tumblr import TumblrIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
+from .tv4 import TV4IE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
+from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twitch import (
     TwitchVideoIE,
@@ -553,6 +582,7 @@ from .wimp import WimpIE
 from .wistia import WistiaIE
 from .worldstarhiphop import WorldStarHipHopIE
 from .wrzuta import WrzutaIE
+from .wsj import WSJIE
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
 from .xhamster import XHamsterIE
@@ -566,6 +596,7 @@ from .yahoo import (
     YahooIE,
     YahooSearchIE,
 )
+from .yam import YamIE
 from .yesjapan import YesJapanIE
 from .ynet import YnetIE
 from .youjizz import YouJizzIE
@@ -589,6 +620,7 @@ from .youtube import (
     YoutubeUserIE,
     YoutubeWatchLaterIE,
 )
+from .zapiks import ZapiksIE
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import (
     ZingMp3SongIE,
youtube_dl/extractor/adobetv.py
@@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         webpage = self._download_webpage(url, video_id)

         player = self._parse_json(
@@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):
             self._html_search_meta('datepublished', webpage, 'upload date'))

         duration = parse_duration(
-            self._html_search_meta('duration', webpage, 'duration')
-            or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
+            self._html_search_meta('duration', webpage, 'duration') or
+            self._search_regex(
+                r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
+                webpage, 'duration', fatal=False))

         view_count = str_to_int(self._search_regex(
             r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
youtube_dl/extractor/adultswim.py
@@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor):
             },
         ],
         'info_dict': {
+            'id': 'rQxZvXQ4ROaSOqq-or2Mow',
             'title': 'Rick and Morty - Pilot',
             'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
         }
@@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor):
             }
         ],
         'info_dict': {
+            'id': '-t8CamQlQ2aYZ49ItZCFog',
             'title': 'American Dad - Putting Francine Out of Business',
             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
         },
youtube_dl/extractor/aftenposten.py (new file, 103 lines)
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+    xpath_with_ns,
+    xpath_text,
+    find_xpath_attr,
+)
+
+
+class AftenpostenIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
+
+    _TEST = {
+        'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+        'md5': 'fd828cd29774a729bf4d4425fe192972',
+        'info_dict': {
+            'id': '21039',
+            'ext': 'mov',
+            'title': 'TRAILER: "Sweatshop" - I can´t take any more',
+            'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+            'timestamp': 1416927969,
+            'upload_date': '20141125',
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._html_search_regex(
+            r'data-xs-id="(\d+)"', webpage, 'video id')
+
+        data = self._download_xml(
+            'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
+
+        NS_MAP = {
+            'atom': 'http://www.w3.org/2005/Atom',
+            'xt': 'http://xstream.dk/',
+            'media': 'http://search.yahoo.com/mrss/',
+        }
+
+        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+        title = xpath_text(
+            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+        description = xpath_text(
+            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+        timestamp = parse_iso8601(xpath_text(
+            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+        formats = []
+        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+        for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
+            media_url = media_content.get('url')
+            if not media_url:
+                continue
+            tbr = int_or_none(media_content.get('bitrate'))
+            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
+            if mobj:
+                formats.append({
+                    'url': mobj.group('url'),
+                    'play_path': 'mp4:%s' % mobj.group('playpath'),
+                    'app': mobj.group('app'),
+                    'ext': 'flv',
+                    'tbr': tbr,
+                    'format_id': 'rtmp-%d' % tbr,
+                })
+            else:
+                formats.append({
+                    'url': media_url,
+                    'tbr': tbr,
+                })
+        self._sort_formats(formats)
+
+        link = find_xpath_attr(
+            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+        if link is not None:
+            formats.append({
+                'url': link.get('href'),
+                'format_id': link.get('rel'),
+            })
+
+        thumbnails = [{
+            'url': splash.get('url'),
+            'width': int_or_none(splash.get('width')),
+            'height': int_or_none(splash.get('height')),
+        } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
youtube_dl/extractor/aftonbladet.py
@@ -1,8 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@@ -21,9 +19,7 @@ class AftonbladetIE(InfoExtractor):
     }

     def _real_extract(self, url):
-        mobj = re.search(self._VALID_URL, url)
-
-        video_id = mobj.group('video_id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)

         # find internal video meta data
youtube_dl/extractor/aparat.py
@@ -20,6 +20,7 @@ class AparatIE(InfoExtractor):
             'id': 'wP8On',
             'ext': 'mp4',
             'title': 'تیم گلکسی 11 - زومیت',
+            'age_limit': 0,
         },
         # 'skip': 'Extremely unreliable',
     }
@@ -34,7 +35,8 @@ class AparatIE(InfoExtractor):
             video_id + '/vt/frame')
         webpage = self._download_webpage(embed_url, video_id)

-        video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
+        video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
+            r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
         for i, video_url in enumerate(video_urls):
             req = HEADRequest(video_url)
             res = self._request_webpage(
@@ -46,7 +48,7 @@ class AparatIE(InfoExtractor):

         title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
         thumbnail = self._search_regex(
-            r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+            r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)

         return {
             'id': video_id,
@@ -54,4 +56,5 @@ class AparatIE(InfoExtractor):
             'url': video_url,
             'ext': 'mp4',
             'thumbnail': thumbnail,
+            'age_limit': self._family_friendly_search(webpage),
         }
youtube_dl/extractor/appletrailers.py
@@ -11,9 +11,12 @@ from ..utils import (


 class AppleTrailersIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _TESTS = [{
         "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
+        'info_dict': {
+            'id': 'manofsteel',
+        },
         "playlist": [
             {
                 "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
@@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
             },
         },
     ]
-    }
+    }, {
+        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+        'only_matching': True,
+    }]

     _JSON_RE = r'iTunes.playURL\((.*?)\);'
youtube_dl/extractor/atresplayer.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import time
 import hmac

-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse,
@@ -17,7 +17,7 @@ from ..utils import (
 )


-class AtresPlayerIE(SubtitlesInfoExtractor):
+class AtresPlayerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
     _TESTS = [
         {
@@ -144,13 +144,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
         thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')

         subtitles = {}
-        subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
-        if subtitle:
-            subtitles['es'] = subtitle
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+        subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
+        if subtitle_url:
+            subtitles['es'] = [{
+                'ext': 'srt',
+                'url': subtitle_url,
+            }]

         return {
             'id': video_id,
@@ -159,5 +158,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
-            'subtitles': self.extract_subtitles(video_id, subtitles),
+            'subtitles': subtitles,
         }
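Note: AtresPlayer is one of several extractors in this range moved off SubtitlesInfoExtractor. Subtitles are now a plain mapping from language code to a list of alternatives, each carrying an 'ext' plus either a 'url' to fetch or inline 'data'. An illustrative value (the URL and inline data are placeholders):

subtitles = {
    'es': [{
        'ext': 'srt',
        'url': 'http://subtitles.example.com/episode.srt',  # placeholder URL
    }],
    'en': [{
        'ext': 'srt',
        'data': '1\r\n00:00:01,000 --> 00:00:02,500\r\nHello\r\n\r\n',
    }],
}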
youtube_dl/extractor/bambuser.py
@@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor):
             'duration': int(info['length']),
             'view_count': int(info['views_total']),
             'uploader': info['username'],
-            'uploader_id': info['uid'],
+            'uploader_id': info['owner']['uid'],
         }
youtube_dl/extractor/bandcamp.py
@@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor):

         download_link = m_download.group(1)
         video_id = self._search_regex(
-            r'var TralbumData = {.*?id: (?P<id>\d+),?$',
-            webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
+            r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
+            webpage, 'video id')

         download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
         # We get the dictionary of the track from some javascript code
-        info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
-        info = json.loads(info)[0]
+        all_info = self._parse_json(self._search_regex(
+            r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
+        info = all_info[0]
         # We pick mp3-320 for now, until format selection can be easily implemented.
         mp3_info = info['downloads']['mp3-320']
         # If we try to use this url it says the link has expired
         initial_url = mp3_info['url']
-        re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
-        m_url = re.match(re_url, initial_url)
+        m_url = re.match(
+            r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
+            initial_url)
         # We build the url we will use to get the final track url
         # This url is build in Bandcamp in the script download_bunde_*.js
         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
         final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
         # If we could correctly generate the .rand field the url would be
         # in the "download_url" key
-        final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
+        final_url = self._search_regex(
+            r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL')

         return {
             'id': video_id,
@@ -106,7 +109,7 @@ class BandcampIE(InfoExtractor):

 class BandcampAlbumIE(InfoExtractor):
     IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'

     _TESTS = [{
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -130,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor):
         ],
         'info_dict': {
             'title': 'Jazz Format Mixtape vol.1',
+            'id': 'jazz-format-mixtape-vol-1',
+            'uploader_id': 'blazo',
         },
         'params': {
             'playlistend': 2
         },
-        'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+        'skip': 'Bandcamp imposes download limits.'
     }, {
         'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
         'info_dict': {
             'title': 'Hierophany of the Open Grave',
+            'uploader_id': 'nightbringer',
+            'id': 'hierophany-of-the-open-grave',
         },
         'playlist_mincount': 9,
     }, {
         'url': 'http://dotscale.bandcamp.com',
         'info_dict': {
             'title': 'Loom',
+            'id': 'dotscale',
+            'uploader_id': 'dotscale',
         },
         'playlist_mincount': 7,
     }]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('subdomain')
-        title = mobj.group('title')
-        display_id = title or playlist_id
-        webpage = self._download_webpage(url, display_id)
+        uploader_id = mobj.group('subdomain')
+        album_id = mobj.group('album_id')
+        playlist_id = album_id or uploader_id
+        webpage = self._download_webpage(url, playlist_id)
         tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
         if not tracks_paths:
             raise ExtractorError('The page doesn\'t contain any tracks')
@@ -165,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor):
             r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
         return {
             '_type': 'playlist',
+            'uploader_id': uploader_id,
             'id': playlist_id,
-            'display_id': display_id,
             'title': title,
             'entries': entries,
         }
youtube_dl/extractor/bbccouk.py
@@ -2,12 +2,12 @@ from __future__ import unicode_literals

 import xml.etree.ElementTree

-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..utils import ExtractorError
 from ..compat import compat_HTTPError


-class BBCCoUkIE(SubtitlesInfoExtractor):
+class BBCCoUkIE(InfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
@@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             formats.extend(conn_formats)
         return formats

-    def _extract_captions(self, media, programme_id):
+    def _get_subtitles(self, media, programme_id):
         subtitles = {}
         for connection in self._extract_connections(media):
             captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
             lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
             ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
             srt = ''
+
+            def _extract_text(p):
+                if p.text is not None:
+                    stripped_text = p.text.strip()
+                    if stripped_text:
+                        return stripped_text
+                return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
             for pos, p in enumerate(ps):
-                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
-                                                          p.text.strip() if p.text is not None else '')
-            subtitles[lang] = srt
+                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
+            subtitles[lang] = [
+                {
+                    'url': connection.get('href'),
+                    'ext': 'ttml',
+                },
+                {
+                    'data': srt,
+                    'ext': 'srt',
+                },
+            ]
         return subtitles

     def _download_media_selector(self, programme_id):
@@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             elif kind == 'video':
                 formats.extend(self._extract_video(media, programme_id))
             elif kind == 'captions':
-                subtitles = self._extract_captions(media, programme_id)
+                subtitles = self.extract_subtitles(media, programme_id)

         return formats, subtitles

@@ -273,7 +288,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             formats, subtitles = self._download_media_selector(programme_id)
             return programme_id, title, description, duration, formats, subtitles
         except ExtractorError as ee:
-            if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
                 raise

         # fallback to legacy playlist
@@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
         else:
             programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)

-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(programme_id, subtitles)
-            return
-
         self._sort_formats(formats)

         return {
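Note: BBCCoUk shows the other half of the new subtitles protocol: extractors implement _get_subtitles while call sites go through extract_subtitles, letting the base class decide whether subtitle extraction should run at all. A sketch of the dispatcher this implies in common.py; the exact params it consults are an assumption, as that code is not shown in these hunks:

# Assumed shape of InfoExtractor.extract_subtitles (not part of this diff).
def extract_subtitles(self, *args, **kwargs):
    if (self._downloader.params.get('writesubtitles', False) or
            self._downloader.params.get('listsubtitles')):
        return self._get_subtitles(*args, **kwargs)
    return {}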
youtube_dl/extractor/beeg.py
@@ -9,7 +9,7 @@ class BeegIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
     _TEST = {
         'url': 'http://beeg.com/5416503',
-        'md5': '634526ae978711f6b748fe0dd6c11f57',
+        'md5': '1bff67111adb785c51d1b42959ec10e5',
         'info_dict': {
             'id': '5416503',
             'ext': 'mp4',
@@ -1,40 +1,35 @@
 from __future__ import unicode_literals

 import json
-import re

 from .common import InfoExtractor
-from ..utils import remove_start
+from ..utils import (
+    remove_start,
+    int_or_none,
+)


 class BlinkxIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
+    _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
     IE_NAME = 'blinkx'

     _TEST = {
-        'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
-        'md5': '2e9a07364af40163a908edbf10bb2492',
+        'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
+        'md5': '337cf7a344663ec79bf93a526a2e06c7',
         'info_dict': {
-            'id': '8aQUy7GV',
+            'id': 'Da0Gw3xc',
             'ext': 'mp4',
-            'title': 'Police Car Rolls Away',
-            'uploader': 'stupidvideos.com',
-            'upload_date': '20131215',
-            'timestamp': 1387068000,
-            'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
-            'duration': 14.886,
-            'thumbnails': [{
-                'width': 100,
-                'height': 76,
-                'resolution': '100x76',
-                'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
-            }],
+            'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
+            'uploader': 'IGN News',
+            'upload_date': '20150217',
+            'timestamp': 1424215740,
+            'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
+            'duration': 47.743333,
         },
     }

-    def _real_extract(self, rl):
-        m = re.match(self._VALID_URL, rl)
-        video_id = m.group('id')
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
         display_id = video_id[:8]

         api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
@@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor):
             elif m['type'] in ('flv', 'mp4'):
                 vcodec = remove_start(m['vcodec'], 'ff')
                 acodec = remove_start(m['acodec'], 'ff')
-                tbr = (int(m['vbr']) + int(m['abr'])) // 1000
+                vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
+                abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
+                tbr = vbr + abr if vbr and abr else None
                 format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
                 formats.append({
                     'format_id': format_id,
                     'url': m['link'],
                     'vcodec': vcodec,
                     'acodec': acodec,
-                    'abr': int(m['abr']) // 1000,
-                    'vbr': int(m['vbr']) // 1000,
+                    'abr': abr,
+                    'vbr': vbr,
                     'tbr': tbr,
-                    'width': int(m['w']),
-                    'height': int(m['h']),
+                    'width': int_or_none(m.get('w')),
+                    'height': int_or_none(m.get('h')),
                 })

         self._sort_formats(formats)
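The move from bare int() casts to int_or_none is what lets BlinkxIE survive API entries with missing bitrate fields. Below is a simplified stand-in for youtube_dl.utils.int_or_none plus the new bitrate logic; the helper body is an approximation for illustration, not the exact library code, and the sample dict is made up:

def int_or_none(v, scale=1):
    # Return None instead of raising when the value is absent or unusable.
    if v in (None, ''):
        return None
    try:
        return int(v) // scale
    except (ValueError, TypeError):
        return None

m = {'vbr': '1200000'}  # hypothetical media entry with no 'abr' at all
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)  # 1200
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)  # None
tbr = vbr + abr if vbr and abr else None                    # None, not a crash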
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor

 from ..compat import (
     compat_str,
@@ -18,7 +17,7 @@ from ..utils import (
 )


-class BlipTVIE(SubtitlesInfoExtractor):
+class BlipTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'

     _TESTS = [
@@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
         categories = [category.text for category in item.findall('category')]

         formats = []
-        subtitles = {}
+        subtitles_urls = {}

         media_group = item.find(media('group'))
         for media_content in media_group.findall(media('content')):
@@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
                 }
                 lang = role.rpartition('-')[-1].strip().lower()
                 langcode = LANGS.get(lang, lang)
-                subtitles[langcode] = url
+                subtitles_urls[langcode] = url
             elif media_type.startswith('video/'):
                 formats.append({
                     'url': real_url,
@@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
                 })
         self._sort_formats(formats)

-        # subtitles
-        video_subtitles = self.extract_subtitles(video_id, subtitles)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+        subtitles = self.extract_subtitles(video_id, subtitles_urls)

         return {
             'id': video_id,
@@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor):
             'thumbnail': thumbnail,
             'categories': categories,
             'formats': formats,
-            'subtitles': video_subtitles,
+            'subtitles': subtitles,
         }

-    def _download_subtitle_url(self, sub_lang, url):
-        # For some weird reason, blip.tv serves a video instead of subtitles
-        # when we request with a common UA
-        req = compat_urllib_request.Request(url)
-        req.add_header('User-Agent', 'youtube-dl')
-        return self._download_webpage(req, None, note=False)
+    def _get_subtitles(self, video_id, subtitles_urls):
+        subtitles = {}
+        for lang, url in subtitles_urls.items():
+            # For some weird reason, blip.tv serves a video instead of subtitles
+            # when we request with a common UA
+            req = compat_urllib_request.Request(url)
+            req.add_header('User-Agent', 'youtube-dl')
+            subtitles[lang] = [{
+                # The extension is 'srt' but it's actually an 'ass' file
+                'ext': 'ass',
+                'data': self._download_webpage(req, None, note=False),
+            }]
+        return subtitles


 class BlipTVUserIE(InfoExtractor):
@@ -6,7 +6,7 @@ from .common import InfoExtractor


 class BloombergIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
+    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'

     _TEST = {
         'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
@@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
+        name = self._match_id(url)
         webpage = self._download_webpage(url, name)

         f4m_url = self._search_regex(
             r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
             'f4m url')
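This and several later hunks (CBSIE, BlinkxIE) collapse the re.match/mobj.group boilerplate into the shared _match_id helper, which is why the pattern's group is renamed from 'name' to 'id'. In spirit the helper is roughly the following sketch, not the exact library implementation:

import re

class InfoExtractorSketch(object):
    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'

    @classmethod
    def _match_id(cls, url):
        # Apply the extractor's own _VALID_URL and return the 'id' group.
        m = re.match(cls._VALID_URL, url)
        assert m
        return m.group('id')

print(InfoExtractorSketch._match_id(
    'http://www.bloomberg.com/video/some-clip-abc123.html'))  # some-clip-abc123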
@@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor):
         'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
         'info_dict': {
             'title': 'Sealife',
+            'id': '3550319591001',
         },
         'playlist_mincount': 7,
     },
@@ -108,7 +109,7 @@ class BrightcoveIE(InfoExtractor):
         """

         # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
-        object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
+        object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
                             lambda m: m.group(1) + '/>', object_str)
         # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
         object_str = object_str.replace('<--', '<!--')
@@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor):
         playlist_info = json_data['videoList']
         videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]

-        return self.playlist_result(videos, playlist_id=playlist_info['id'],
+        return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
                                     playlist_title=playlist_info['mediaCollectionDTO']['displayName'])

     def _extract_video_info(self, video_info):
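The widened <param> regex now self-closes tags with any number of attributes, where the old pattern only matched an exact name-then-value pair. A quick demonstration (the sample markup is invented):

import re

object_str = '<param name="wmode" value="transparent" base="http://example.com">'

old = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
             lambda m: m.group(1) + '/>', object_str)
new = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
             lambda m: m.group(1) + '/>', object_str)

print(old)  # unchanged: the extra 'base' attribute defeats the old pattern
print(new)  # ...base="http://example.com"/>  -- properly self-closed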
@@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor):
             'skip_download': True,  # Got enough YouTube download tests
         },
         'info_dict': {
+            'id': 'look-at-this-cute-dog-omg',
             'description': 're:Munchkin the Teddy Bear is back ?!',
             'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
         },
@@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor):
             'ext': 'mp4',
             'upload_date': '20141124',
             'uploader_id': 'CindysMunchkin',
-            'description': 're:© 2014 Munchkin the Shih Tzu',
-            'uploader': 'Munchkin the Shih Tzu',
+            'description': 're:© 2014 Munchkin the',
+            'uploader': 're:^Munchkin the',
             'title': 're:Munchkin the Teddy Bear gets her exercise',
         },
     }]
youtube_dl/extractor/camdemy.py (new file, 153 lines)
@@ -0,0 +1,153 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urlparse,
+)
+from ..utils import (
+    parse_iso8601,
+    str_to_int,
+)
+
+
+class CamdemyIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
+    _TESTS = [{
+        # single file
+        'url': 'http://www.camdemy.com/media/5181/',
+        'md5': '5a5562b6a98b37873119102e052e311b',
+        'info_dict': {
+            'id': '5181',
+            'ext': 'mp4',
+            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': '',
+            'creator': 'ss11spring',
+            'upload_date': '20130114',
+            'timestamp': 1358154556,
+            'view_count': int,
+        }
+    }, {
+        # With non-empty description
+        'url': 'http://www.camdemy.com/media/13885',
+        'md5': '4576a3bb2581f86c61044822adbd1249',
+        'info_dict': {
+            'id': '13885',
+            'ext': 'mp4',
+            'title': 'EverCam + Camdemy QuickStart',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
+            'creator': 'evercam',
+            'upload_date': '20140620',
+            'timestamp': 1403271569,
+        }
+    }, {
+        # External source
+        'url': 'http://www.camdemy.com/media/14842',
+        'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
+        'info_dict': {
+            'id': '2vsYQzNIsJo',
+            'ext': 'mp4',
+            'upload_date': '20130211',
+            'uploader': 'Hun Kim',
+            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
+            'uploader_id': 'hunkimtutorials',
+            'title': 'Excel 2013 Tutorial - How to add Password Protection',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        page = self._download_webpage(url, video_id)
+
+        src_from = self._html_search_regex(
+            r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
+            'external source', default=None)
+        if src_from:
+            return self.url_result(src_from)
+
+        oembed_obj = self._download_json(
+            'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
+
+        thumb_url = oembed_obj['thumbnail_url']
+        video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
+        file_list_doc = self._download_xml(
+            compat_urlparse.urljoin(video_folder, 'fileList.xml'),
+            video_id, 'Filelist XML')
+        file_name = file_list_doc.find('./video/item/fileName').text
+        video_url = compat_urlparse.urljoin(video_folder, file_name)
+
+        timestamp = parse_iso8601(self._html_search_regex(
+            r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
+            page, 'creation time', fatal=False),
+            delimiter=' ', timezone=datetime.timedelta(hours=8))
+        view_count = str_to_int(self._html_search_regex(
+            r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
+            page, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': oembed_obj['title'],
+            'thumbnail': thumb_url,
+            'description': self._html_search_meta('description', page),
+            'creator': oembed_obj['author_name'],
+            'duration': oembed_obj['duration'],
+            'timestamp': timestamp,
+            'view_count': view_count,
+        }
+
+
+class CamdemyFolderIE(InfoExtractor):
+    _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)'
+    _TESTS = [{
+        # links with trailing slash
+        'url': 'http://www.camdemy.com/folder/450',
+        'info_dict': {
+            'id': '450',
+            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
+        },
+        'playlist_mincount': 145
+    }, {
+        # links without trailing slash
+        # and multi-page
+        'url': 'http://www.camdemy.com/folder/853',
+        'info_dict': {
+            'id': '853',
+            'title': '科學計算 - 使用 Matlab'
+        },
+        'playlist_mincount': 20
+    }, {
+        # with displayMode parameter. For testing the codes to add parameters
+        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
+        'info_dict': {
+            'id': '853',
+            'title': '科學計算 - 使用 Matlab'
+        },
+        'playlist_mincount': 20
+    }]
+
+    def _real_extract(self, url):
+        folder_id = self._match_id(url)
+
+        # Add displayMode=list so that all links are displayed in a single page
+        parsed_url = list(compat_urlparse.urlparse(url))
+        query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
+        query.update({'displayMode': 'list'})
+        parsed_url[4] = compat_urllib_parse.urlencode(query)
+        final_url = compat_urlparse.urlunparse(parsed_url)
+
+        page = self._download_webpage(final_url, folder_id)
+        matches = re.findall(r"href='(/media/\d+/?)'", page)
+
+        entries = [self.url_result('http://www.camdemy.com' + media_path)
+                   for media_path in matches]
+
+        folder_title = self._html_search_meta('keywords', page)
+
+        return self.playlist_result(entries, folder_id, folder_title)
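The displayMode=list trick in CamdemyFolderIE — rewriting one query parameter while preserving the rest — is a generally useful pattern. A standalone sketch using the Python 3 standard library instead of the compat shims:

from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse

def set_query_param(url, key, value):
    # Rebuild the URL with one query parameter added or overwritten.
    parts = list(urlparse(url))
    query = dict(parse_qsl(parts[4]))
    query[key] = value
    parts[4] = urlencode(query)
    return urlunparse(parts)

print(set_query_param(
    'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
    'displayMode', 'list'))
# http://www.camdemy.com/folder/853/?displayMode=list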
@@ -15,12 +15,13 @@ from ..utils import (

 class CanalplusIE(InfoExtractor):
     IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
-    _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
+    _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
     _SITE_ID_MAP = {
         'canalplus.fr': 'cplus',
         'piwiplus.fr': 'teletoon',
         'd8.tv': 'd8',
+        'itele.fr': 'itele',
     }

     _TESTS = [{
@@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor):
             'upload_date': '20131108',
         },
         'skip': 'videos get deleted after a while',
+    }, {
+        'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
+        'md5': '65aa83ad62fe107ce29e564bb8712580',
+        'info_dict': {
+            'id': '1213714',
+            'ext': 'flv',
+            'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
+            'description': 'md5:8216206ec53426ea6321321f3b3c16db',
+            'upload_date': '20150211',
+        },
     }]

     def _real_extract(self, url):
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
     }]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
         real_id = self._search_regex(
             r"video\.settings\.pid\s*=\s*'([^']+)';",
youtube_dl/extractor/cbssports.py (new file, 30 lines)
@@ -0,0 +1,30 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class CBSSportsIE(InfoExtractor):
+    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
+
+    _TEST = {
+        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
+        'info_dict': {
+            'id': '_d5_GbO8p1sT',
+            'ext': 'flv',
+            'title': 'US Open flashbacks: 1990s',
+            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        section = mobj.group('section')
+        video_id = mobj.group('id')
+        all_videos = self._download_json(
+            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
+            video_id)
+        # The json file contains the info of all the videos in the section
+        video_info = next(v for v in all_videos if v['pcid'] == video_id)
+        return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
youtube_dl/extractor/ccc.py (new file, 99 lines)
@@ -0,0 +1,99 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    qualities,
+    unified_strdate,
+)
+
+
+class CCCIE(InfoExtractor):
+    IE_NAME = 'media.ccc.de'
+    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
+
+    _TEST = {
+        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
+        'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
+        'info_dict': {
+            'id': '20131228183',
+            'ext': 'mp4',
+            'title': 'Introduction to Processor Design',
+            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'view_count': int,
+            'upload_date': '20131229',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        if self._downloader.params.get('prefer_free_formats'):
+            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
+        else:
+            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
+
+        title = self._html_search_regex(
+            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
+        description = self._html_search_regex(
+            r"(?s)<p class='description'>(.*?)</p>",
+            webpage, 'description', fatal=False)
+        upload_date = unified_strdate(self._html_search_regex(
+            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
+            webpage, 'upload date', fatal=False))
+        view_count = int_or_none(self._html_search_regex(
+            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
+            webpage, 'view count', fatal=False))
+
+        matches = re.finditer(r'''(?xs)
+            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
+            <a\s+href='(?P<http_url>[^']+)'>\s*
+            (?:
+                .*?
+                <a\s+href='(?P<torrent_url>[^']+\.torrent)'
+            )?''', webpage)
+        formats = []
+        for m in matches:
+            format = m.group('format')
+            format_id = self._search_regex(
+                r'.*/([a-z0-9_-]+)/[^/]*$',
+                m.group('http_url'), 'format id', default=None)
+            vcodec = 'h264' if 'h264' in format_id else (
+                'none' if format_id in ('mp3', 'opus') else None
+            )
+            formats.append({
+                'format_id': format_id,
+                'format': format,
+                'url': m.group('http_url'),
+                'vcodec': vcodec,
+                'preference': preference(format_id),
+            })
+
+            if m.group('torrent_url'):
+                formats.append({
+                    'format_id': 'torrent-%s' % (format if format_id is None else format_id),
+                    'format': '%s (torrent)' % format,
+                    'proto': 'torrent',
+                    'format_note': '(unsupported; will just download the .torrent file)',
+                    'vcodec': vcodec,
+                    'preference': -100 + preference(format_id),
+                    'url': m.group('torrent_url'),
+                })
+        self._sort_formats(formats)
+
+        thumbnail = self._html_search_regex(
+            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+            'upload_date': upload_date,
+            'formats': formats,
+        }
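CCCIE ranks its formats with the qualities helper, which turns an ordered list of format ids into a scoring function (later entries win, unknown ids sort last). A small illustration of the idea, written to mirror the helper's documented behavior:

def qualities(quality_ids):
    # Later entries in the list rank higher; unknown ids rank lowest.
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q

preference = qualities(['mp4-lq', 'mp4-sd', 'mp4-hd'])
print(preference('mp4-hd'))  # 2 -> most preferred
print(preference('mp4-lq'))  # 0
print(preference('flac'))    # -1 -> unknown, sorted last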
@@ -3,7 +3,7 @@ from __future__ import unicode_literals

 import re

-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
@@ -15,7 +15,7 @@ from ..utils import (
 )


-class CeskaTelevizeIE(SubtitlesInfoExtractor):
+class CeskaTelevizeIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'

     _TESTS = [
@@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
         subtitles = {}
         subs = item.get('subtitles')
         if subs:
-            subtitles['cs'] = subs[0]['url']
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
-        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+            subtitles = self.extract_subtitles(episode_id, subs)

         return {
             'id': episode_id,
@@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
             'subtitles': subtitles,
         }

+    def _get_subtitles(self, episode_id, subs):
+        original_subtitles = self._download_webpage(
+            subs[0]['url'], episode_id, 'Downloading subtitles')
+        srt_subs = self._fix_subtitles(original_subtitles)
+        return {
+            'cs': [{
+                'ext': 'srt',
+                'data': srt_subs,
+            }]
+        }
+
     @staticmethod
     def _fix_subtitles(subtitles):
         """ Convert millisecond-based subtitles to SRT """
-        if subtitles is None:
-            return subtitles  # subtitles not requested
-
         def _msectotimecode(msec):
             """ Helper utility to convert milliseconds to timecode """
@@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
             else:
                 yield line

-        fixed_subtitles = {}
-        for k, v in subtitles.items():
-            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
-        return fixed_subtitles
+        return "\r\n".join(_fix_subtitle(subtitles))
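The _msectotimecode helper referenced in _fix_subtitles turns raw millisecond offsets into the HH:MM:SS,mmm stamps SRT expects. Its body is not shown in these hunks, so the following is a hypothetical reconstruction of such a conversion, not the extractor's actual code:

def msec_to_timecode(msec):
    # 3661123 ms -> '01:01:01,123'
    sec, msec = divmod(msec, 1000)
    minutes, sec = divmod(sec, 60)
    hours, minutes = divmod(minutes, 60)
    return '%02d:%02d:%02d,%03d' % (hours, minutes, sec, msec)

print(msec_to_timecode(3661123))  # 01:01:01,123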
youtube_dl/extractor/chirbit.py (new file, 84 lines)
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+)
+
+
+class ChirbitIE(InfoExtractor):
+    IE_NAME = 'chirbit'
+    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
+    _TESTS = [{
+        'url': 'http://chirb.it/PrIPv5',
+        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
+        'info_dict': {
+            'id': 'PrIPv5',
+            'ext': 'mp3',
+            'title': 'Фасадстрой',
+            'duration': 52,
+            'view_count': int,
+            'comment_count': int,
+        }
+    }, {
+        'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://chirb.it/%s' % audio_id, audio_id)
+
+        audio_url = self._search_regex(
+            r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
+
+        title = self._search_regex(
+            r'itemprop="name">([^<]+)', webpage, 'title')
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'itemprop="playCount"\s*>(\d+)', webpage,
+            'listen count', fatal=False))
+        comment_count = int_or_none(self._search_regex(
+            r'>(\d+) Comments?:', webpage,
+            'comment count', fatal=False))
+
+        return {
+            'id': audio_id,
+            'url': audio_url,
+            'title': title,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count,
+        }
+
+
+class ChirbitProfileIE(InfoExtractor):
+    IE_NAME = 'chirbit:profile'
+    _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://chirbit.com/ScarletBeauty',
+        'info_dict': {
+            'id': 'ScarletBeauty',
+            'title': 'Chirbits by ScarletBeauty',
+        },
+        'playlist_mincount': 3,
+    }
+
+    def _real_extract(self, url):
+        profile_id = self._match_id(url)
+
+        rss = self._download_xml(
+            'http://chirbit.com/rss/%s' % profile_id, profile_id)
+
+        entries = [
+            self.url_result(audio_url.text, 'Chirbit')
+            for audio_url in rss.findall('./channel/item/link')]
+
+        title = rss.find('./channel/title').text
+
+        return self.playlist_result(entries, profile_id, title)
@@ -27,7 +27,6 @@ from ..utils import (
     compiled_regex_type,
     ExtractorError,
     float_or_none,
-    HEADRequest,
     int_or_none,
     RegexNotFoundError,
     sanitize_filename,
@@ -145,17 +144,25 @@ class InfoExtractor(object):
     thumbnail:      Full URL to a video thumbnail image.
     description:    Full video description.
     uploader:       Full name of the video uploader.
+    creator:        The main artist who created the video.
     timestamp:      UNIX timestamp of the moment the video became available.
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
     location:       Physical location where the video was filmed.
-    subtitles:      The subtitle file contents as a dictionary in the format
-                    {language: subtitles}.
+    subtitles:      The available subtitles as a dictionary in the format
+                    {language: subformats}. "subformats" is a list sorted from
+                    lower to higher preference, each element is a dictionary
+                    with the "ext" entry and one of:
+                        * "data": The subtitles file contents
+                        * "url": A url pointing to the subtitles file
+    automatic_captions: Like 'subtitles', used by the YoutubeIE for
+                    automatically generated captions
     duration:       Length of the video in seconds, as an integer.
     view_count:     How many users have watched the video on the platform.
     like_count:     Number of positive ratings of the video
     dislike_count:  Number of negative ratings of the video
+    average_rating: Average rating give by users, the scale used depends on the webpage
     comment_count:  Number of comments on the video
     comments:       A list of comments, each with one or more of the following
                     properties (all but one of text or html optional):
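Per the reworked docstring, each subformat carries 'ext' plus either inline 'data' or a 'url'. A hedged sketch of how a consumer might normalize the two cases — materialize_subformat and fetch are invented names for illustration, not youtube-dl API:

def materialize_subformat(subformat, fetch=None):
    # Return the subtitle file contents for one subformat dict,
    # downloading it when only a 'url' is present.
    if 'data' in subformat:
        return subformat['data']
    if 'url' in subformat and fetch is not None:
        return fetch(subformat['url'])  # fetch() is a hypothetical downloader
    raise ValueError('subformat needs either "data" or "url"')

sub = {'ext': 'srt', 'data': '1\r\n00:00:00,000 --> 00:00:01,000\r\nHi\r\n'}
print(materialize_subformat(sub)[:1])  # '1'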
@@ -263,8 +270,15 @@ class InfoExtractor(object):

     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
-        self.initialize()
-        return self._real_extract(url)
+        try:
+            self.initialize()
+            return self._real_extract(url)
+        except ExtractorError:
+            raise
+        except compat_http_client.IncompleteRead as e:
+            raise ExtractorError('A network error has occured.', cause=e, expected=True)
+        except (KeyError, StopIteration) as e:
+            raise ExtractorError('An extractor error has occured.', cause=e)

     def set_downloader(self, downloader):
         """Sets the downloader for this IE."""
@@ -383,6 +397,16 @@ class InfoExtractor(object):
             if blocked_iframe:
                 msg += ' Visit %s for more details' % blocked_iframe
             raise ExtractorError(msg, expected=True)
+        if '<title>The URL you requested has been blocked</title>' in content[:512]:
+            msg = (
+                'Access to this webpage has been blocked by Indian censorship. '
+                'Use a VPN or proxy server (with --proxy) to route around it.')
+            block_msg = self._html_search_regex(
+                r'</h1><p>(.*?)</p>',
+                content, 'block message', default=None)
+            if block_msg:
+                msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+            raise ExtractorError(msg, expected=True)

         return content

@@ -506,7 +530,7 @@ class InfoExtractor(object):
             if mobj:
                 break

-        if os.name != 'nt' and sys.stderr.isatty():
+        if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
             _name = '\033[0;34m%s\033[0m' % name
         else:
             _name = name
@@ -655,6 +679,21 @@ class InfoExtractor(object):
         }
         return RATING_TABLE.get(rating.lower(), None)

+    def _family_friendly_search(self, html):
+        # See http://schema.org/VideoObject
+        family_friendly = self._html_search_meta('isFamilyFriendly', html)
+
+        if not family_friendly:
+            return None
+
+        RATING_TABLE = {
+            '1': 0,
+            'true': 0,
+            '0': 18,
+            'false': 18,
+        }
+        return RATING_TABLE.get(family_friendly.lower(), None)
+
     def _twitter_search_player(self, html):
         return self._html_search_meta('twitter:player', html,
                                       'twitter card player')
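The new _family_friendly_search maps schema.org's isFamilyFriendly flag onto the age_limit convention (0 = suitable for everyone, 18 = adults only). A self-contained illustration of the mapping; the regex and the HTML snippet are simplified stand-ins for _html_search_meta, not the library's implementation:

import re

RATING_TABLE = {'1': 0, 'true': 0, '0': 18, 'false': 18}

def family_friendly_age_limit(html):
    # Pull the schema.org flag out of a meta tag and map it to an age limit.
    m = re.search(
        r'<meta[^>]+itemprop="isFamilyFriendly"[^>]+content="([^"]+)"', html)
    return RATING_TABLE.get(m.group(1).lower()) if m else None

html = '<meta itemprop="isFamilyFriendly" content="False">'
print(family_friendly_age_limit(html))  # 18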
@@ -704,15 +743,15 @@ class InfoExtractor(object):
                 preference,
                 f.get('language_preference') if f.get('language_preference') is not None else -1,
                 f.get('quality') if f.get('quality') is not None else -1,
+                f.get('tbr') if f.get('tbr') is not None else -1,
+                f.get('filesize') if f.get('filesize') is not None else -1,
+                f.get('vbr') if f.get('vbr') is not None else -1,
                 f.get('height') if f.get('height') is not None else -1,
                 f.get('width') if f.get('width') is not None else -1,
                 ext_preference,
-                f.get('tbr') if f.get('tbr') is not None else -1,
-                f.get('vbr') if f.get('vbr') is not None else -1,
                 f.get('abr') if f.get('abr') is not None else -1,
                 audio_ext_preference,
                 f.get('fps') if f.get('fps') is not None else -1,
-                f.get('filesize') if f.get('filesize') is not None else -1,
                 f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
                 f.get('source_preference') if f.get('source_preference') is not None else -1,
                 f.get('format_id'),
@@ -729,9 +768,7 @@ class InfoExtractor(object):

     def _is_valid_url(self, url, video_id, item='video'):
         try:
-            self._request_webpage(
-                HEADRequest(url), video_id,
-                'Checking %s URL' % item)
+            self._request_webpage(url, video_id, 'Checking %s URL' % item)
             return True
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError):
@@ -764,7 +801,7 @@ class InfoExtractor(object):
         self.to_screen(msg)
         time.sleep(timeout)

-    def _extract_f4m_formats(self, manifest_url, video_id):
+    def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
         manifest = self._download_xml(
             manifest_url, video_id, 'Downloading f4m manifest',
             'Unable to download f4m manifest')
@@ -777,30 +814,32 @@ class InfoExtractor(object):
         media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
         for i, media_el in enumerate(media_nodes):
             if manifest_version == '2.0':
-                manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
+                manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
+                                (media_el.attrib.get('href') or media_el.attrib.get('url')))
             tbr = int_or_none(media_el.attrib.get('bitrate'))
-            format_id = 'f4m-%d' % (i if tbr is None else tbr)
             formats.append({
-                'format_id': format_id,
+                'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
                 'url': manifest_url,
                 'ext': 'flv',
                 'tbr': tbr,
                 'width': int_or_none(media_el.attrib.get('width')),
                 'height': int_or_none(media_el.attrib.get('height')),
+                'preference': preference,
             })
         self._sort_formats(formats)

         return formats

     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
-                              entry_protocol='m3u8', preference=None):
+                              entry_protocol='m3u8', preference=None,
+                              m3u8_id=None):

         formats = [{
-            'format_id': 'm3u8-meta',
+            'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
             'url': m3u8_url,
             'ext': ext,
             'protocol': 'm3u8',
-            'preference': -1,
+            'preference': preference - 1 if preference else -1,
             'resolution': 'multiple',
             'format_note': 'Quality selection URL',
         }]
@@ -815,6 +854,7 @@ class InfoExtractor(object):
             note='Downloading m3u8 information',
             errnote='Failed to download m3u8 information')
         last_info = None
+        last_media = None
         kv_rex = re.compile(
             r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
         for line in m3u8_doc.splitlines():
@@ -825,6 +865,13 @@ class InfoExtractor(object):
                 if v.startswith('"'):
                     v = v[1:-1]
                 last_info[m.group('key')] = v
+            elif line.startswith('#EXT-X-MEDIA:'):
+                last_media = {}
+                for m in kv_rex.finditer(line):
+                    v = m.group('val')
+                    if v.startswith('"'):
+                        v = v[1:-1]
+                    last_media[m.group('key')] = v
             elif line.startswith('#') or not line.strip():
                 continue
             else:
@@ -832,9 +879,8 @@ class InfoExtractor(object):
                 formats.append({'url': format_url(line)})
                 continue
             tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
-
             f = {
-                'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
+                'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
                 'url': format_url(line.strip()),
                 'tbr': tbr,
                 'ext': ext,
@@ -854,16 +900,22 @@ class InfoExtractor(object):
                 width_str, height_str = resolution.split('x')
                 f['width'] = int(width_str)
                 f['height'] = int(height_str)
+            if last_media is not None:
+                f['m3u8_media'] = last_media
+                last_media = None
             formats.append(f)
             last_info = {}
         self._sort_formats(formats)
         return formats

     # TODO: improve extraction
-    def _extract_smil_formats(self, smil_url, video_id):
+    def _extract_smil_formats(self, smil_url, video_id, fatal=True):
         smil = self._download_xml(
             smil_url, video_id, 'Downloading SMIL file',
-            'Unable to download SMIL file')
+            'Unable to download SMIL file', fatal=fatal)
+        if smil is False:
+            assert not fatal
+            return []

         base = smil.find('./head/meta').get('base')

@@ -965,6 +1017,24 @@ class InfoExtractor(object):
             any_restricted = any_restricted or is_restricted
         return not any_restricted

+    def extract_subtitles(self, *args, **kwargs):
+        if (self._downloader.params.get('writesubtitles', False) or
+                self._downloader.params.get('listsubtitles')):
+            return self._get_subtitles(*args, **kwargs)
+        return {}
+
+    def _get_subtitles(self, *args, **kwargs):
+        raise NotImplementedError("This method must be implemented by subclasses")
+
+    def extract_automatic_captions(self, *args, **kwargs):
+        if (self._downloader.params.get('writeautomaticsub', False) or
+                self._downloader.params.get('listsubtitles')):
+            return self._get_automatic_captions(*args, **kwargs)
+        return {}
+
+    def _get_automatic_captions(self, *args, **kwargs):
+        raise NotImplementedError("This method must be implemented by subclasses")
+

 class SearchInfoExtractor(InfoExtractor):
     """
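The extract_subtitles/extract_automatic_captions pair above is the heart of this release's subtitle refactor: the public method checks the user's options and only then calls the per-site _get_subtitles hook, so no extractor pays the network cost unless subtitles were actually requested. A minimal mock of the control flow (DummyDownloader and DemoIE are invented for the demo):

class DummyDownloader(object):
    def __init__(self, params):
        self.params = params

class DemoIE(object):
    def __init__(self, downloader):
        self._downloader = downloader

    def extract_subtitles(self, *args, **kwargs):
        if (self._downloader.params.get('writesubtitles', False) or
                self._downloader.params.get('listsubtitles')):
            return self._get_subtitles(*args, **kwargs)
        return {}

    def _get_subtitles(self, video_id):
        # Site-specific work (network requests) happens only when needed.
        return {'en': [{'ext': 'srt', 'data': '...'}]}

print(DemoIE(DummyDownloader({})).extract_subtitles('x'))
# {} -- hook never called
print(DemoIE(DummyDownloader({'writesubtitles': True})).extract_subtitles('x'))
# {'en': [{'ext': 'srt', 'data': '...'}]}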
@@ -24,6 +24,23 @@ class CommonMistakesIE(InfoExtractor):
             'That doesn\'t make any sense. '
             'Simply remove the parameter in your command or configuration.'
         ) % url
-        if self._downloader.params.get('verbose'):
+        if not self._downloader.params.get('verbose'):
             msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
         raise ExtractorError(msg, expected=True)
+
+
+class UnicodeBOMIE(InfoExtractor):
+    IE_DESC = False
+    _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+    _TESTS = [{
+        'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        real_url = self._match_id(url)
+        self.report_warning(
+            'Your URL starts with a Byte Order Mark (BOM). '
+            'Removing the BOM and looking for "%s" ...' % real_url)
+        return self.url_result(real_url)
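UnicodeBOMIE exists because shells and config files occasionally hand youtube-dl a URL with an invisible U+FEFF prefix, which would otherwise match no extractor. A quick demonstration of how the pattern strips it:

import re

_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'

url = '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc'
m = re.match(_VALID_URL, url)
print(m.group('id'))  # http://www.youtube.com/watch?v=BaW_jenozKc -- BOM gone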
@@ -9,7 +9,7 @@ import xml.etree.ElementTree

 from hashlib import sha1
 from math import pow, sqrt, floor
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
@@ -25,10 +25,9 @@ from ..aes import (
     aes_cbc_decrypt,
     inc,
 )
-from .common import InfoExtractor


-class CrunchyrollIE(SubtitlesInfoExtractor):
+class CrunchyrollIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -187,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

         return output

+    def _get_subtitles(self, video_id, webpage):
+        subtitles = {}
+        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
+            sub_page = self._download_webpage(
+                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
+                video_id, note='Downloading subtitles for ' + sub_name)
+            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
+            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
+            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
+            if not id or not iv or not data:
+                continue
+            id = int(id)
+            iv = base64.b64decode(iv)
+            data = base64.b64decode(data)
+
+            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+            if not lang_code:
+                continue
+            sub_root = xml.etree.ElementTree.fromstring(subtitle)
+            subtitles[lang_code] = [
+                {
+                    'ext': 'srt',
+                    'data': self._convert_subtitles_to_srt(sub_root),
+                },
+                {
+                    'ext': 'ass',
+                    'data': self._convert_subtitles_to_ass(sub_root),
+                },
+            ]
+        return subtitles
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')
@@ -249,34 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'format_id': video_format,
             })

-        subtitles = {}
-        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
-        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
-            sub_page = self._download_webpage(
-                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
-                video_id, note='Downloading subtitles for ' + sub_name)
-            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
-            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
-            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
-            if not id or not iv or not data:
-                continue
-            id = int(id)
-            iv = base64.b64decode(iv)
-            data = base64.b64decode(data)
-
-            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
-            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
-            if not lang_code:
-                continue
-            sub_root = xml.etree.ElementTree.fromstring(subtitle)
-            if sub_format == 'ass':
-                subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
-            else:
-                subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+        subtitles = self.extract_subtitles(video_id, webpage)

         return {
             'id': video_id,
@ -6,7 +6,6 @@ import json
|
|||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
@ -31,7 +30,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||||||
return request
|
return request
|
||||||
|
|
||||||
|
|
||||||
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
"""Information Extractor for Dailymotion"""
|
"""Information Extractor for Dailymotion"""
|
||||||
|
|
||||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||||
@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
|
||||||
self._list_available_subtitles(video_id, webpage)
|
|
||||||
return
|
|
||||||
|
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
r'video_views_count[^>]+>\s+([\d\.,]+)',
|
||||||
@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
sub_list = self._download_webpage(
|
sub_list = self._download_webpage(
|
||||||
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
|
||||||
@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
return {}
|
return {}
|
||||||
info = json.loads(sub_list)
|
info = json.loads(sub_list)
|
||||||
if (info['total'] > 0):
|
if (info['total'] > 0):
|
||||||
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
|
sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
|
||||||
return sub_lang_list
|
return sub_lang_list
|
||||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||||
return {}
|
return {}
|
||||||
@ -194,6 +190,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'SPORT',
|
'title': 'SPORT',
|
||||||
|
'id': 'xv4bw_nqtv_sport',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 20,
|
'playlist_mincount': 20,
|
||||||
}]
|
}]
|
||||||
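
The Dailymotion hunks above show the subtitle-API migration that recurs throughout this range (DRTV and Lynda below get the same treatment): `_get_available_subtitles` becomes `_get_subtitles`, the `listsubtitles` short-circuit moves into the base class, and each language now maps to a list of track dicts instead of a bare URL string. A minimal sketch of the shape change, with invented sample values:

```python
# Old convention: language code -> bare URL string.
old_style = {'en': 'https://example.com/subs/en.srt'}

# New convention: language code -> list of candidate tracks, each a dict
# carrying either a remote 'url' or inline 'data', plus an 'ext' hint.
new_style = {
    'en': [{'url': 'https://example.com/subs/en.srt', 'ext': 'srt'}],
    'da': [{'data': '0\r\n00:00:01 --> 00:00:02\r\nHej', 'ext': 'srt'}],
}

def upgrade(sub_lang_list, ext='srt'):
    """Convert the old mapping into the new list-of-dicts shape."""
    return {lang: [{'url': url, 'ext': ext}]
            for lang, url in sub_lang_list.items()}

assert upgrade(old_style) == {'en': new_style['en']}
```
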
@@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor):
             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
             'ext': 'flv',
             'title': 'Videoinstallation für eine Kaufhausfassade'
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
         }
     }

@@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):
             r"flashvars.pvg_id=\"(\d+)\";",
             webpage, 'ID')

-        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
-                    + video_id)
+        json_url = (
+            'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
+            video_id)
         info = self._download_json(json_url, title, 'Downloading JSON config')
         video_url = info['renditions'][0]['url']

@@ -1,13 +1,14 @@
 from __future__ import unicode_literals

-import re
-import time
-
 from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    int_or_none,
+)


 class DotsubIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
         'md5': '0914d4d69605090f623b7ac329fea66e',
@@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor):
             'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
             'ext': 'flv',
             'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
+            'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
+            'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
+            'duration': 3169,
             'uploader': '4v4l0n42',
-            'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
-            'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
+            'timestamp': 1292248482.625,
             'upload_date': '20101213',
+            'view_count': int,
         }
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

-        info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
-        info = self._download_json(info_url, video_id)
-        date = time.gmtime(info['dateCreated'] / 1000)  # The timestamp is in miliseconds
+        info = self._download_json(
+            'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
+        video_url = info.get('mediaURI')

+        if not video_url:
+            webpage = self._download_webpage(url, video_id)
+            video_url = self._search_regex(
+                r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
+
         return {
             'id': video_id,
-            'url': info['mediaURI'],
+            'url': video_url,
             'ext': 'flv',
             'title': info['title'],
-            'thumbnail': info['screenshotURI'],
-            'description': info['description'],
-            'uploader': info['user'],
-            'view_count': info['numberOfViews'],
-            'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
+            'description': info.get('description'),
+            'thumbnail': info.get('screenshotURI'),
+            'duration': int_or_none(info.get('duration'), 1000),
+            'uploader': info.get('user'),
+            'timestamp': float_or_none(info.get('dateCreated'), 1000),
+            'view_count': int_or_none(info.get('numberOfViews')),
         }
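
Worth noting in the dotsub rewrite above: `int_or_none(info.get('duration'), 1000)` and `float_or_none(info.get('dateCreated'), 1000)` use the helpers' scale argument to turn the API's millisecond values into seconds, replacing the hand-rolled `time.gmtime(... / 1000)` date math. A simplified sketch of that behaviour (the real utils versions also support defaults and inverse scaling):

```python
def int_or_none(v, scale=1):
    # None-safe integer conversion with optional down-scaling.
    return None if v is None else int(v) // scale

def float_or_none(v, scale=1):
    # Same idea, but keeps fractional precision (used for timestamps).
    return None if v is None else float(v) / scale

assert int_or_none(3169000, 1000) == 3169                      # ms -> s duration
assert float_or_none(1292248482625, 1000) == 1292248482.625   # ms -> s timestamp
assert int_or_none(None, 1000) is None                         # missing field stays None
```
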
@@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor):
             'id': '1740434',
             'display_id': 'hot-perky-blonde-naked-golf',
             'ext': 'mp4',
-            'title': 'Hot Perky Blonde Naked Golf',
+            'title': 'hot perky blonde naked golf',
             'like_count': int,
             'dislike_count': int,
             'comment_count': int,
@@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor):
             r'<source src="([^"]+)"', webpage, 'video URL')

         title = self._html_search_regex(
-            r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
+            [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
+            webpage, 'title')

         thumbnail = self._html_search_regex(
             r'poster="([^"]+)"',
@@ -1,11 +1,10 @@
 from __future__ import unicode_literals

-from .subtitles import SubtitlesInfoExtractor
-from .common import ExtractorError
+from .common import InfoExtractor, ExtractorError
 from ..utils import parse_iso8601


-class DRTVIE(SubtitlesInfoExtractor):
+class DRTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'

     _TEST = {
@@ -76,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor):
             }
             for subs in subtitles_list:
                 lang = subs['Language']
-                subtitles[LANGS.get(lang, lang)] = subs['Uri']
+                subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]

         if not formats and restricted_to_denmark:
             raise ExtractorError(
@@ -84,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor):

         self._sort_formats(formats)

-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
         return {
             'id': video_id,
             'title': title,
@@ -96,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor):
             'timestamp': timestamp,
             'duration': duration,
             'formats': formats,
-            'subtitles': self.extract_subtitles(video_id, subtitles),
+            'subtitles': subtitles,
         }
youtube_dl/extractor/embedly.py (new file, 16 lines)
@@ -0,0 +1,16 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class EmbedlyIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
+    _TESTS = [{
+        'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
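
The whole extractor is URL unquoting plus delegation: the `url=` query parameter captured by `_VALID_URL` is itself the target video URL, and `url_result` hands it back to whichever extractor matches it. A rough standalone equivalent of that step (function name and import fallback are illustrative, not youtube-dl API):

```python
import re

try:
    from urllib.parse import unquote  # Python 3
except ImportError:
    from urllib import unquote  # Python 2, as compat_urllib_parse_unquote wraps

def embedly_target(embed_url):
    # Mirror of the _VALID_URL capture: the 'url=' value up to '#' or '&'.
    m = re.search(r'[?&]url=([^#&]+)', embed_url)
    return unquote(m.group(1)) if m else None

print(embedly_target(
    'https://cdn.embedly.com/widgets/media.html'
    '?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw'))
# -> https://www.youtube.com/watch?v=SU4fj_aEMVw
```
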
@@ -1,18 +1,17 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
 )
 from ..utils import (
     ExtractorError,
+    js_to_json,
 )


 class EscapistIE(InfoExtractor):
-    _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
+    _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
     _TEST = {
         'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
         'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@@ -20,31 +19,37 @@ class EscapistIE(InfoExtractor):
             'id': '6618',
             'ext': 'mp4',
             'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
-            'uploader': 'the-escapist-presents',
+            'uploader_id': 'the-escapist-presents',
+            'uploader': 'The Escapist Presents',
             'title': "Breaking Down Baldur's Gate",
+            'thumbnail': 're:^https?://.*\.jpg$',
         }
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        showName = mobj.group('showname')
-        video_id = mobj.group('id')
-
-        self.report_extraction(video_id)
+        video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)

-        videoDesc = self._html_search_regex(
-            r'<meta name="description" content="([^"]*)"',
-            webpage, 'description', fatal=False)
+        uploader_id = self._html_search_regex(
+            r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
+            webpage, 'uploader ID', fatal=False)
+        uploader = self._html_search_regex(
+            r"<h1\s+class='headline'>(.*?)</a>",
+            webpage, 'uploader', fatal=False)
+        description = self._html_search_meta('description', webpage)

-        playerUrl = self._og_search_video_url(webpage, name='player URL')
+        raw_title = self._html_search_meta('title', webpage, fatal=True)
+        title = raw_title.partition(' : ')[2]

-        title = self._html_search_regex(
-            r'<meta name="title" content="([^"]*)"',
-            webpage, 'title').split(' : ')[-1]
-
-        configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
-        configUrl = compat_urllib_parse.unquote(configUrl)
+        config_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'''(?x)
+            (?:
+                <param\s+name="flashvars"\s+value="config=|
+                flashvars="config=
+            )
+            ([^"&]+)
+            ''',
+            webpage, 'config URL'))

         formats = []

@@ -53,18 +58,21 @@ class EscapistIE(InfoExtractor):
             cfgurl, video_id,
             'Downloading ' + name + ' configuration',
             'Unable to download ' + name + ' configuration',
-            transform_source=lambda s: s.replace("'", '"'))
+            transform_source=js_to_json)

         playlist = config['playlist']
+        video_url = next(
+            p['url'] for p in playlist
+            if p.get('eventCategory') == 'Video')
         formats.append({
-            'url': playlist[1]['url'],
+            'url': video_url,
             'format_id': name,
             'quality': quality,
         })

-        _add_format('normal', configUrl, quality=0)
-        hq_url = (configUrl +
-                  ('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
+        _add_format('normal', config_url, quality=0)
+        hq_url = (config_url +
+                  ('&hq=1' if '?' in config_url else config_url + '?hq=1'))
         try:
             _add_format('hq', hq_url, quality=1)
         except ExtractorError:
@@ -75,9 +83,9 @@ class EscapistIE(InfoExtractor):
         return {
             'id': video_id,
             'formats': formats,
-            'uploader': showName,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
             'title': title,
             'thumbnail': self._og_search_thumbnail(webpage),
-            'description': videoDesc,
-            'player_url': playerUrl,
+            'description': description,
         }
@@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
         params_raw = compat_urllib_parse.unquote(data['params'])
         params = json.loads(params_raw)
         video_data = params['video_data'][0]
-        video_url = video_data.get('hd_src')
-        if not video_url:
-            video_url = video_data['sd_src']
-        if not video_url:
-            raise ExtractorError('Cannot find video URL')
+
+        formats = []
+        for quality in ['sd', 'hd']:
+            src = video_data.get('%s_src' % quality)
+            if src is not None:
+                formats.append({
+                    'format_id': quality,
+                    'url': src,
+                })
+        if not formats:
+            raise ExtractorError('Cannot find video formats')

         video_title = self._html_search_regex(
             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
@@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
         return {
             'id': video_id,
             'title': video_title,
-            'url': video_url,
+            'formats': formats,
             'duration': int_or_none(video_data.get('video_duration')),
             'thumbnail': video_data.get('thumbnail_src'),
         }
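
The Facebook rewrite replaces the single `video_url` (HD preferred, SD fallback) with a `formats` list, so both renditions survive for format selection. The loop in isolation, run against an invented `video_data` dict:

```python
video_data = {'sd_src': 'http://example.com/v_sd.mp4',
              'hd_src': 'http://example.com/v_hd.mp4'}

formats = []
for quality in ['sd', 'hd']:  # ascending quality order
    src = video_data.get('%s_src' % quality)
    if src is not None:  # a rendition may be absent; skip it rather than fail
        formats.append({'format_id': quality, 'url': src})

if not formats:
    raise ValueError('Cannot find video formats')

assert [f['format_id'] for f in formats] == ['sd', 'hd']
```
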
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

         page = self._download_webpage(url, video_id)
-        title = self._html_search_meta('twitter:title', page, 'title')
+        title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
         description = self._html_search_meta('twitter:description', page, 'title')

         data = self._download_xml(
@@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
             'height': int(details.find('./height').text.strip()),
         } for details in item.findall('./source/file_details') if details.find('./file').text
         ]
+        self._sort_formats(formats)

         return {
             'id': video_id,
@@ -1,52 +1,71 @@
 # encoding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import int_or_none


 class FirstTVIE(InfoExtractor):
-    IE_NAME = 'firsttv'
-    IE_DESC = 'Видеоархив - Первый канал'
-    _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
+    IE_NAME = '1tv'
+    IE_DESC = 'Первый канал'
+    _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'

-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.1tv.ru/videoarchive/73390',
-        'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
+        'md5': '777f525feeec4806130f4f764bc18a4f',
         'info_dict': {
             'id': '73390',
             'ext': 'mp4',
             'title': 'Олимпийские канатные дороги',
-            'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
-            'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
             'duration': 149,
+            'like_count': int,
+            'dislike_count': int,
         },
         'skip': 'Only works from Russia',
-    }
+    }, {
+        'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
+        'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
+        'info_dict': {
+            'id': '35930',
+            'ext': 'mp4',
+            'title': 'Наедине со всеми. Людмила Сенчина',
+            'description': 'md5:89553aed1d641416001fe8d450f06cb9',
+            'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
+            'duration': 2694,
+        },
+        'skip': 'Only works from Russia',
+    }]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

         webpage = self._download_webpage(url, video_id, 'Downloading page')

         video_url = self._html_search_regex(
-            r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
+            r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
+            webpage, 'video URL')

         title = self._html_search_regex(
-            r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
+            [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+             r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
         description = self._html_search_regex(
-            r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
+            r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
+            webpage, 'description', default=None) or self._html_search_meta(
+            'description', webpage, 'description')

         thumbnail = self._og_search_thumbnail(webpage)
-        duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
+        duration = self._og_search_property(
+            'video:duration', webpage,
+            'video duration', fatal=False)

-        like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
-                                             webpage, 'like count', fatal=False)
-        dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
-                                                webpage, 'dislike count', fatal=False)
+        like_count = self._html_search_regex(
+            r'title="Понравилось".*?/></label> \[(\d+)\]',
+            webpage, 'like count', default=None)
+        dislike_count = self._html_search_regex(
+            r'title="Не понравилось".*?/></label> \[(\d+)\]',
+            webpage, 'dislike count', default=None)

         return {
             'id': video_id,
@@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
     IE_NAME = '5min'
     _VALID_URL = r'''(?x)
         (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
+            https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
             5min:)
         (?P<id>\d+)
         '''
@@ -1,77 +1,69 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import json
-import re
-
 from .common import InfoExtractor
 from ..compat import (
-    compat_parse_qs,
     compat_urlparse,
 )
+from ..utils import (
+    determine_ext,
+    int_or_none,
+)


 class FranceCultureIE(InfoExtractor):
-    _VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
         'info_dict': {
             'id': '4795174',
             'ext': 'mp3',
             'title': 'Rendez-vous au pays des geeks',
+            'alt_title': 'Carnet nomade | 13-14',
             'vcodec': 'none',
-            'uploader': 'Colette Fellous',
             'upload_date': '20140301',
-            'duration': 3601,
             'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
-            'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
+            'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats',
+            'timestamp': 1393700400,
         }
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        baseurl = mobj.group('baseurl')
+        video_id = self._match_id(url)

         webpage = self._download_webpage(url, video_id)
-        params_code = self._search_regex(
-            r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
-            webpage, 'parameter code')
-        params = compat_parse_qs(params_code)
-        video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
+
+        video_path = self._search_regex(
+            r'<a id="player".*?href="([^"]+)"', webpage, 'video path')
+        video_url = compat_urlparse.urljoin(url, video_path)
+        timestamp = int_or_none(self._search_regex(
+            r'<a id="player".*?data-date="([0-9]+)"',
+            webpage, 'upload date', fatal=False))
+        thumbnail = self._search_regex(
+            r'<a id="player".*?>\s+<img src="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)

         title = self._html_search_regex(
-            r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
+            r'<span class="title-diffusion">(.*?)</span>', webpage, 'title')
+        alt_title = self._html_search_regex(
+            r'<span class="title">(.*?)</span>',
+            webpage, 'alt_title', fatal=False)
+        description = self._html_search_regex(
+            r'<span class="description">(.*?)</span>',
+            webpage, 'description', fatal=False)

         uploader = self._html_search_regex(
             r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
-            webpage, 'uploader', fatal=False)
-        thumbnail_part = self._html_search_regex(
-            r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
-            'thumbnail', fatal=False)
-        if thumbnail_part is None:
-            thumbnail = None
-        else:
-            thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
-        description = self._html_search_regex(
-            r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
-
-        info = json.loads(params['infoData'][0])[0]
-        duration = info.get('media_length')
-        upload_date_candidate = info.get('media_section5')
-        upload_date = (
-            upload_date_candidate
-            if (upload_date_candidate is not None and
-                re.match(r'[0-9]{8}$', upload_date_candidate))
-            else None)
+            webpage, 'uploader', default=None)
+        vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None

         return {
             'id': video_id,
             'url': video_url,
-            'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
-            'duration': duration,
+            'vcodec': vcodec,
             'uploader': uploader,
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'title': title,
+            'alt_title': alt_title,
             'thumbnail': thumbnail,
             'description': description,
         }
@@ -1,41 +1,67 @@
+# coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
+from ..utils import (
+    xpath_text,
+    xpath_with_ns,
+)


 class GamekingsIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
-    _TEST = {
+    _VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
+    _TESTS = [{
         'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
         # MD5 is flaky, seems to change regularly
         # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
         'info_dict': {
-            'id': '20130811',
+            'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
             'ext': 'mp4',
             'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
             'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
-        }
-    }
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        # vimeo video
+        'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
+        'md5': '12bf04dfd238e70058046937657ea68d',
+        'info_dict': {
+            'id': 'the-legend-of-zelda-majoras-mask',
+            'ext': 'mp4',
+            'title': 'The Legend of Zelda: Majora’s Mask',
+            'description': 'md5:9917825fe0e9f4057601fe1e38860de3',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+    }, {
+        'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
+        'only_matching': True,
+    }]

     def _real_extract(self, url):
+        video_id = self._match_id(url)

-        mobj = re.match(self._VALID_URL, url)
-        name = mobj.group('name')
-        webpage = self._download_webpage(url, name)
-        video_url = self._og_search_video_url(webpage)
+        webpage = self._download_webpage(url, video_id)

-        video = re.search(r'[0-9]+', video_url)
-        video_id = video.group(0)
+        playlist_id = self._search_regex(
+            r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')

-        # Todo: add medium format
-        video_url = video_url.replace(video_id, 'large/' + video_id)
+        playlist = self._download_xml(
+            'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
+            video_id)
+
+        NS_MAP = {
+            'jwplayer': 'http://rss.jwpcdn.com/'
+        }
+
+        item = playlist.find('./channel/item')
+
+        thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail')
+        video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file')

         return {
             'id': video_id,
-            'ext': 'mp4',
             'url': video_url,
             'title': self._og_search_title(webpage),
             'description': self._og_search_description(webpage),
+            'thumbnail': thumbnail,
         }
@@ -7,6 +7,7 @@ from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
 )
+from ..utils import remove_end


 class GDCVaultIE(InfoExtractor):
@@ -65,10 +66,12 @@ class GDCVaultIE(InfoExtractor):

     def _parse_flv(self, xml_description):
         video_formats = []
-        akami_url = xml_description.find('./metadata/akamaiHost').text
+        akamai_url = xml_description.find('./metadata/akamaiHost').text
         slide_video_path = xml_description.find('./metadata/slideVideo').text
         video_formats.append({
-            'url': 'rtmp://' + akami_url + '/' + slide_video_path,
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+            'play_path': remove_end(slide_video_path, '.flv'),
+            'ext': 'flv',
             'format_note': 'slide deck video',
             'quality': -2,
             'preference': -2,
@@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor):
         })
         speaker_video_path = xml_description.find('./metadata/speakerVideo').text
         video_formats.append({
-            'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
+            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+            'play_path': remove_end(speaker_video_path, '.flv'),
+            'ext': 'flv',
             'format_note': 'speaker video',
             'quality': -1,
             'preference': -1,
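
The GDC Vault change splits the old single rtmp URL into an application URL plus a separate `play_path`, the layout rtmpdump-style downloaders expect, with `remove_end` stripping the trailing '.flv' from the stream name. A small illustration with invented values:

```python
def remove_end(s, end):
    # As in youtube_dl.utils: drop a suffix if (and only if) present.
    return s[:-len(end)] if end and s.endswith(end) else s

akamai_url = 'example.fcod.llnwd.net'          # hypothetical akamaiHost value
slide_video_path = 'events/gdc/2015/slides_talk.flv'

fmt = {
    'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,  # rtmp application URL
    'play_path': remove_end(slide_video_path, '.flv'),   # stream name, no extension
    'ext': 'flv',
}
assert fmt['play_path'] == 'events/gdc/2015/slides_talk'
```
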
@@ -140,6 +140,19 @@ class GenericIE(InfoExtractor):
         },
         'add_ie': ['Ooyala'],
     },
+    # multiple ooyala embeds on SBN network websites
+    {
+        'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+        'info_dict': {
+            'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
+            'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
+        },
+        'playlist_mincount': 3,
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['Ooyala'],
+    },
     # google redirect
     {
         'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -460,6 +473,7 @@ class GenericIE(InfoExtractor):
     {
         'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
         'info_dict': {
+            'id': '1986',
             'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
         },
         'playlist_mincount': 2,
@@ -511,7 +525,38 @@ class GenericIE(InfoExtractor):
             'upload_date': '20150126',
         },
         'add_ie': ['Viddler'],
-    }
+    },
+    # jwplayer YouTube
+    {
+        'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
+        'info_dict': {
+            'id': 'Mrj4DVp2zeA',
+            'ext': 'mp4',
+            'upload_date': '20150212',
+            'uploader': 'The National Archives UK',
+            'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
+            'uploader_id': 'NationalArchives08',
+            'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
+        },
+    },
+    # rtl.nl embed
+    {
+        'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'id': 'aanslagen-kopenhagen',
+            'title': 'Aanslagen Kopenhagen | RTL Nieuws',
+        }
+    },
+    # Zapiks embed
+    {
+        'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+        'info_dict': {
+            'id': '118046',
+            'ext': 'mp4',
+            'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
+        }
+    },
     ]

     def report_following_redirect(self, new_url):
@@ -756,6 +801,13 @@ class GenericIE(InfoExtractor):
             'entries': entries,
         }

+        # Look for embedded rtl.nl player
+        matches = re.findall(
+            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
+            webpage)
+        if matches:
+            return _playlist_from_matches(matches, ie='RtlNl')
+
         # Look for embedded (iframe) Vimeo player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
@@ -763,7 +815,6 @@ class GenericIE(InfoExtractor):
             player_url = unescapeHTML(mobj.group('url'))
             surl = smuggle_url(player_url, {'Referer': url})
             return self.url_result(surl)
-
         # Look for embedded (swf embed) Vimeo player
         mobj = re.search(
             r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
@@ -882,10 +933,19 @@ class GenericIE(InfoExtractor):

         # Look for Ooyala videos
         mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
-                re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+                re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
+                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
         if mobj is not None:
             return OoyalaIE._build_url_result(mobj.group('ec'))

+        # Look for multiple Ooyala embeds on SBN network websites
+        mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
+        if mobj is not None:
+            embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
+            if embeds:
+                return _playlist_from_matches(
+                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
+
         # Look for Aparat videos
         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
         if mobj is not None:
@@ -1012,7 +1072,12 @@ class GenericIE(InfoExtractor):

         # Look for embedded sbs.com.au player
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
+            r'''(?x)
+            (?:
+                <meta\s+property="og:video"\s+content=|
+                <iframe[^>]+?src=
+            )
+            (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
             webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'SBS')
@@ -1042,7 +1107,15 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Livestream')

+        # Look for Zapiks embed
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Zapiks')
+
         def check_video(vurl):
+            if YoutubeIE.suitable(vurl):
+                return True
             vpath = compat_urlparse.urlparse(vurl).path
             vext = determine_ext(vpath)
             return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
@@ -1060,7 +1133,8 @@ class GenericIE(InfoExtractor):
                     JWPlayerOptions|
                     jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
                 )
-                .*?file\s*:\s*["\'](.*?)["\']''', webpage))
+                .*?
+                ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
         if not found:
             # Broaden the search a little bit
             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
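
For the SBN multi-embed branch added above, `_parse_json` yields a list of embed descriptors and `_playlist_from_matches` maps each through the `getter` callback to an Ooyala URL. The shape of that transformation, with an invented embed list and a stand-in for `OoyalaIE._url_for_embed_code`:

```python
embeds = [
    {'provider_video_id': '5scg6HT7ZWyIWsMo2cCYSCkcWS8xoq6N'},
    {'provider_video_id': 'QbTIMZT_VJaSOyyz9pmm9DfCHD2EVaKA'},
]

def url_for_embed_code(code):
    # Stand-in for OoyalaIE._url_for_embed_code; the exact URL is an assumption.
    return 'http://player.ooyala.com/player.js?embedCode=%s' % code

getter = lambda v: url_for_embed_code(v['provider_video_id'])
playlist_urls = [getter(e) for e in embeds]  # what _playlist_from_matches iterates over
assert playlist_urls[0].endswith('5scg6HT7ZWyIWsMo2cCYSCkcWS8xoq6N')
```
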
@@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor):
         duration = parse_duration(self._html_search_regex(
             r'<span class="duration">\s*-?\s*(.*?)</span>',
             webpage, 'duration', fatal=False))
-        family_friendly = self._html_search_meta(
-            'isFamilyFriendly', webpage, default='false')

         flashvars = compat_parse_qs(self._html_search_regex(
             r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
@@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor):
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
-            'age_limit': 0 if family_friendly == 'true' else 18,
+            'age_limit': self._family_friendly_search(webpage),
         }
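
This change, and the izlesene one further down, swap per-extractor `isFamilyFriendly` checks for a shared `_family_friendly_search` helper on `InfoExtractor` that maps the schema.org flag to an age limit. The diff does not show the helper itself; a plausible simplified sketch of what it does:

```python
import re

def family_friendly_search(html):
    # Find <meta itemprop="isFamilyFriendly" content="..."> and map it to
    # an age limit: family-friendly -> 0, explicitly not -> 18, unknown -> None.
    m = re.search(
        r'<meta[^>]+itemprop=["\']isFamilyFriendly["\'][^>]+content=["\']([^"\']+)',
        html)
    if not m:
        return None
    return {'true': 0, 'false': 18}.get(m.group(1).lower())

assert family_friendly_search(
    '<meta itemprop="isFamilyFriendly" content="true">') == 0
```
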
youtube_dl/extractor/history.py (new file, 31 lines)
@@ -0,0 +1,31 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class HistoryIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
+
+    _TESTS = [{
+        'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
+        'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
+        'info_dict': {
+            'id': 'bLx5Dv5Aka1G',
+            'ext': 'mp4',
+            'title': "Bet You Didn't Know: Valentine's Day",
+            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
+        },
+        'add_ie': ['ThePlatform'],
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(
+            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
+            webpage, 'video url')
+
+        return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))
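
The new History extractor delegates to ThePlatform via `smuggle_url`, which piggybacks extra data (here a signing key) onto the URL so the receiving extractor can recover it with `unsmuggle_url`. A simplified sketch of that round trip (the real utils pair uses a urlencoded fragment, but the idea is the same):

```python
import json

try:
    from urllib.parse import quote, unquote  # Python 3
except ImportError:
    from urllib import quote, unquote  # Python 2

def smuggle_url(url, data):
    # Append extra data in a fragment the target extractor knows to strip.
    return url + '#__youtubedl_smuggle=' + quote(json.dumps(data))

def unsmuggle_url(smug_url):
    url, _, payload = smug_url.partition('#__youtubedl_smuggle=')
    return url, json.loads(unquote(payload))

url, data = unsmuggle_url(smuggle_url(
    'http://link.theplatform.com/s/xyz',
    {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))
assert data['sig']['key'] == 'crazyjava'
```
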
@@ -34,6 +34,9 @@ class IGNIE(InfoExtractor):
         },
         {
             'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+            'info_dict': {
+                'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
+            },
             'playlist': [
                 {
                     'info_dict': {
youtube_dl/extractor/imgur.py (new file, 97 lines)
@@ -0,0 +1,97 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    mimetype2ext,
+    ExtractorError,
+)
+
+
+class ImgurIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
+
+    _TESTS = [{
+        'url': 'https://i.imgur.com/A61SaA1.gifv',
+        'info_dict': {
+            'id': 'A61SaA1',
+            'ext': 'mp4',
+            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+        },
+    }, {
+        'url': 'https://imgur.com/A61SaA1',
+        'info_dict': {
+            'id': 'A61SaA1',
+            'ext': 'mp4',
+            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        width = int_or_none(self._search_regex(
+            r'<param name="width" value="([0-9]+)"',
+            webpage, 'width', fatal=False))
+        height = int_or_none(self._search_regex(
+            r'<param name="height" value="([0-9]+)"',
+            webpage, 'height', fatal=False))
+
+        video_elements = self._search_regex(
+            r'(?s)<div class="video-elements">(.*?)</div>',
+            webpage, 'video elements', default=None)
+        if not video_elements:
+            raise ExtractorError(
+                'No sources found for video %s. Maybe an image?' % video_id,
+                expected=True)
+
+        formats = []
+        for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
+            formats.append({
+                'format_id': m.group('type').partition('/')[2],
+                'url': self._proto_relative_url(m.group('src')),
+                'ext': mimetype2ext(m.group('type')),
+                'acodec': 'none',
+                'width': width,
+                'height': height,
+                'http_headers': {
+                    'User-Agent': 'youtube-dl (like wget)',
+                },
+            })
+
+        gif_json = self._search_regex(
+            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+            webpage, 'GIF code', fatal=False)
+        if gif_json:
+            gifd = self._parse_json(
+                gif_json, video_id, transform_source=js_to_json)
+            formats.append({
+                'format_id': 'gif',
+                'preference': -10,
+                'width': width,
+                'height': height,
+                'ext': 'gif',
+                'acodec': 'none',
+                'vcodec': 'gif',
+                'container': 'gif',
+                'url': self._proto_relative_url(gifd['gifUrl']),
+                'filesize': gifd.get('size'),
+                'http_headers': {
+                    'User-Agent': 'youtube-dl (like wget)',
+                },
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+            'title': self._og_search_title(webpage),
+        }
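
The new Imgur extractor builds one format per `<source>` tag, deriving `format_id` from the MIME subtype and the extension via `mimetype2ext`, plus a low-preference 'gif' fallback parsed from inline JavaScript with `js_to_json`. The MIME-handling step in isolation, against invented markup (`mimetype2ext` here is a simplified stand-in for the utils helper):

```python
import re

def mimetype2ext(mt):
    # Simplified: take the subtype; the real helper special-cases a few types.
    return mt.split('/')[-1] if mt else None

video_elements = '''
<source src="//i.example.com/A61SaA1.mp4" type="video/mp4">
<source src="//i.example.com/A61SaA1.webm" type="video/webm">
'''

formats = [{
    'format_id': m.group('type').partition('/')[2],
    'url': 'https:' + m.group('src'),  # stand-in for _proto_relative_url
    'ext': mimetype2ext(m.group('type')),
} for m in re.finditer(
    r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements)]

assert [f['format_id'] for f in formats] == ['mp4', 'webm']
```
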
@@ -80,9 +80,6 @@ class IzleseneIE(InfoExtractor):
             r'comment_count\s*=\s*\'([^\']+)\';',
             webpage, 'comment_count', fatal=False)

-        family_friendly = self._html_search_meta(
-            'isFamilyFriendly', webpage, 'age limit', fatal=False)
-
         content_url = self._html_search_meta(
             'contentURL', webpage, 'content URL', fatal=False)
         ext = determine_ext(content_url, 'mp4')
@@ -120,6 +117,6 @@ class IzleseneIE(InfoExtractor):
             'duration': duration,
             'view_count': int_or_none(view_count),
             'comment_count': int_or_none(comment_count),
-            'age_limit': 18 if family_friendly == 'False' else 0,
+            'age_limit': self._family_friendly_search(webpage),
             'formats': formats,
         }
@@ -1,23 +1,26 @@
+# -*- coding: utf-8 -*-
 from __future__ import unicode_literals

 import random
 import re

 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    xpath_text,
+)


 class Laola1TvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
     _TEST = {
-        'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html',
+        'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
         'info_dict': {
-            'id': '250019',
+            'id': '227883',
             'ext': 'mp4',
-            'title': 'Bitburger Open Grand Prix Gold - Court 1',
-            'categories': ['Badminton'],
-            'uploader': 'BWF - Badminton World Federation',
-            'is_live': True,
+            'title': 'Straubing Tigers - Kölner Haie',
+            'categories': ['Eishockey'],
+            'is_live': False,
         },
         'params': {
             'skip_download': True,
@@ -43,15 +46,26 @@ class Laola1TvIE(InfoExtractor):
             r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
         flashvars = dict((m[0], m[1]) for m in flashvars_m)

+        partner_id = self._search_regex(
+            r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
+
         xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
-                   'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % (
-                       video_id, portal, lang))
+                   'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
+                       video_id, partner_id, portal, lang))
         hd_doc = self._download_xml(xml_url, video_id)

-        title = hd_doc.find('.//video/title').text
-        flash_url = hd_doc.find('.//video/url').text
-        categories = hd_doc.find('.//video/meta_sports').text.split(',')
-        uploader = hd_doc.find('.//video/meta_organistation').text
+        title = xpath_text(hd_doc, './/video/title', fatal=True)
+        flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
+        uploader = xpath_text(hd_doc, './/video/meta_organistation')
+
+        is_live = xpath_text(hd_doc, './/video/islive') == 'true'
+        if is_live:
+            raise ExtractorError(
+                'Live streams are not supported by the f4m downloader.')
+
+        categories = xpath_text(hd_doc, './/video/meta_sports')
+        if categories:
+            categories = categories.split(',')

         ident = random.randint(10000000, 99999999)
         token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
@@ -60,15 +74,16 @@ class Laola1TvIE(InfoExtractor):
         token_doc = self._download_xml(
             token_url, video_id, note='Downloading token')
         token_attrib = token_doc.find('.//token').attrib
-        if token_attrib.get('auth') == 'blocked':
-            raise ExtractorError('Token error: ' % token_attrib.get('comment'))
+        if token_attrib.get('auth') in ('blocked', 'restricted'):
+            raise ExtractorError(
+                'Token error: %s' % token_attrib.get('comment'), expected=True)

         video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
             token_attrib['url'], token_attrib['auth'])

         return {
             'id': video_id,
-            'is_live': True,
+            'is_live': is_live,
             'title': title,
             'url': video_url,
             'uploader': uploader,
@@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor):
         'url': 'http://new.livestream.com/tedx/cityenglish',
         'info_dict': {
             'title': 'TEDCity2.0 (English)',
+            'id': '2245590',
         },
         'playlist_mincount': 4,
     }, {
@@ -148,7 +149,8 @@ class LivestreamIE(InfoExtractor):
             if is_relevant(video_data, video_id)]
         if video_id is None:
             # This is an event page:
-            return self.playlist_result(videos, info['id'], info['full_name'])
+            return self.playlist_result(
+                videos, '%s' % info['id'], info['full_name'])
         else:
             if not videos:
                 raise ExtractorError('Cannot find video %s' % video_id)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 import json
 
-from .subtitles import SubtitlesInfoExtractor
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
@@ -16,7 +15,7 @@ from ..utils import (
 )
 
 
-class LyndaIE(SubtitlesInfoExtractor):
+class LyndaIE(InfoExtractor):
     IE_NAME = 'lynda'
     IE_DESC = 'lynda.com videos'
     _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
@@ -88,11 +87,7 @@ class LyndaIE(SubtitlesInfoExtractor):
         self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, page)
-            return
-
-        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
+        subtitles = self.extract_subtitles(video_id, page)
 
         return {
             'id': video_id,
@@ -144,38 +139,31 @@ class LyndaIE(SubtitlesInfoExtractor):
         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
             raise ExtractorError('Unable to log in')
 
-    def _fix_subtitles(self, subtitles):
-        if subtitles is None:
-            return subtitles  # subtitles not requested
-
-        fixed_subtitles = {}
-        for k, v in subtitles.items():
-            subs = json.loads(v)
-            if len(subs) == 0:
-                continue
-            srt = ''
-            for pos in range(0, len(subs) - 1):
-                seq_current = subs[pos]
-                m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
-                if m_current is None:
-                    continue
-                seq_next = subs[pos + 1]
-                m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
-                if m_next is None:
-                    continue
-                appear_time = m_current.group('timecode')
-                disappear_time = m_next.group('timecode')
-                text = seq_current['Caption']
-                srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
-            if srt:
-                fixed_subtitles[k] = srt
-        return fixed_subtitles
-
-    def _get_available_subtitles(self, video_id, webpage):
+    def _fix_subtitles(self, subs):
+        srt = ''
+        for pos in range(0, len(subs) - 1):
+            seq_current = subs[pos]
+            m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
+            if m_current is None:
+                continue
+            seq_next = subs[pos + 1]
+            m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
+            if m_next is None:
+                continue
+            appear_time = m_current.group('timecode')
+            disappear_time = m_next.group('timecode')
+            text = seq_current['Caption']
+            srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
+        if srt:
+            return srt
+
+    def _get_subtitles(self, video_id, webpage):
         url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
-        sub = self._download_webpage(url, None, False)
-        sub_json = json.loads(sub)
-        return {'en': url} if len(sub_json) > 0 else {}
+        subs = self._download_json(url, None, False)
+        if subs:
+            return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
+        else:
+            return {}
 
 
 class LyndaCourseIE(InfoExtractor):
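Note: this lynda change is part of a tree-wide migration off SubtitlesInfoExtractor. Extractors now implement _get_subtitles() and call self.extract_subtitles(), which handles options such as --list-subs centrally. Judging by the converted extractors in this compare view, the returned value maps each language to a list of dicts carrying an ext plus either inline data or a url, roughly:

    # Shape of the value returned by _get_subtitles() after this
    # migration (illustrative values only):
    subtitles = {
        'en': [
            {'ext': 'srt', 'data': '0\r\n00:00:00,100 --> 00:00:02,500\r\nHello'},
        ],
        'nl': [
            {'ext': 'vtt', 'url': 'http://e.omroep.nl/tt888/SOME_VIDEO_ID'},
        ],
    }

Multiple entries per language express alternative formats; the nrk.py hunk further down returns both a ttml url and inline srt data for the same language.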
@@ -5,9 +5,6 @@ import json
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..compat import (
-    compat_urlparse,
-)
 from ..utils import (
     clean_html,
     ExtractorError,
@@ -108,7 +105,6 @@ class OCWMITIE(InfoExtractor):
             'upload_date': '20121109',
             'uploader_id': 'MIT',
             'uploader': 'MIT OpenCourseWare',
-            # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
         }
     },
     {
@@ -121,7 +117,6 @@ class OCWMITIE(InfoExtractor):
             'uploader_id': 'MIT',
             'uploader': 'MIT OpenCourseWare',
             'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
-            # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
         }
     }
 ]
@@ -140,7 +135,6 @@ class OCWMITIE(InfoExtractor):
             metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
             metadata = re.split(r', ?', metadata)
             yt = metadata[1]
-            subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
         else:
             # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
             embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
@@ -148,7 +142,6 @@ class OCWMITIE(InfoExtractor):
                 metadata = re.sub(r'[\'"]', '', embed_media.group(1))
                 metadata = re.split(r', ?', metadata)
                 yt = metadata[1]
-                subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
             else:
                 raise ExtractorError('Unable to find embedded YouTube video.')
         video_id = YoutubeIE.extract_id(yt)
@@ -159,7 +152,5 @@ class OCWMITIE(InfoExtractor):
             'title': title,
             'description': description,
             'url': yt,
-            'url_transparent'
-            'subtitles': subs,
             'ie_key': 'Youtube',
         }
@@ -18,7 +18,7 @@ class MixcloudIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
     IE_NAME = 'mixcloud'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
         'info_dict': {
             'id': 'dholbach-cryptkeeper',
@@ -33,7 +33,20 @@ class MixcloudIE(InfoExtractor):
             'view_count': int,
             'like_count': int,
         },
-    }
+    }, {
+        'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
+        'info_dict': {
+            'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
+            'ext': 'm4a',
+            'title': 'Electric Relaxation vol. 3',
+            'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
+            'uploader': 'Daniel Drumz',
+            'uploader_id': 'gillespeterson',
+            'thumbnail': 're:https?://.*\.jpg',
+            'view_count': int,
+            'like_count': int,
+        },
+    }]
 
     def _get_url(self, track_id, template_url):
         server_count = 30
@@ -60,7 +73,7 @@ class MixcloudIE(InfoExtractor):
         webpage = self._download_webpage(url, track_id)
 
         preview_url = self._search_regex(
-            r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
+            r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
         song_url = preview_url.replace('/previews/', '/c/originals/')
         template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
         final_song_url = self._get_url(track_id, template_url)
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
@@ -23,7 +23,7 @@ def _media_xml_tag(tag):
     return '{http://search.yahoo.com/mrss/}%s' % tag
 
 
-class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
+class MTVServicesInfoExtractor(InfoExtractor):
     _MOBILE_TEMPLATE = None
 
     @staticmethod
@@ -95,25 +95,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
 
     def _extract_subtitles(self, mdoc, mtvn_id):
         subtitles = {}
-        FORMATS = {
-            'scc': 'cea-608',
-            'eia-608': 'cea-608',
-            'xml': 'ttml',
-        }
-        subtitles_format = FORMATS.get(
-            self._downloader.params.get('subtitlesformat'), 'ttml')
         for transcript in mdoc.findall('.//transcript'):
             if transcript.get('kind') != 'captions':
                 continue
             lang = transcript.get('srclang')
-            for typographic in transcript.findall('./typographic'):
-                captions_format = typographic.get('format')
-                if captions_format == subtitles_format:
-                    subtitles[lang] = compat_str(typographic.get('src'))
-                    break
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(mtvn_id, subtitles)
-        return self.extract_subtitles(mtvn_id, subtitles)
+            subtitles[lang] = [{
+                'url': compat_str(typographic.get('src')),
+                'ext': typographic.get('format')
+            } for typographic in transcript.findall('./typographic')]
+        return subtitles
 
     def _get_video_info(self, itemdoc):
         uri = itemdoc.find('guid').text
@@ -196,8 +186,6 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
             webpage, 'mgid')
 
         videos_info = self._get_videos_info(mgid)
-        if self._downloader.params.get('listsubtitles', False):
-            return
         return videos_info
 
youtube_dl/extractor/nationalgeographic.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    smuggle_url,
+    url_basename,
+)
+
+
+class NationalGeographicIE(InfoExtractor):
+    _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
+
+    _TEST = {
+        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+        'info_dict': {
+            'id': '4DmDACA6Qtk_',
+            'ext': 'flv',
+            'title': 'Mating Crabs Busted by Sharks',
+            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+        },
+        'add_ie': ['ThePlatform'],
+    }
+
+    def _real_extract(self, url):
+        name = url_basename(url)
+
+        webpage = self._download_webpage(url, name)
+        feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
+        guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
+
+        feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
+        content = feed.find('.//{http://search.yahoo.com/mrss/}content')
+        theplatform_id = url_basename(content.attrib.get('url'))
+
+        return self.url_result(smuggle_url(
+            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
+            # For some reason, the normal links don't work and we must force the use of f4m
+            {'force_smil_url': True}))
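Note: the new extractor hands off to ThePlatform via smuggle_url, which piggybacks extra data on the URL fragment so it survives the url_result() round-trip to the other extractor. A sketch of the helper pair from youtube_dl.utils, simplified here to Python 3 imports (the real module goes through its compat layer):

    import json
    from urllib.parse import urlencode, parse_qs

    def smuggle_url(url, data):
        # Append the extra data as a JSON blob in the fragment.
        return url + '#' + urlencode({'__youtubedl_smuggle': json.dumps(data)})

    def unsmuggle_url(smug_url, default=None):
        if '#__youtubedl_smuggle' not in smug_url:
            return smug_url, default
        url, _, frag = smug_url.rpartition('#')
        data = json.loads(parse_qs(frag)['__youtubedl_smuggle'][0])
        return url, data

    url, data = unsmuggle_url(
        smuggle_url('http://example.com/v', {'force_smil_url': True}))
    assert data == {'force_smil_url': True}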
@@ -1,7 +1,6 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..compat import (
@@ -19,13 +18,13 @@ class NBCIE(InfoExtractor):
 
     _TESTS = [
         {
-            'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
+            'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
             # md5 checksum is not stable
             'info_dict': {
-                'id': 'bTmnLCvIbaaH',
+                'id': 'c9xnCo0YPOPH',
                 'ext': 'flv',
-                'title': 'I Am a Firefighter',
-                'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
+                'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+                'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
             },
         },
         {
@@ -52,9 +51,9 @@ class NBCIE(InfoExtractor):
 
 
 class NBCNewsIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
-        ((video/.+?/(?P<id>\d+))|
-        (feature/[^/]+/(?P<title>.+)))
+    _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
+        (?:video/.+?/(?P<id>\d+)|
+        (?:feature|nightly-news)/[^/]+/(?P<title>.+))
         '''
 
     _TESTS = [
@@ -89,6 +88,16 @@ class NBCNewsIE(InfoExtractor):
                 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
             },
         },
+        {
+            'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
+            'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
+            'info_dict': {
+                'id': 'sekXqyTVnmN3',
+                'ext': 'mp4',
+                'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
+                'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -107,13 +116,13 @@ class NBCNewsIE(InfoExtractor):
                 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
             }
         else:
-            # "feature" pages use theplatform.com
+            # "feature" and "nightly-news" pages use theplatform.com
             title = mobj.group('title')
             webpage = self._download_webpage(url, title)
             bootstrap_json = self._search_regex(
-                r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
-                flags=re.MULTILINE)
-            bootstrap = json.loads(bootstrap_json)
+                r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
+                webpage, 'bootstrap json', flags=re.MULTILINE)
+            bootstrap = self._parse_json(bootstrap_json, video_id)
             info = bootstrap['results'][0]['video']
             mpxid = info['mpxId']
 
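Note: the bootstrap parsing above now goes through self._parse_json() instead of a bare json.loads. The practical difference is error reporting: a parse failure surfaces as an extractor error naming the video id rather than an anonymous ValueError. A standalone sketch of that contract (the real method also accepts transform_source and fatal arguments):

    import json

    def parse_json(json_string, video_id, fatal=True):
        # Sketch of InfoExtractor._parse_json error handling.
        try:
            return json.loads(json_string)
        except ValueError as ve:
            if fatal:
                raise ValueError('%s: Failed to parse JSON (%s)' % (video_id, ve))
            return None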
youtube_dl/extractor/nerdist.py (new file, 80 lines)
@@ -0,0 +1,80 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    determine_ext,
+    parse_iso8601,
+    xpath_text,
+)
+
+
+class NerdistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w',
+        'md5': '3698ed582931b90d9e81e02e26e89f23',
+        'info_dict': {
+            'display_id': 'exclusive-which-dc-characters-w',
+            'id': 'RPHpvJyr',
+            'ext': 'mp4',
+            'title': 'Your TEEN TITANS Revealed! Who\'s on the show?',
+            'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$',
+            'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!',
+            'uploader': 'Eric Diaz',
+            'upload_date': '20150202',
+            'timestamp': 1422892808,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'''(?x)<script\s+(?:type="text/javascript"\s+)?
+                src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''',
+            webpage, 'video ID')
+        timestamp = parse_iso8601(self._html_search_meta(
+            'shareaholic:article_published_time', webpage, 'upload date'))
+        uploader = self._html_search_meta(
+            'shareaholic:article_author_name', webpage, 'article author')
+
+        doc = self._download_xml(
+            'http://content.nerdist.com/jw6/%s.xml' % video_id, video_id)
+        video_info = doc.find('.//item')
+        title = xpath_text(video_info, './title', fatal=True)
+        description = xpath_text(video_info, './description')
+        thumbnail = xpath_text(
+            video_info, './{http://rss.jwpcdn.com/}image', 'thumbnail')
+
+        formats = []
+        for source in video_info.findall('./{http://rss.jwpcdn.com/}source'):
+            vurl = source.attrib['file']
+            ext = determine_ext(vurl)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    vurl, video_id, entry_protocol='m3u8_native', ext='mp4',
+                    preference=0))
+            elif ext == 'smil':
+                formats.extend(self._extract_smil_formats(
+                    vurl, video_id, fatal=False
+                ))
+            else:
+                formats.append({
+                    'format_id': ext,
+                    'url': vurl,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'formats': formats,
+            'uploader': uploader,
+        }
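Note: nerdist.py dispatches on determine_ext() to pick the right format handler (m3u8 manifest, SMIL, or a direct download URL). The helper just inspects the URL path, ignoring the query string; a sketch of its behaviour:

    import re

    def determine_ext(url, default_ext='unknown_video'):
        # Sketch: extension of the URL path, query string ignored.
        if url is None:
            return default_ext
        guess = url.partition('?')[0].rpartition('.')[2]
        return guess if re.match(r'^[A-Za-z0-9]+$', guess) else default_ext

    assert determine_ext('http://cdn.example.com/master.m3u8?token=abc') == 'm3u8'
    assert determine_ext('http://cdn.example.com/clip.mp4') == 'mp4'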
@@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
             'timestamp': 1344858571,
             'age_limit': 12,
         },
+        'params': {
+            'skip_download': 'Download only works from Germany',
+        }
     }
 
     def _real_extract(self, url):
@@ -46,7 +46,18 @@ class NFLIE(InfoExtractor):
                 'timestamp': 1388354455,
                 'thumbnail': 're:^https?://.*\.jpg$',
             }
-        }
+        },
+        {
+            'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
+            'info_dict': {
+                'id': '0ap3000000467607',
+                'ext': 'mp4',
+                'title': 'Frustrations flare on the field',
+                'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
+                'timestamp': 1422850320,
+                'upload_date': '20150202',
+            },
+        },
     ]
 
     @staticmethod
@@ -80,7 +91,11 @@ class NFLIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         config_url = NFLIE.prepend_host(host, self._search_regex(
-            r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL'))
+            r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL',
+            default='static/content/static/config/video/config.json'))
+        # For articles, the id in the url is not the video id
+        video_id = self._search_regex(
+            r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id)
         config = self._download_json(config_url, video_id,
                                      note='Downloading player config')
         url_template = NFLIE.prepend_host(
@@ -1,8 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 from ..utils import (
@@ -11,7 +9,7 @@ from ..utils import (
 
 
 class NormalbootsIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
+    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
     _TEST = {
         'url': 'http://normalboots.com/video/home-alone-games-jontron/',
         'md5': '8bf6de238915dd501105b44ef5f1e0f6',
@@ -30,19 +28,22 @@ class NormalbootsIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
-                                                 webpage, 'uploader')
-        raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
-                                                  webpage, 'date')
-        video_upload_date = unified_strdate(raw_upload_date)
 
-        player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
+        video_uploader = self._html_search_regex(
+            r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
+            webpage, 'uploader', fatal=False)
+        video_upload_date = unified_strdate(self._html_search_regex(
+            r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
+            webpage, 'date', fatal=False))
+
+        player_url = self._html_search_regex(
+            r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"',
+            webpage, 'player url')
         player_page = self._download_webpage(player_url, video_id)
-        video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
+        video_url = self._html_search_regex(
+            r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
 
         return {
             'id': video_id,
@@ -22,7 +22,7 @@ class NPOBaseIE(InfoExtractor):
 
 class NPOIE(NPOBaseIE):
     IE_NAME = 'npo.nl'
-    _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
 
     _TESTS = [
         {
@@ -161,6 +161,13 @@ class NPOIE(NPOBaseIE):
 
         self._sort_formats(formats)
 
+        subtitles = {}
+        if metadata.get('tt888') == 'ja':
+            subtitles['nl'] = [{
+                'ext': 'vtt',
+                'url': 'http://e.omroep.nl/tt888/%s' % video_id,
+            }]
+
         return {
             'id': video_id,
             'title': metadata['titel'],
@@ -169,12 +176,13 @@ class NPOIE(NPOBaseIE):
             'upload_date': unified_strdate(metadata.get('gidsdatum')),
             'duration': parse_duration(metadata.get('tijdsduur')),
             'formats': formats,
+            'subtitles': subtitles,
         }
 
 
 class NPOLiveIE(NPOBaseIE):
     IE_NAME = 'npo.nl:live'
-    _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
 
     _TEST = {
         'url': 'http://www.npo.nl/live/npo-1',
@@ -249,6 +257,84 @@ class NPOLiveIE(NPOBaseIE):
         }
 
 
+class NPORadioIE(InfoExtractor):
+    IE_NAME = 'npo.nl:radio'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
+
+    _TEST = {
+        'url': 'http://www.npo.nl/radio/radio-1',
+        'info_dict': {
+            'id': 'radio-1',
+            'ext': 'mp3',
+            'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    @staticmethod
+    def _html_get_attribute_regex(attribute):
+        return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            self._html_get_attribute_regex('data-channel'), webpage, 'title')
+
+        stream = self._parse_json(
+            self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
+            video_id)
+
+        codec = stream.get('codec')
+
+        return {
+            'id': video_id,
+            'url': stream['url'],
+            'title': self._live_title(title),
+            'acodec': codec,
+            'ext': codec,
+            'is_live': True,
+        }
+
+
+class NPORadioFragmentIE(InfoExtractor):
+    IE_NAME = 'npo.nl:radio:fragment'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
+        'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
+        'info_dict': {
+            'id': '174356',
+            'ext': 'mp3',
+            'title': 'Jubileumconcert Willeke Alberti',
+        },
+    }
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, audio_id)
+
+        title = self._html_search_regex(
+            r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
+            webpage, 'title')
+
+        audio_url = self._search_regex(
+            r"data-streams='([^']+)'", webpage, 'audio url')
+
+        return {
+            'id': audio_id,
+            'url': audio_url,
+            'title': title,
+        }
+
+
 class TegenlichtVproIE(NPOIE):
     IE_NAME = 'tegenlicht.vpro.nl'
     _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
@@ -10,7 +10,6 @@ from ..utils import (
     parse_duration,
     unified_strdate,
 )
-from .subtitles import SubtitlesInfoExtractor
 
 
 class NRKIE(InfoExtractor):
@@ -73,7 +72,7 @@ class NRKIE(InfoExtractor):
     }
 
 
-class NRKTVIE(SubtitlesInfoExtractor):
+class NRKTVIE(InfoExtractor):
     _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 
     _TESTS = [
@@ -156,7 +155,7 @@ class NRKTVIE(SubtitlesInfoExtractor):
         if self._downloader.params.get('verbose', False):
             self.to_screen('[debug] %s' % txt)
 
-    def _extract_captions(self, subtitlesurl, video_id, baseurl):
+    def _get_subtitles(self, subtitlesurl, video_id, baseurl):
         url = "%s%s" % (baseurl, subtitlesurl)
         self._debug_print('%s: Subtitle url: %s' % (video_id, url))
         captions = self._download_xml(url, video_id, 'Downloading subtitles')
@@ -170,7 +169,10 @@ class NRKTVIE(SubtitlesInfoExtractor):
             endtime = self._seconds2str(begin + duration)
             text = '\n'.join(p.itertext())
             srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
-        return {lang: srt}
+        return {lang: [
+            {'ext': 'ttml', 'url': url},
+            {'ext': 'srt', 'data': srt},
+        ]}
 
     def _extract_f4m(self, manifest_url, video_id):
         return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
@@ -243,10 +245,7 @@ class NRKTVIE(SubtitlesInfoExtractor):
             webpage, 'subtitle URL', default=None)
         subtitles = None
         if subtitles_url:
-            subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+            subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
 
         return {
             'id': video_id,
@@ -3,7 +3,9 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    unescapeHTML
+    clean_html,
+    xpath_text,
+    int_or_none,
 )
 
 
@@ -14,73 +16,63 @@ class NTVRuIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.ntv.ru/novosti/863142/',
+            'md5': 'ba7ea172a91cb83eb734cad18c10e723',
             'info_dict': {
                 'id': '746000',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+                'thumbnail': 're:^http://.*\.jpg',
                 'duration': 136,
             },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
         },
         {
             'url': 'http://www.ntv.ru/video/novosti/750370/',
+            'md5': 'adecff79691b4d71e25220a191477124',
             'info_dict': {
                 'id': '750370',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+                'thumbnail': 're:^http://.*\.jpg',
                 'duration': 172,
            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
         },
         {
             'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
+            'md5': '82dbd49b38e3af1d00df16acbeab260c',
             'info_dict': {
                 'id': '747480',
-                'ext': 'flv',
-                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
-                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
+                'ext': 'mp4',
+                'title': '«Сегодня». 21 марта 2014 года. 16:00',
+                'description': '«Сегодня». 21 марта 2014 года. 16:00',
+                'thumbnail': 're:^http://.*\.jpg',
                 'duration': 1496,
             },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
         },
         {
             'url': 'http://www.ntv.ru/kino/Koma_film',
+            'md5': 'f825770930937aa7e5aca0dc0d29319a',
             'info_dict': {
-                'id': '758100',
-                'ext': 'flv',
+                'id': '1007609',
+                'ext': 'mp4',
                 'title': 'Остросюжетный фильм «Кома»',
                 'description': 'Остросюжетный фильм «Кома»',
+                'thumbnail': 're:^http://.*\.jpg',
                 'duration': 5592,
            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
         },
         {
             'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
+            'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
             'info_dict': {
                 'id': '751482',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': '«Дело врачей»: «Деревце жизни»',
                 'description': '«Дело врачей»: «Деревце жизни»',
+                'thumbnail': 're:^http://.*\.jpg',
                 'duration': 2590,
            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            },
         },
     ]
 
@@ -92,45 +84,36 @@ class NTVRuIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        page = self._download_webpage(url, video_id)
 
-        video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
+        webpage = self._download_webpage(url, video_id)
 
-        player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
-        title = unescapeHTML(player.find('./data/title').text)
-        description = unescapeHTML(player.find('./data/description').text)
+        video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
+
+        player = self._download_xml(
+            'http://www.ntv.ru/vi%s/' % video_id,
+            video_id, 'Downloading video XML')
+        title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
+        description = clean_html(xpath_text(player, './data/description', 'description'))
 
         video = player.find('./data/video')
-        video_id = video.find('./id').text
-        thumbnail = video.find('./splash').text
-        duration = int(video.find('./totaltime').text)
-        view_count = int(video.find('./views').text)
-        puid22 = video.find('./puid22').text
+        video_id = xpath_text(video, './id', 'video id')
+        thumbnail = xpath_text(video, './splash', 'thumbnail')
+        duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
+        view_count = int_or_none(xpath_text(video, './views', 'view count'))
 
-        apps = {
-            '4': 'video1',
-            '7': 'video2',
-        }
-
-        app = apps.get(puid22, apps['4'])
+        token = self._download_webpage(
+            'http://stat.ntv.ru/services/access/token',
+            video_id, 'Downloading access token')
 
         formats = []
         for format_id in ['', 'hi', 'webm']:
-            file = video.find('./%sfile' % format_id)
-            if file is None:
+            file_ = video.find('./%sfile' % format_id)
+            if file_ is None:
                 continue
             size = video.find('./%ssize' % format_id)
             formats.append({
-                'url': 'rtmp://media.ntv.ru/%s' % app,
-                'app': app,
-                'play_path': file.text,
-                'rtmp_conn': 'B:1',
-                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
-                'page_url': 'http://www.ntv.ru',
-                'flash_version': 'LNX 11,2,202,341',
-                'rtmp_live': True,
-                'ext': 'flv',
-                'filesize': int(size.text),
+                'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
+                'filesize': int_or_none(size.text if size is not None else None),
             })
         self._sort_formats(formats)
 
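Note: besides switching from RTMP to tokenized HTTP URLs, the ntv rewrite replaces hard int(...) casts on XML text with int_or_none, so an absent totaltime or views node yields None instead of raising. A minimal sketch of that conversion helper (the real utility also supports scaling and a get_attr argument):

    def int_or_none(v, default=None):
        # Sketch: tolerate None/empty input instead of raising.
        if v is None or v == '':
            return default
        return int(v)

    assert int_or_none('136') == 136
    assert int_or_none(None) is None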
@@ -1,9 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     js_to_json,
@@ -11,7 +8,7 @@ from ..utils import (
 
 
 class PatreonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
+    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
     _TESTS = [
         {
             'url': 'http://www.patreon.com/creation?hid=743933',
@@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*$',
             },
         },
+        {
+            'url': 'https://www.patreon.com/creation?hid=1682498',
+            'info_dict': {
+                'id': 'SU4fj_aEMVw',
+                'ext': 'mp4',
+                'title': 'I\'m on Patreon!',
+                'uploader': 'TraciJHines',
+                'thumbnail': 're:^https?://.*$',
+                'upload_date': '20150211',
+                'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+                'uploader_id': 'TraciJHines',
+            },
+            'params': {
+                'noplaylist': True,
+                'skip_download': True,
+            }
+        }
     ]
 
     # Currently Patreon exposes download URL via hidden CSS, so login is not
@@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor):
         '''
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         title = self._og_search_title(webpage).strip()
 
         attach_fn = self._html_search_regex(
             r'<div class="attach"><a target="_blank" href="([^"]+)">',
             webpage, 'attachment URL', default=None)
+        embed = self._html_search_regex(
+            r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
+            webpage, 'embedded URL', default=None)
+
         if attach_fn is not None:
             video_url = 'http://www.patreon.com' + attach_fn
             thumbnail = self._og_search_thumbnail(webpage)
             uploader = self._html_search_regex(
                 r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
+        elif embed is not None:
+            return self.url_result(embed)
         else:
-            playlist_js = self._search_regex(
+            playlist = self._parse_json(self._search_regex(
                 r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
-                webpage, 'playlist JSON')
-            playlist_json = js_to_json(playlist_js)
-            playlist = json.loads(playlist_json)
+                webpage, 'playlist JSON'),
+                video_id, transform_source=js_to_json)
             data = playlist[0]
             video_url = self._proto_relative_url(data['mp3'])
             thumbnail = self._proto_relative_url(data.get('cover'))
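Note: patreon.py now folds the js_to_json step into _parse_json via transform_source, replacing the two-step playlist_js / json.loads dance. The idea, shown with a deliberately reduced stand-in for js_to_json (the real helper also handles escapes, bare keys and trailing commas):

    import json
    import re

    def js_to_json_sketch(code):
        # Reduced stand-in: just requote single-quoted strings.
        return re.sub(r"'([^']*)'", r'"\1"', code)

    def parse_json(json_string, video_id, transform_source=None):
        # transform_source runs before parsing, mirroring _parse_json.
        if transform_source:
            json_string = transform_source(json_string)
        return json.loads(json_string)

    playlist = parse_json(
        "[{'mp3': '//example.com/a.mp3', 'cover': '//example.com/a.jpg'}]",
        '743933', transform_source=js_to_json_sketch)
    assert playlist[0]['mp3'] == '//example.com/a.mp3'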
@@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor):
 
         quality = qualities(['sd', 'hd'])
         sources = json.loads(js_to_json(self._search_regex(
-            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+            r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
+            webpage, 'sources')))
         formats = []
-        for container, s in sources.items():
-            for qname, video_url in s.items():
-                formats.append({
-                    'url': video_url,
-                    'container': container,
-                    'format_id': '%s-%s' % (container, qname),
-                    'quality': quality(qname),
-                })
+        for qname, video_url in sources.items():
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'format_id': qname,
+                'quality': quality(qname),
+            })
         self._sort_formats(formats)
 
         return {
@@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
 
         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
         video_uploader = self._html_search_regex(
-            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
+            r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
             webpage, 'uploader', fatal=False)
         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
         if thumbnail:
@@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor):
             'formats': formats,
             'age_limit': 18,
         }
+
+
+class PornHubPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/playlist/6201671',
+        'info_dict': {
+            'id': '6201671',
+            'title': 'P0p4',
+        },
+        'playlist_mincount': 35,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
+            for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
+        ]
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
+            playlist_id)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist.get('title'), playlist.get('description'))
youtube_dl/extractor/r7.py (new file, 88 lines)
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+    unescapeHTML,
+    int_or_none,
+)
+
+
+class R7IE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://
+                    (?:
+                        (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
+                        noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
+                        player\.r7\.com/video/i/
+                    )
+                    (?P<id>[\da-f]{24})
+                '''
+    _TESTS = [{
+        'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
+        'md5': '403c4e393617e8e8ddc748978ee8efde',
+        'info_dict': {
+            'id': '54e7050b0cf2ff57e0279389',
+            'ext': 'mp4',
+            'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 98,
+            'like_count': int,
+            'view_count': int,
+        },
+    }, {
+        'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
+        'only_matching': True,
+    }, {
+        'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://player.r7.com/video/i/%s' % video_id, video_id)
+
+        item = self._parse_json(js_to_json(self._search_regex(
+            r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
+
+        title = unescapeHTML(item['title'])
+        thumbnail = item.get('init', {}).get('thumbUri')
+        duration = None
+
+        statistics = item.get('statistics', {})
+        like_count = int_or_none(statistics.get('likes'))
+        view_count = int_or_none(statistics.get('views'))
+
+        formats = []
+        for format_key, format_dict in item['playlist'][0].items():
+            src = format_dict.get('src')
+            if not src:
+                continue
+            format_id = format_dict.get('format') or format_key
+            if duration is None:
+                duration = format_dict.get('duration')
+            if '.f4m' in src:
+                formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
+            elif src.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
+            else:
+                formats.append({
+                    'url': src,
+                    'format_id': format_id,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'like_count': like_count,
+            'view_count': view_count,
+            'formats': formats,
+        }
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import json
-
 from .common import InfoExtractor
 
 
@@ -10,13 +8,13 @@ class RadioDeIE(InfoExtractor):
     _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
     _TEST = {
         'url': 'http://ndr2.radio.de/',
-        'md5': '3b4cdd011bc59174596b6145cda474a4',
         'info_dict': {
             'id': 'ndr2',
             'ext': 'mp3',
             'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
             'description': 'md5:591c49c702db1a33751625ebfb67f273',
             'thumbnail': 're:^https?://.*\.png',
+            'is_live': True,
         },
         'params': {
             'skip_download': True,
@@ -25,16 +23,15 @@ class RadioDeIE(InfoExtractor):
 
     def _real_extract(self, url):
         radio_id = self._match_id(url)
 
         webpage = self._download_webpage(url, radio_id)
+        jscode = self._search_regex(
+            r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
+            webpage, 'broadcast')
 
-        broadcast = json.loads(self._search_regex(
-            r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
-            webpage, 'broadcast'))
+        broadcast = self._parse_json(jscode, radio_id)
 
         title = self._live_title(broadcast['name'])
         description = broadcast.get('description') or broadcast.get('shortDescription')
-        thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
+        thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')
 
         formats = [{
             'url': stream['streamUrl'],
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
 )
@@ -12,7 +12,7 @@ from ..utils import (
 )
 
 
-class RaiIE(SubtitlesInfoExtractor):
+class RaiIE(InfoExtractor):
     _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
     _TESTS = [
         {
@@ -89,15 +89,7 @@ class RaiIE(SubtitlesInfoExtractor):
                 'ext': 'mp4',
             })
 
-        if self._downloader.params.get('listsubtitles', False):
-            page = self._download_webpage(url, video_id)
-            self._list_available_subtitles(video_id, page)
-            return
-
-        subtitles = {}
-        if self._have_to_download_any_subtitles:
-            page = self._download_webpage(url, video_id)
-            subtitles = self.extract_subtitles(video_id, page)
+        subtitles = self.extract_subtitles(video_id, url)
 
         return {
             'id': video_id,
@@ -111,7 +103,8 @@ class RaiIE(SubtitlesInfoExtractor):
             'subtitles': subtitles,
         }
 
-    def _get_available_subtitles(self, video_id, webpage):
+    def _get_subtitles(self, video_id, url):
+        webpage = self._download_webpage(url, video_id)
         subtitles = {}
         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
         if m:
@@ -120,5 +113,8 @@ class RaiIE(SubtitlesInfoExtractor):
             SRT_EXT = '.srt'
             if captions.endswith(STL_EXT):
                 captions = captions[:-len(STL_EXT)] + SRT_EXT
-            subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
+            subtitles['it'] = [{
+                'ext': 'srt',
+                'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
+            }]
         return subtitles
|
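Reviewer note: the RaiIE hunks above follow this release's subtitle refactoring: SubtitlesInfoExtractor is folded into InfoExtractor, each extractor implements _get_subtitles() returning a language-to-variants dict, and the option checks move behind self.extract_subtitles(). A rough sketch of the dispatch the base class presumably performs, reconstructed from the removed call-site logic rather than from the library source:

class SubtitleDispatchSketch(object):
    # Sketch: only do the expensive per-site work when the user asked
    # for subtitles, mirroring the checks deleted from RaiIE above.
    def extract_subtitles(self, video_id, url):
        params = self._downloader.params
        if params.get('writesubtitles', False) or params.get('listsubtitles', False):
            return self._get_subtitles(video_id, url)
        return {}

    def _get_subtitles(self, video_id, url):
        # Overridden per extractor; RaiIE above returns e.g.
        # {'it': [{'ext': 'srt', 'url': 'http://www.rai.tv/...'}]}
        raise NotImplementedError()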
@@ -1,16 +1,25 @@
+# coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
-from ..utils import parse_duration
+from ..utils import (
+    int_or_none,
+    parse_duration,
+)


-class RtlXlIE(InfoExtractor):
-    IE_NAME = 'rtlxl.nl'
-    _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+class RtlNlIE(InfoExtractor):
+    IE_NAME = 'rtl.nl'
+    IE_DESC = 'rtl.nl and rtlxl.nl'
+    _VALID_URL = r'''(?x)
+        https?://(www\.)?
+        (?:
+            rtlxl\.nl/\#!/[^/]+/|
+            rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
+        )
+        (?P<id>[0-9a-f-]+)'''

-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
         'md5': 'cc16baa36a6c169391f0764fa6b16654',
         'info_dict': {
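Reviewer note: the widened _VALID_URL now accepts both rtlxl.nl hash-bang pages and rtl.nl video_embed URLs, and the capture group was renamed from uuid to id so self._match_id() works. A quick standalone check against the two test URLs from this diff (regex copied verbatim):

import re

_VALID_URL = r'''(?x)
    https?://(www\.)?
    (?:
        rtlxl\.nl/\#!/[^/]+/|
        rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
    )
    (?P<id>[0-9a-f-]+)'''

for url in (
    'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
    'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
):
    print(re.match(_VALID_URL, url).group('id'))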
@@ -22,21 +31,30 @@ class RtlXlIE(InfoExtractor):
             'upload_date': '20140814',
             'duration': 576.880,
         },
-    }
+    }, {
+        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
+        'md5': 'dea7474214af1271d91ef332fb8be7ea',
+        'info_dict': {
+            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
+            'ext': 'mp4',
+            'timestamp': 1424039400,
+            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
+            'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
+            'upload_date': '20150215',
+            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
+        }
+    }]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        uuid = mobj.group('uuid')
+        uuid = self._match_id(url)

         info = self._download_json(
             'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
             uuid)

         material = info['material'][0]
-        episode_info = info['episodes'][0]

         progname = info['abstracts'][0]['name']
         subtitle = material['title'] or info['episodes'][0]['name']
+        description = material.get('synopsis') or info['episodes'][0]['synopsis']

         # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
         videopath = material['videopath'].replace('.f4m', '.m3u8')
@@ -58,14 +76,29 @@ class RtlXlIE(InfoExtractor):
                 'quality': 0,
             }
         ])

         self._sort_formats(formats)

+        thumbnails = []
+        meta = info.get('meta', {})
+        for p in ('poster_base_url', '"thumb_base_url"'):
+            if not meta.get(p):
+                continue
+
+            thumbnails.append({
+                'url': self._proto_relative_url(meta[p] + uuid),
+                'width': int_or_none(self._search_regex(
+                    r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
+                'height': int_or_none(self._search_regex(
+                    r'/sz=[0-9]+x([0-9]+)',
+                    meta[p], 'thumbnail height', fatal=False))
+            })
+
         return {
             'id': uuid,
             'title': '%s - %s' % (progname, subtitle),
             'formats': formats,
             'timestamp': material['original_date'],
-            'description': episode_info['synopsis'],
+            'description': description,
             'duration': parse_duration(material.get('duration')),
+            'thumbnails': thumbnails,
         }
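Reviewer note: the thumbnail block added above recovers width and height from the sz=<width>x<height> segment of the base URLs under info['meta']. A standalone version of that parsing (regexes copied from the diff, sample base URL made up):

import re


def thumb_dimensions(base_url):
    # The sz=WxH path segment carries the dimensions, e.g.
    # //screenshots.rtl.nl/system/thumb/sz=1024x576/uuid=
    width = re.search(r'/sz=([0-9]+)', base_url)
    height = re.search(r'/sz=[0-9]+x([0-9]+)', base_url)
    return (int(width.group(1)) if width else None,
            int(height.group(1)) if height else None)


print(thumb_dimensions('//screenshots.rtl.nl/system/thumb/sz=1024x576/uuid='))
# (1024, 576)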
@@ -91,6 +91,15 @@ class RTLnowIE(InfoExtractor):
             },
         },
         {
+            'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5',
+            'info_dict': {
+                'id': '188729',
+                'ext': 'flv',
+                'upload_date': '20150204',
+                'description': 'md5:5e1ce23095e61a79c166d134b683cecc',
+                'title': 'Der Bachelor - Folge 4',
+            }
+        }, {
             'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
             'only_matching': True,
         },
@@ -134,9 +143,18 @@ class RTLnowIE(InfoExtractor):
                     'player_url': video_page_url + 'includes/vodplayer.swf',
                 }
             else:
-                fmt = {
-                    'url': filename.text,
-                }
+                mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
+                if mobj:
+                    fmt = {
+                        'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
+                        'play_path': 'mp4:' + mobj.group('play_path'),
+                        'page_url': url,
+                        'player_url': video_page_url + 'includes/vodplayer.swf',
+                    }
+                else:
+                    fmt = {
+                        'url': filename.text,
+                    }
             fmt.update({
                 'width': int_or_none(filename.get('width')),
                 'height': int_or_none(filename.get('height')),
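Reviewer note: instead of passing a raw .f4m manifest URL straight through, RTLnow's fallback branch now rewrites it into an RTMPE request, pulling the hoster name and play path out of the manifest URL. A standalone sketch with a made-up manifest URL of the expected shape:

import re

filename_text = 'http://example.invalid/abr/somehoster/videos/show/folge4.f4m'

mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename_text)
if mobj:
    print('rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'))  # rtmpe://fmspay-fra2.rtl.de/somehoster
    print('mp4:' + mobj.group('play_path'))                      # mp4:show/folge4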
@@ -1,16 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import json
+import re

 from .common import InfoExtractor
-from ..utils import js_to_json


 class RTPIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
     _TESTS = [{
         'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+        'md5': 'e736ce0c665e459ddb818546220b4ef8',
         'info_dict': {
             'id': 'e174042',
             'ext': 'mp3',
@@ -18,9 +18,6 @@ class RTPIE(InfoExtractor):
             'description': 'As paixões musicais de António Cartaxo e António Macedo',
             'thumbnail': 're:^https?://.*\.jpg',
         },
-        'params': {
-            'skip_download': True,  # RTMP download
-        },
     }, {
         'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
         'only_matching': True,
@@ -37,20 +34,48 @@ class RTPIE(InfoExtractor):

         player_config = self._search_regex(
             r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
-        config = json.loads(js_to_json(player_config))
+        config = self._parse_json(player_config, video_id)

         path, ext = config.get('file').rsplit('.', 1)
         formats = [{
+            'format_id': 'rtmp',
+            'ext': ext,
+            'vcodec': config.get('type') == 'audio' and 'none' or None,
+            'preference': -2,
+            'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
             'app': config.get('application'),
             'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
             'page_url': url,
-            'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
             'rtmp_live': config.get('live', False),
-            'ext': ext,
-            'vcodec': config.get('type') == 'audio' and 'none' or None,
             'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
+            'rtmp_real_time': True,
         }]

+        # Construct regular HTTP download URLs
+        replacements = {
+            'audio': {
+                'format_id': 'mp3',
+                'pattern': r'^nas2\.share/wavrss/',
+                'repl': 'http://rsspod.rtp.pt/podcasts/',
+                'vcodec': 'none',
+            },
+            'video': {
+                'format_id': 'mp4_h264',
+                'pattern': r'^nas2\.share/h264/',
+                'repl': 'http://rsspod.rtp.pt/videocasts/',
+                'vcodec': 'h264',
+            },
+        }
+        r = replacements[config['type']]
+        if re.match(r['pattern'], config['file']) is not None:
+            formats.append({
+                'format_id': r['format_id'],
+                'url': re.sub(r['pattern'], r['repl'], config['file']),
+                'vcodec': r['vcodec'],
+            })
+
+        self._sort_formats(formats)
+
         return {
             'id': video_id,
             'title': title,
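Reviewer note: the replacements table added to RTPIE rewrites internal nas2.share/... paths into plain HTTP podcast and videocast URLs, so downloads no longer require an RTMP-capable client (which is also why the skip_download test param could be dropped). A standalone check of the rewrite; the patterns are copied from the diff, the config values are made up:

import re

replacements = {
    'audio': {'pattern': r'^nas2\.share/wavrss/', 'repl': 'http://rsspod.rtp.pt/podcasts/'},
    'video': {'pattern': r'^nas2\.share/h264/', 'repl': 'http://rsspod.rtp.pt/videocasts/'},
}

# Made-up example of config['type'] / config['file']:
config = {'type': 'audio', 'file': 'nas2.share/wavrss/paixoescruzadas/e174042.mp3'}

r = replacements[config['type']]
if re.match(r['pattern'], config['file']) is not None:
    print(re.sub(r['pattern'], r['repl'], config['file']))
    # http://rsspod.rtp.pt/podcasts/paixoescruzadas/e174042.mp3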
@@ -6,12 +6,14 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
     int_or_none,
     parse_duration,
     parse_iso8601,
     unescapeHTML,
+    xpath_text,
 )


@@ -159,11 +161,27 @@ class RTSIE(InfoExtractor):
             return int_or_none(self._search_regex(
                 r'-([0-9]+)k\.', url, 'bitrate', default=None))

-        formats = [{
-            'format_id': fid,
-            'url': furl,
-            'tbr': extract_bitrate(furl),
-        } for fid, furl in info['streams'].items()]
+        formats = []
+        for format_id, format_url in info['streams'].items():
+            if format_url.endswith('.f4m'):
+                token = self._download_xml(
+                    'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path,
+                    video_id, 'Downloading %s token' % format_id)
+                auth_params = xpath_text(token, './/authparams', 'auth params')
+                if not auth_params:
+                    continue
+                formats.extend(self._extract_f4m_formats(
+                    '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params),
+                    video_id, f4m_id=format_id))
+            elif format_url.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id))
+            else:
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'tbr': extract_bitrate(format_url),
+                })

         if 'media' in info:
             formats.extend([{
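Reviewer note: for Akamai HDS manifests (.f4m), the RTSIE loop above first requests a token from tp.srgssr.ch for the manifest path and appends the returned authparams before expanding the manifest via _extract_f4m_formats. A standalone sketch of just the URL assembly; the sample manifest URL and the token XML shape are assumptions inferred from the .//authparams lookup in the diff:

try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse  # Python 2

import xml.etree.ElementTree as etree

# Made-up manifest URL in the shape the code expects:
format_url = 'http://srgssr.example.akamaihd.net/z/rts/video/demo.csmil/manifest.f4m'

# The token request is keyed on the manifest path only:
token_url = 'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % urlparse(format_url).path
print(token_url)

# Assumed response shape; xpath_text(token, './/authparams') boils down
# to this findtext call:
token = etree.fromstring(
    '<token><authparams>hdnts=exp%3D1424000000%7Ehmac%3Dabcdef</authparams></token>')
auth_params = token.findtext('.//authparams')

print('%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params))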
@@ -6,6 +6,7 @@ import re
 import time

 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
     struct_unpack,
     remove_end,
@@ -96,12 +97,14 @@ class RTVEALaCartaIE(InfoExtractor):
         ).replace('.net.rtve', '.multimedia.cdn.rtve')
         video_path = self._download_webpage(
             auth_url, video_id, 'Getting video url')
-        # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
+        # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
         # the right Content-Length header and the mp4 format
-        video_url = (
-            'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
-            '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
-        )
+        video_url = compat_urlparse.urljoin(
+            'http://mvod1.akcdn.rtve.es/', video_path)
+
+        subtitles = None
+        if info.get('sbtFile') is not None:
+            subtitles = self.extract_subtitles(video_id, info['sbtFile'])

         return {
             'id': video_id,
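Reviewer note: compat_urlparse.urljoin replaces the hand-built string concatenation, and the hardcoded version/fingerprint query string is gone. urljoin also copes with either a relative or an absolute path coming back from the auth endpoint; both sample paths below are made up:

try:
    from urllib.parse import urljoin  # Python 3; compat_urlparse wraps this
except ImportError:
    from urlparse import urljoin  # Python 2

print(urljoin('http://mvod1.akcdn.rtve.es/', 'auth/video123.mp4'))
# http://mvod1.akcdn.rtve.es/auth/video123.mp4
print(urljoin('http://mvod1.akcdn.rtve.es/', 'http://other.cdn.example/video123.mp4'))
# http://other.cdn.example/video123.mp4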
@@ -109,8 +112,17 @@ class RTVEALaCartaIE(InfoExtractor):
             'url': video_url,
             'thumbnail': info.get('image'),
             'page_url': url,
+            'subtitles': subtitles,
         }

+    def _get_subtitles(self, video_id, sub_file):
+        subs = self._download_json(
+            sub_file + '.json', video_id,
+            'Downloading subtitles info')['page']['items']
+        return dict(
+            (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+            for s in subs)
+

 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'
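Reviewer note: the new RTVEALaCartaIE._get_subtitles maps the JSON subtitle listing into the language-to-variant-list structure used throughout this release's subtitle refactor. A standalone rendering of the transformation; the subs items are made up but shaped like the ['page']['items'] payload:

# Made-up subtitle items:
subs = [
    {'lang': 'es', 'src': 'http://www.rtve.es/subs/video123-es.vtt'},
    {'lang': 'en', 'src': 'http://www.rtve.es/subs/video123-en.vtt'},
]

subtitles = dict(
    (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
    for s in subs)

print(subtitles['es'][0]['url'])  # http://www.rtve.es/subs/video123-es.vtt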
Some files were not shown because too many files have changed in this diff.