Compare commits
233 Commits
2015.02.02
...
2015.02.19
Author | SHA1 | Date | |
---|---|---|---|
a21420389e | |||
6140baf4e1 | |||
8fc642eb5b | |||
e66e1a0046 | |||
d5c69f1da4 | |||
5c8a3f862a | |||
a3b9157f49 | |||
b88ba05356 | |||
b74d505577 | |||
9e2d7dca87 | |||
d236b37ac9 | |||
e880c66bd8 | |||
383456aa29 | |||
1a13940c8d | |||
3d54788495 | |||
71d53ace2f | |||
f37e3f99f0 | |||
bd03ffc16e | |||
1ac1af9b47 | |||
3bf5705316 | |||
1c2528c8a3 | |||
7bd15b1a03 | |||
6b961a85fd | |||
7707004043 | |||
a025d3c5a5 | |||
c460bdd56b | |||
b81a359eb6 | |||
d61aefb24c | |||
d305dd73a3 | |||
93a16ba238 | |||
85d5866177 | |||
9789d7535d | |||
d8443cd3f7 | |||
d47c26e168 | |||
81975f4693 | |||
b8b928d5cb | |||
3eff81fbf7 | |||
785521bf4f | |||
6d1a55a521 | |||
9cad27008b | |||
11e611a7fa | |||
72c1f8de06 | |||
6e99868e4c | |||
4d278fde64 | |||
f21e915fb9 | |||
6f53c63df6 | |||
1def5f359e | |||
15ec669374 | |||
a3fa5da496 | |||
30965ac66a | |||
09ab40b7d1 | |||
fa15607773 | |||
a91a2c1a83 | |||
16e7711e22 | |||
5cda4eda72 | |||
98f000409f | |||
4a8d4a53b1 | |||
4cd95bcbc3 | |||
be24c8697f | |||
0d93378887 | |||
4069766c52 | |||
7010577720 | |||
8ac27a68e6 | |||
46312e0b46 | |||
f9216ed6ad | |||
65bf37ef83 | |||
f740fae2a4 | |||
fbc503d696 | |||
662435f728 | |||
163d966707 | |||
85729c51af | |||
1db5fbcfe3 | |||
59b8ab5834 | |||
a568180441 | |||
85e80f71cd | |||
bfa6bdcd8b | |||
03cd72b007 | |||
5bfd430f81 | |||
73fac4e911 | |||
8fb474fb17 | |||
f813928e4b | |||
b9c7a97318 | |||
9fb2f1cd6d | |||
6ca7732d5e | |||
b0ab0fac49 | |||
a294bce82f | |||
76d1466b08 | |||
1888d3f7b3 | |||
c2787701cc | |||
52e1d0ccc4 | |||
10e3c4c221 | |||
68f2d273bf | |||
7c86c21662 | |||
ae1580d790 | |||
3215c50f25 | |||
36f73e8044 | |||
a4f3d779db | |||
d9aa2b784d | |||
cffcbc02de | |||
9347fddbfc | |||
037e9437e4 | |||
36e7a4ca2e | |||
ae6423d704 | |||
7105440cec | |||
c80b9cd280 | |||
171ca612af | |||
c3d64fc1b3 | |||
7c24ce225d | |||
08b38d5401 | |||
024c53694d | |||
7e6011101f | |||
c40feaba77 | |||
5277f09dfc | |||
2d30521ab9 | |||
050fa43561 | |||
f36f92f4da | |||
124f3bc67d | |||
d304209a85 | |||
8367d3f3cb | |||
c56d7d899d | |||
ea5db8469e | |||
3811c567e7 | |||
8708d76425 | |||
054fe3cc40 | |||
af0d11f244 | |||
9650885be9 | |||
596ac6e31f | |||
612ee37365 | |||
442c37b7a9 | |||
04bbe41330 | |||
8f84f57183 | |||
6a78740211 | |||
c0e1a415fd | |||
bf8f082a90 | |||
2f543a2142 | |||
7e5db8c930 | |||
f7a211dcc8 | |||
845734773d | |||
347de4931c | |||
8829650513 | |||
c73fae1e2e | |||
834bf069d2 | |||
c06a9fa34f | |||
753fad4adc | |||
34814eb66e | |||
3a5bcd0326 | |||
99c2398bc6 | |||
28f1272870 | |||
f18e3a2fc0 | |||
c4c5dc27cb | |||
2caf182f37 | |||
43f244b6d5 | |||
1309b396d0 | |||
ba61796458 | |||
3255fe7141 | |||
e98b8e79ea | |||
196121c51b | |||
5269028951 | |||
f7bc056b5a | |||
a0f7198544 | |||
dd8930684e | |||
bdb186f3b0 | |||
64f9baa084 | |||
b29231c040 | |||
6128bf07a9 | |||
2ec19e9558 | |||
9ddb6925bf | |||
12931e1c6e | |||
41c23b0da5 | |||
2578ab19e4 | |||
d87ec897e9 | |||
3bd4bffb1c | |||
c36b09a502 | |||
641eb10d34 | |||
955c5505e7 | |||
69319969de | |||
a14292e848 | |||
5d678df64a | |||
8ca8cbe2bd | |||
ba322d8209 | |||
2f38289b79 | |||
f23a3ca699 | |||
77d2b106cc | |||
c0e46412e9 | |||
0161353d7d | |||
2b4ecde2c8 | |||
b3a286d69d | |||
467d3c9a0c | |||
ad5747bad1 | |||
d6eb66ed3c | |||
7f2a9f1b49 | |||
1e1896f2de | |||
c831973366 | |||
1a2548d9e9 | |||
3900eec27c | |||
a02d212638 | |||
9c91a8fa70 | |||
41469f335e | |||
67ce4f8820 | |||
bc63d56cca | |||
c893d70805 | |||
3ee6e02564 | |||
e3aaace400 | |||
300753a069 | |||
f13b88c616 | |||
60ca389c64 | |||
1b0f3919c1 | |||
6a348cf7d5 | |||
9e91449c8d | |||
25e5ebf382 | |||
7dfc356625 | |||
58ba6c0160 | |||
f076b63821 | |||
12f0454cd6 | |||
cd7342755f | |||
9bb8e0a3f9 | |||
1a6373ef39 | |||
f6c24009be | |||
d862042301 | |||
23d9ded655 | |||
4c1a017e69 | |||
ee623d9247 | |||
330537d08a | |||
2cf0ecac7b | |||
d200b11c7e | |||
d0eca21021 | |||
c1147c05e1 | |||
55898ad2cf | |||
a465808592 | |||
5c4862bad4 | |||
995029a142 | |||
a57b562cff | |||
531572578e |
3
AUTHORS
3
AUTHORS
@ -108,3 +108,6 @@ Enam Mijbah Noor
|
|||||||
David Luhmer
|
David Luhmer
|
||||||
Shaya Goldberg
|
Shaya Goldberg
|
||||||
Paul Hartmann
|
Paul Hartmann
|
||||||
|
Frans de Jonge
|
||||||
|
Robin de Rooij
|
||||||
|
Ryan Schmidt
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
**Please include the full output of youtube-dl when run with `-v`**.
|
||||||
|
|
||||||
|
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||||
|
|
||||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||||
|
|
||||||
@ -122,7 +124,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
|||||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
$ git add youtube_dl/extractor/__init__.py
|
$ git add youtube_dl/extractor/__init__.py
|
||||||
|
5
Makefile
5
Makefile
@ -1,10 +1,7 @@
|
|||||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
|
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||||
|
|
||||||
cleanall: clean
|
|
||||||
rm -f youtube-dl youtube-dl.exe
|
|
||||||
|
|
||||||
PREFIX ?= /usr/local
|
PREFIX ?= /usr/local
|
||||||
BINDIR ?= $(PREFIX)/bin
|
BINDIR ?= $(PREFIX)/bin
|
||||||
|
67
README.md
67
README.md
@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
on Windows)
|
on Windows)
|
||||||
--flat-playlist Do not extract the videos of a playlist,
|
--flat-playlist Do not extract the videos of a playlist,
|
||||||
only list them.
|
only list them.
|
||||||
|
--no-color Do not emit color codes in output.
|
||||||
|
|
||||||
## Network Options:
|
## Network Options:
|
||||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
||||||
@ -119,6 +120,23 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
COUNT views
|
COUNT views
|
||||||
--max-views COUNT Do not download any videos with more than
|
--max-views COUNT Do not download any videos with more than
|
||||||
COUNT views
|
COUNT views
|
||||||
|
--match-filter FILTER (Experimental) Generic video filter.
|
||||||
|
Specify any key (see help for -o for a list
|
||||||
|
of available keys) to match if the key is
|
||||||
|
present, !key to check if the key is not
|
||||||
|
present, key > NUMBER (like "comment_count >
|
||||||
|
12", also works with >=, <, <=, !=, =) to
|
||||||
|
compare against a number, and & to require
|
||||||
|
multiple matches. Values which are not
|
||||||
|
known are excluded unless you put a
|
||||||
|
question mark (?) after the operator. For
|
||||||
|
example, to only match videos that have
|
||||||
|
been liked more than 100 times and disliked
|
||||||
|
less than 50 times (or the dislike
|
||||||
|
functionality is not available at the given
|
||||||
|
service), but who also have a description,
|
||||||
|
use --match-filter "like_count > 100 &
|
||||||
|
dislike_count <? 50 & description" .
|
||||||
--no-playlist If the URL refers to a video and a
|
--no-playlist If the URL refers to a video and a
|
||||||
playlist, download only the video.
|
playlist, download only the video.
|
||||||
--age-limit YEARS download only videos suitable for the given
|
--age-limit YEARS download only videos suitable for the given
|
||||||
@ -143,6 +161,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--playlist-reverse Download playlist videos in reverse order
|
--playlist-reverse Download playlist videos in reverse order
|
||||||
--xattr-set-filesize (experimental) set file xattribute
|
--xattr-set-filesize (experimental) set file xattribute
|
||||||
ytdl.filesize with expected filesize
|
ytdl.filesize with expected filesize
|
||||||
|
--hls-prefer-native (experimental) Use the native HLS
|
||||||
|
downloader instead of ffmpeg.
|
||||||
--external-downloader COMMAND (experimental) Use the specified external
|
--external-downloader COMMAND (experimental) Use the specified external
|
||||||
downloader. Currently supports
|
downloader. Currently supports
|
||||||
aria2c,curl,wget
|
aria2c,curl,wget
|
||||||
@ -292,18 +312,20 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
video results by putting a condition in
|
video results by putting a condition in
|
||||||
brackets, as in -f "best[height=720]" (or
|
brackets, as in -f "best[height=720]" (or
|
||||||
-f "[filesize>10M]"). This works for
|
-f "[filesize>10M]"). This works for
|
||||||
filesize, height, width, tbr, abr, vbr, and
|
filesize, height, width, tbr, abr, vbr,
|
||||||
fps and the comparisons <, <=, >, >=, =, !=
|
asr, and fps and the comparisons <, <=, >,
|
||||||
. Formats for which the value is not known
|
>=, =, != and for ext, acodec, vcodec,
|
||||||
are excluded unless you put a question mark
|
container, and protocol and the comparisons
|
||||||
(?) after the operator. You can combine
|
=, != . Formats for which the value is not
|
||||||
format filters, so -f "[height <=?
|
known are excluded unless you put a
|
||||||
720][tbr>500]" selects up to 720p videos
|
question mark (?) after the operator. You
|
||||||
(or videos where the height is not known)
|
can combine format filters, so -f "[height
|
||||||
with a bitrate of at least 500 KBit/s. By
|
<=? 720][tbr>500]" selects up to 720p
|
||||||
default, youtube-dl will pick the best
|
videos (or videos where the height is not
|
||||||
quality. Use commas to download multiple
|
known) with a bitrate of at least 500
|
||||||
audio formats, such as -f
|
KBit/s. By default, youtube-dl will pick
|
||||||
|
the best quality. Use commas to download
|
||||||
|
multiple audio formats, such as -f
|
||||||
136/137/mp4/bestvideo,140/m4a/bestaudio.
|
136/137/mp4/bestvideo,140/m4a/bestaudio.
|
||||||
You can merge the video and audio of two
|
You can merge the video and audio of two
|
||||||
formats into a single file using -f <video-
|
formats into a single file using -f <video-
|
||||||
@ -377,6 +399,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
postprocessors (default)
|
postprocessors (default)
|
||||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||||
postprocessors
|
postprocessors
|
||||||
|
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
|
||||||
|
either the path to the binary or its
|
||||||
|
containing directory.
|
||||||
--exec CMD Execute a command on the file after
|
--exec CMD Execute a command on the file after
|
||||||
downloading, similar to find's -exec
|
downloading, similar to find's -exec
|
||||||
syntax. Example: --exec 'adb push {}
|
syntax. Example: --exec 'adb push {}
|
||||||
@ -490,11 +515,15 @@ If you want to play the video on a machine that is not running youtube-dl, you c
|
|||||||
|
|
||||||
### ERROR: no fmt_url_map or conn information found in video info
|
### ERROR: no fmt_url_map or conn information found in video info
|
||||||
|
|
||||||
youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
|
YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||||
|
|
||||||
### ERROR: unable to download video ###
|
### ERROR: unable to download video ###
|
||||||
|
|
||||||
youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
|
YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||||
|
|
||||||
|
### ExtractorError: Could not find JS function u'OF'
|
||||||
|
|
||||||
|
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||||
|
|
||||||
### SyntaxError: Non-ASCII character ###
|
### SyntaxError: Non-ASCII character ###
|
||||||
|
|
||||||
@ -532,6 +561,14 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
|
|||||||
youtube-dl -- -wNyEUrxzFU
|
youtube-dl -- -wNyEUrxzFU
|
||||||
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
|
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
|
||||||
|
|
||||||
|
### Can you add support for this anime video site, or site which shows current movies for free?
|
||||||
|
|
||||||
|
As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion in youtube-dl.
|
||||||
|
|
||||||
|
A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included in youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization.
|
||||||
|
|
||||||
|
Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
|
||||||
|
|
||||||
### How can I detect whether a given URL is supported by youtube-dl?
|
### How can I detect whether a given URL is supported by youtube-dl?
|
||||||
|
|
||||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||||
@ -728,7 +765,7 @@ In particular, every site support request issue should only pertain to services
|
|||||||
|
|
||||||
### Is anyone going to need the feature?
|
### Is anyone going to need the feature?
|
||||||
|
|
||||||
Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||||
|
|
||||||
### Is your question about youtube-dl?
|
### Is your question about youtube-dl?
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us
|
|||||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
||||||
|
|
||||||
/bin/echo -e "\n### First of all, testing..."
|
/bin/echo -e "\n### First of all, testing..."
|
||||||
make cleanall
|
make clean
|
||||||
if $skip_tests ; then
|
if $skip_tests ; then
|
||||||
echo 'SKIPPING TESTS'
|
echo 'SKIPPING TESTS'
|
||||||
else
|
else
|
||||||
@ -45,9 +45,9 @@ fi
|
|||||||
/bin/echo -e "\n### Changing version in version.py..."
|
/bin/echo -e "\n### Changing version in version.py..."
|
||||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||||
|
|
||||||
/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
|
/bin/echo -e "\n### Committing documentation and youtube_dl/version.py..."
|
||||||
make README.md
|
make README.md CONTRIBUTING.md supportedsites
|
||||||
git add README.md youtube_dl/version.py
|
git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py
|
||||||
git commit -m "release $version"
|
git commit -m "release $version"
|
||||||
|
|
||||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# Supported sites
|
# Supported sites
|
||||||
|
- **1tv**: Первый канал
|
||||||
- **1up.com**
|
- **1up.com**
|
||||||
- **220.ro**
|
- **220.ro**
|
||||||
- **24video**
|
- **24video**
|
||||||
@ -9,16 +10,21 @@
|
|||||||
- **8tracks**
|
- **8tracks**
|
||||||
- **9gag**
|
- **9gag**
|
||||||
- **abc.net.au**
|
- **abc.net.au**
|
||||||
|
- **Abc7News**
|
||||||
- **AcademicEarth:Course**
|
- **AcademicEarth:Course**
|
||||||
- **AddAnime**
|
- **AddAnime**
|
||||||
- **AdobeTV**
|
- **AdobeTV**
|
||||||
- **AdultSwim**
|
- **AdultSwim**
|
||||||
|
- **Aftenposten**
|
||||||
- **Aftonbladet**
|
- **Aftonbladet**
|
||||||
- **AlJazeera**
|
- **AlJazeera**
|
||||||
- **Allocine**
|
- **Allocine**
|
||||||
|
- **AlphaPorno**
|
||||||
- **anitube.se**
|
- **anitube.se**
|
||||||
- **AnySex**
|
- **AnySex**
|
||||||
- **Aparat**
|
- **Aparat**
|
||||||
|
- **AppleDailyAnimationNews**
|
||||||
|
- **AppleDailyRealtimeNews**
|
||||||
- **AppleTrailers**
|
- **AppleTrailers**
|
||||||
- **archive.org**: archive.org videos
|
- **archive.org**: archive.org videos
|
||||||
- **ARD**
|
- **ARD**
|
||||||
@ -30,8 +36,10 @@
|
|||||||
- **arte.tv:ddc**
|
- **arte.tv:ddc**
|
||||||
- **arte.tv:embed**
|
- **arte.tv:embed**
|
||||||
- **arte.tv:future**
|
- **arte.tv:future**
|
||||||
|
- **AtresPlayer**
|
||||||
|
- **ATTTechChannel**
|
||||||
- **audiomack**
|
- **audiomack**
|
||||||
- **AUEngine**
|
- **audiomack:album**
|
||||||
- **Azubu**
|
- **Azubu**
|
||||||
- **bambuser**
|
- **bambuser**
|
||||||
- **bambuser:channel**
|
- **bambuser:channel**
|
||||||
@ -53,11 +61,14 @@
|
|||||||
- **Brightcove**
|
- **Brightcove**
|
||||||
- **BuzzFeed**
|
- **BuzzFeed**
|
||||||
- **BYUtv**
|
- **BYUtv**
|
||||||
|
- **Camdemy**
|
||||||
|
- **CamdemyFolder**
|
||||||
- **Canal13cl**
|
- **Canal13cl**
|
||||||
- **canalc2.tv**
|
- **canalc2.tv**
|
||||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||||
- **CBS**
|
- **CBS**
|
||||||
- **CBSNews**: CBS News
|
- **CBSNews**: CBS News
|
||||||
|
- **CBSSports**
|
||||||
- **CeskaTelevize**
|
- **CeskaTelevize**
|
||||||
- **channel9**: Channel 9
|
- **channel9**: Channel 9
|
||||||
- **Chilloutzone**
|
- **Chilloutzone**
|
||||||
@ -71,8 +82,10 @@
|
|||||||
- **cmt.com**
|
- **cmt.com**
|
||||||
- **CNET**
|
- **CNET**
|
||||||
- **CNN**
|
- **CNN**
|
||||||
|
- **CNNArticle**
|
||||||
- **CNNBlogs**
|
- **CNNBlogs**
|
||||||
- **CollegeHumor**
|
- **CollegeHumor**
|
||||||
|
- **CollegeRama**
|
||||||
- **ComCarCoff**
|
- **ComCarCoff**
|
||||||
- **ComedyCentral**
|
- **ComedyCentral**
|
||||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||||
@ -82,32 +95,38 @@
|
|||||||
- **Crunchyroll**
|
- **Crunchyroll**
|
||||||
- **crunchyroll:playlist**
|
- **crunchyroll:playlist**
|
||||||
- **CSpan**: C-SPAN
|
- **CSpan**: C-SPAN
|
||||||
|
- **CtsNews**
|
||||||
- **culturebox.francetvinfo.fr**
|
- **culturebox.francetvinfo.fr**
|
||||||
- **dailymotion**
|
- **dailymotion**
|
||||||
- **dailymotion:playlist**
|
- **dailymotion:playlist**
|
||||||
- **dailymotion:user**
|
- **dailymotion:user**
|
||||||
- **daum.net**
|
- **daum.net**
|
||||||
- **DBTV**
|
- **DBTV**
|
||||||
|
- **DctpTv**
|
||||||
- **DeezerPlaylist**
|
- **DeezerPlaylist**
|
||||||
- **defense.gouv.fr**
|
- **defense.gouv.fr**
|
||||||
- **Discovery**
|
- **Discovery**
|
||||||
- **divxstage**: DivxStage
|
- **divxstage**: DivxStage
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
|
- **DRBonanza**
|
||||||
- **Dropbox**
|
- **Dropbox**
|
||||||
- **DrTuber**
|
- **DrTuber**
|
||||||
- **DRTV**
|
- **DRTV**
|
||||||
- **Dump**
|
- **Dump**
|
||||||
- **dvtv**: http://video.aktualne.cz/
|
- **dvtv**: http://video.aktualne.cz/
|
||||||
- **EbaumsWorld**
|
- **EbaumsWorld**
|
||||||
|
- **EchoMsk**
|
||||||
- **eHow**
|
- **eHow**
|
||||||
- **Einthusan**
|
- **Einthusan**
|
||||||
- **eitb.tv**
|
- **eitb.tv**
|
||||||
- **EllenTV**
|
- **EllenTV**
|
||||||
- **EllenTV:clips**
|
- **EllenTV:clips**
|
||||||
- **ElPais**: El País
|
- **ElPais**: El País
|
||||||
|
- **Embedly**
|
||||||
- **EMPFlix**
|
- **EMPFlix**
|
||||||
- **Engadget**
|
- **Engadget**
|
||||||
- **Eporner**
|
- **Eporner**
|
||||||
|
- **EroProfile**
|
||||||
- **Escapist**
|
- **Escapist**
|
||||||
- **EveryonesMixtape**
|
- **EveryonesMixtape**
|
||||||
- **exfm**: ex.fm
|
- **exfm**: ex.fm
|
||||||
@ -120,7 +139,6 @@
|
|||||||
- **fernsehkritik.tv:postecke**
|
- **fernsehkritik.tv:postecke**
|
||||||
- **Firedrive**
|
- **Firedrive**
|
||||||
- **Firstpost**
|
- **Firstpost**
|
||||||
- **firsttv**: Видеоархив - Первый канал
|
|
||||||
- **Flickr**
|
- **Flickr**
|
||||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||||
- **Foxgay**
|
- **Foxgay**
|
||||||
@ -143,6 +161,7 @@
|
|||||||
- **GDCVault**
|
- **GDCVault**
|
||||||
- **generic**: Generic downloader that works on some sites
|
- **generic**: Generic downloader that works on some sites
|
||||||
- **GiantBomb**
|
- **GiantBomb**
|
||||||
|
- **Giga**
|
||||||
- **Glide**: Glide mobile video messages (glide.me)
|
- **Glide**: Glide mobile video messages (glide.me)
|
||||||
- **Globo**
|
- **Globo**
|
||||||
- **GodTube**
|
- **GodTube**
|
||||||
@ -153,9 +172,15 @@
|
|||||||
- **Grooveshark**
|
- **Grooveshark**
|
||||||
- **Groupon**
|
- **Groupon**
|
||||||
- **Hark**
|
- **Hark**
|
||||||
|
- **HearThisAt**
|
||||||
- **Heise**
|
- **Heise**
|
||||||
|
- **HellPorno**
|
||||||
- **Helsinki**: helsinki.fi
|
- **Helsinki**: helsinki.fi
|
||||||
- **HentaiStigma**
|
- **HentaiStigma**
|
||||||
|
- **HistoricFilms**
|
||||||
|
- **History**
|
||||||
|
- **hitbox**
|
||||||
|
- **hitbox:live**
|
||||||
- **HornBunny**
|
- **HornBunny**
|
||||||
- **HostingBulk**
|
- **HostingBulk**
|
||||||
- **HotNewHipHop**
|
- **HotNewHipHop**
|
||||||
@ -167,6 +192,7 @@
|
|||||||
- **ign.com**
|
- **ign.com**
|
||||||
- **imdb**: Internet Movie Database trailers
|
- **imdb**: Internet Movie Database trailers
|
||||||
- **imdb:list**: Internet Movie Database lists
|
- **imdb:list**: Internet Movie Database lists
|
||||||
|
- **Imgur**
|
||||||
- **Ina**
|
- **Ina**
|
||||||
- **InfoQ**
|
- **InfoQ**
|
||||||
- **Instagram**
|
- **Instagram**
|
||||||
@ -182,6 +208,7 @@
|
|||||||
- **jpopsuki.tv**
|
- **jpopsuki.tv**
|
||||||
- **Jukebox**
|
- **Jukebox**
|
||||||
- **Kankan**
|
- **Kankan**
|
||||||
|
- **Karaoketv**
|
||||||
- **keek**
|
- **keek**
|
||||||
- **KeezMovies**
|
- **KeezMovies**
|
||||||
- **KhanAcademy**
|
- **KhanAcademy**
|
||||||
@ -195,6 +222,7 @@
|
|||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
- **livestream**
|
- **livestream**
|
||||||
- **livestream:original**
|
- **livestream:original**
|
||||||
|
- **LnkGo**
|
||||||
- **lrt.lt**
|
- **lrt.lt**
|
||||||
- **lynda**: lynda.com videos
|
- **lynda**: lynda.com videos
|
||||||
- **lynda:course**: lynda.com online courses
|
- **lynda:course**: lynda.com online courses
|
||||||
@ -203,6 +231,7 @@
|
|||||||
- **mailru**: Видео@Mail.Ru
|
- **mailru**: Видео@Mail.Ru
|
||||||
- **Malemotion**
|
- **Malemotion**
|
||||||
- **MDR**
|
- **MDR**
|
||||||
|
- **media.ccc.de**
|
||||||
- **metacafe**
|
- **metacafe**
|
||||||
- **Metacritic**
|
- **Metacritic**
|
||||||
- **Mgoon**
|
- **Mgoon**
|
||||||
@ -235,6 +264,8 @@
|
|||||||
- **MySpass**
|
- **MySpass**
|
||||||
- **myvideo**
|
- **myvideo**
|
||||||
- **MyVidster**
|
- **MyVidster**
|
||||||
|
- **n-tv.de**
|
||||||
|
- **NationalGeographic**
|
||||||
- **Naver**
|
- **Naver**
|
||||||
- **NBA**
|
- **NBA**
|
||||||
- **NBC**
|
- **NBC**
|
||||||
@ -242,11 +273,16 @@
|
|||||||
- **ndr**: NDR.de - Mediathek
|
- **ndr**: NDR.de - Mediathek
|
||||||
- **NDTV**
|
- **NDTV**
|
||||||
- **NerdCubedFeed**
|
- **NerdCubedFeed**
|
||||||
|
- **Nerdist**
|
||||||
|
- **Netzkino**
|
||||||
- **Newgrounds**
|
- **Newgrounds**
|
||||||
- **Newstube**
|
- **Newstube**
|
||||||
|
- **NextMedia**
|
||||||
|
- **NextMediaActionNews**
|
||||||
- **nfb**: National Film Board of Canada
|
- **nfb**: National Film Board of Canada
|
||||||
- **nfl.com**
|
- **nfl.com**
|
||||||
- **nhl.com**
|
- **nhl.com**
|
||||||
|
- **nhl.com:news**: NHL news
|
||||||
- **nhl.com:videocenter**: NHL videocenter category
|
- **nhl.com:videocenter**: NHL videocenter category
|
||||||
- **niconico**: ニコニコ動画
|
- **niconico**: ニコニコ動画
|
||||||
- **NiconicoPlaylist**
|
- **NiconicoPlaylist**
|
||||||
@ -257,18 +293,22 @@
|
|||||||
- **Nowness**
|
- **Nowness**
|
||||||
- **nowvideo**: NowVideo
|
- **nowvideo**: NowVideo
|
||||||
- **npo.nl**
|
- **npo.nl**
|
||||||
|
- **npo.nl:live**
|
||||||
|
- **npo.nl:radio**
|
||||||
|
- **npo.nl:radio:fragment**
|
||||||
- **NRK**
|
- **NRK**
|
||||||
- **NRKTV**
|
- **NRKTV**
|
||||||
- **NTV**
|
- **ntv.ru**
|
||||||
- **Nuvid**
|
- **Nuvid**
|
||||||
- **NYTimes**
|
- **NYTimes**
|
||||||
- **ocw.mit.edu**
|
- **ocw.mit.edu**
|
||||||
- **OktoberfestTV**
|
- **OktoberfestTV**
|
||||||
- **on.aol.com**
|
- **on.aol.com**
|
||||||
- **Ooyala**
|
- **Ooyala**
|
||||||
|
- **OpenFilm**
|
||||||
|
- **orf:fm4**: radio FM4
|
||||||
- **orf:oe1**: Radio Österreich 1
|
- **orf:oe1**: Radio Österreich 1
|
||||||
- **orf:tvthek**: ORF TVthek
|
- **orf:tvthek**: ORF TVthek
|
||||||
- **ORFFM4**: radio FM4
|
|
||||||
- **parliamentlive.tv**: UK parliament videos
|
- **parliamentlive.tv**: UK parliament videos
|
||||||
- **Patreon**
|
- **Patreon**
|
||||||
- **PBS**
|
- **PBS**
|
||||||
@ -283,6 +323,7 @@
|
|||||||
- **podomatic**
|
- **podomatic**
|
||||||
- **PornHd**
|
- **PornHd**
|
||||||
- **PornHub**
|
- **PornHub**
|
||||||
|
- **PornHubPlaylist**
|
||||||
- **Pornotube**
|
- **Pornotube**
|
||||||
- **PornoXO**
|
- **PornoXO**
|
||||||
- **PromptFile**
|
- **PromptFile**
|
||||||
@ -290,6 +331,7 @@
|
|||||||
- **Pyvideo**
|
- **Pyvideo**
|
||||||
- **QuickVid**
|
- **QuickVid**
|
||||||
- **radio.de**
|
- **radio.de**
|
||||||
|
- **radiobremen**
|
||||||
- **radiofrance**
|
- **radiofrance**
|
||||||
- **Rai**
|
- **Rai**
|
||||||
- **RBMARadio**
|
- **RBMARadio**
|
||||||
@ -300,8 +342,10 @@
|
|||||||
- **RottenTomatoes**
|
- **RottenTomatoes**
|
||||||
- **Roxwel**
|
- **Roxwel**
|
||||||
- **RTBF**
|
- **RTBF**
|
||||||
|
- **Rte**
|
||||||
|
- **rtl.nl**: rtl.nl and rtlxl.nl
|
||||||
|
- **RTL2**
|
||||||
- **RTLnow**
|
- **RTLnow**
|
||||||
- **rtlxl.nl**
|
|
||||||
- **RTP**
|
- **RTP**
|
||||||
- **RTS**: RTS.ch
|
- **RTS**: RTS.ch
|
||||||
- **rtve.es:alacarta**: RTVE a la carta
|
- **rtve.es:alacarta**: RTVE a la carta
|
||||||
@ -309,9 +353,11 @@
|
|||||||
- **RUHD**
|
- **RUHD**
|
||||||
- **rutube**: Rutube videos
|
- **rutube**: Rutube videos
|
||||||
- **rutube:channel**: Rutube channels
|
- **rutube:channel**: Rutube channels
|
||||||
|
- **rutube:embed**: Rutube embedded videos
|
||||||
- **rutube:movie**: Rutube movies
|
- **rutube:movie**: Rutube movies
|
||||||
- **rutube:person**: Rutube person videos
|
- **rutube:person**: Rutube person videos
|
||||||
- **RUTV**: RUTV.RU
|
- **RUTV**: RUTV.RU
|
||||||
|
- **Sandia**: Sandia National Laboratories
|
||||||
- **Sapo**: SAPO Vídeos
|
- **Sapo**: SAPO Vídeos
|
||||||
- **savefrom.net**
|
- **savefrom.net**
|
||||||
- **SBS**: sbs.com.au
|
- **SBS**: sbs.com.au
|
||||||
@ -351,12 +397,14 @@
|
|||||||
- **Sport5**
|
- **Sport5**
|
||||||
- **SportBox**
|
- **SportBox**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
- **SRMediathek**: Süddeutscher Rundfunk
|
- **SRMediathek**: Saarländischer Rundfunk
|
||||||
- **stanfordoc**: Stanford Open ClassRoom
|
- **stanfordoc**: Stanford Open ClassRoom
|
||||||
- **Steam**
|
- **Steam**
|
||||||
- **streamcloud.eu**
|
- **streamcloud.eu**
|
||||||
- **StreamCZ**
|
- **StreamCZ**
|
||||||
|
- **StreetVoice**
|
||||||
- **SunPorno**
|
- **SunPorno**
|
||||||
|
- **SVTPlay**
|
||||||
- **SWRMediathek**
|
- **SWRMediathek**
|
||||||
- **Syfy**
|
- **Syfy**
|
||||||
- **SztvHu**
|
- **SztvHu**
|
||||||
@ -375,7 +423,9 @@
|
|||||||
- **TeleBruxelles**
|
- **TeleBruxelles**
|
||||||
- **telecinco.es**
|
- **telecinco.es**
|
||||||
- **TeleMB**
|
- **TeleMB**
|
||||||
|
- **TeleTask**
|
||||||
- **TenPlay**
|
- **TenPlay**
|
||||||
|
- **TestTube**
|
||||||
- **TF1**
|
- **TF1**
|
||||||
- **TheOnion**
|
- **TheOnion**
|
||||||
- **ThePlatform**
|
- **ThePlatform**
|
||||||
@ -403,8 +453,16 @@
|
|||||||
- **tv.dfb.de**
|
- **tv.dfb.de**
|
||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvp.pl**
|
- **tvp.pl**
|
||||||
|
- **tvp.pl:Series**
|
||||||
- **TVPlay**: TV3Play and related services
|
- **TVPlay**: TV3Play and related services
|
||||||
- **Twitch**
|
- **Tweakers**
|
||||||
|
- **twitch:bookmarks**
|
||||||
|
- **twitch:chapter**
|
||||||
|
- **twitch:past_broadcasts**
|
||||||
|
- **twitch:profile**
|
||||||
|
- **twitch:stream**
|
||||||
|
- **twitch:video**
|
||||||
|
- **twitch:vod**
|
||||||
- **Ubu**
|
- **Ubu**
|
||||||
- **udemy**
|
- **udemy**
|
||||||
- **udemy:course**
|
- **udemy:course**
|
||||||
@ -433,6 +491,8 @@
|
|||||||
- **videoweed**: VideoWeed
|
- **videoweed**: VideoWeed
|
||||||
- **Vidme**
|
- **Vidme**
|
||||||
- **Vidzi**
|
- **Vidzi**
|
||||||
|
- **vier**
|
||||||
|
- **vier:videos**
|
||||||
- **viki**
|
- **viki**
|
||||||
- **vimeo**
|
- **vimeo**
|
||||||
- **vimeo:album**
|
- **vimeo:album**
|
||||||
@ -460,11 +520,13 @@
|
|||||||
- **WDR**
|
- **WDR**
|
||||||
- **wdr:mobile**
|
- **wdr:mobile**
|
||||||
- **WDRMaus**: Sendung mit der Maus
|
- **WDRMaus**: Sendung mit der Maus
|
||||||
|
- **WebOfStories**
|
||||||
- **Weibo**
|
- **Weibo**
|
||||||
- **Wimp**
|
- **Wimp**
|
||||||
- **Wistia**
|
- **Wistia**
|
||||||
- **WorldStarHipHop**
|
- **WorldStarHipHop**
|
||||||
- **wrzuta.pl**
|
- **wrzuta.pl**
|
||||||
|
- **WSJ**: Wall Street Journal
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XHamster**
|
- **XHamster**
|
||||||
@ -472,8 +534,11 @@
|
|||||||
- **XNXX**
|
- **XNXX**
|
||||||
- **XTube**
|
- **XTube**
|
||||||
- **XTubeUser**: XTube user profile
|
- **XTubeUser**: XTube user profile
|
||||||
|
- **Xuite**
|
||||||
- **XVideos**
|
- **XVideos**
|
||||||
|
- **XXXYMovies**
|
||||||
- **Yahoo**: Yahoo screen and movies
|
- **Yahoo**: Yahoo screen and movies
|
||||||
|
- **Yam**
|
||||||
- **YesJapan**
|
- **YesJapan**
|
||||||
- **Ynet**
|
- **Ynet**
|
||||||
- **YouJizz**
|
- **YouJizz**
|
||||||
@ -491,7 +556,6 @@
|
|||||||
- **youtube:search_url**: YouTube.com search URLs
|
- **youtube:search_url**: YouTube.com search URLs
|
||||||
- **youtube:show**: YouTube.com (multi-season) shows
|
- **youtube:show**: YouTube.com (multi-season) shows
|
||||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||||
- **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
|
|
||||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||||
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||||
- **ZDF**
|
- **ZDF**
|
||||||
|
@ -3,4 +3,4 @@ universal = True
|
|||||||
|
|
||||||
[flake8]
|
[flake8]
|
||||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
|
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
|
||||||
ignore = E501
|
ignore = E402,E501,E731
|
||||||
|
@ -103,6 +103,26 @@ def expect_info_dict(self, got_dict, expected_dict):
|
|||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
match_rex.match(got),
|
match_rex.match(got),
|
||||||
'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||||
|
elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
|
||||||
|
got = got_dict.get(info_field)
|
||||||
|
start_str = expected[len('startswith:'):]
|
||||||
|
self.assertTrue(
|
||||||
|
isinstance(got, compat_str),
|
||||||
|
'Expected a %s object, but got %s for field %s' % (
|
||||||
|
compat_str.__name__, type(got).__name__, info_field))
|
||||||
|
self.assertTrue(
|
||||||
|
got.startswith(start_str),
|
||||||
|
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
|
||||||
|
elif isinstance(expected, compat_str) and expected.startswith('contains:'):
|
||||||
|
got = got_dict.get(info_field)
|
||||||
|
contains_str = expected[len('contains:'):]
|
||||||
|
self.assertTrue(
|
||||||
|
isinstance(got, compat_str),
|
||||||
|
'Expected a %s object, but got %s for field %s' % (
|
||||||
|
compat_str.__name__, type(got).__name__, info_field))
|
||||||
|
self.assertTrue(
|
||||||
|
contains_str in got,
|
||||||
|
'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
|
||||||
elif isinstance(expected, type):
|
elif isinstance(expected, type):
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
self.assertTrue(isinstance(got, expected),
|
self.assertTrue(isinstance(got, expected),
|
||||||
@ -153,12 +173,14 @@ def expect_info_dict(self, got_dict, expected_dict):
|
|||||||
info_dict_str += ''.join(
|
info_dict_str += ''.join(
|
||||||
' %s: %s,\n' % (_repr(k), _repr(v))
|
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||||
for k, v in test_info_dict.items() if k not in missing_keys)
|
for k, v in test_info_dict.items() if k not in missing_keys)
|
||||||
info_dict_str += '\n'
|
|
||||||
|
if info_dict_str:
|
||||||
|
info_dict_str += '\n'
|
||||||
info_dict_str += ''.join(
|
info_dict_str += ''.join(
|
||||||
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
|
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
|
||||||
for k in missing_keys)
|
for k in missing_keys)
|
||||||
write_string(
|
write_string(
|
||||||
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
|
'\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
|
||||||
self.assertFalse(
|
self.assertFalse(
|
||||||
missing_keys,
|
missing_keys,
|
||||||
'Missing keys in test definition: %s' % (
|
'Missing keys in test definition: %s' % (
|
||||||
|
@ -39,5 +39,6 @@
|
|||||||
"writesubtitles": false,
|
"writesubtitles": false,
|
||||||
"allsubtitles": false,
|
"allsubtitles": false,
|
||||||
"listssubtitles": false,
|
"listssubtitles": false,
|
||||||
"socket_timeout": 20
|
"socket_timeout": 20,
|
||||||
|
"fixup": "never"
|
||||||
}
|
}
|
||||||
|
@ -13,6 +13,7 @@ import copy
|
|||||||
from test.helper import FakeYDL, assertRegexpMatches
|
from test.helper import FakeYDL, assertRegexpMatches
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
|
from youtube_dl.postprocessor.common import PostProcessor
|
||||||
|
|
||||||
|
|
||||||
class YDL(FakeYDL):
|
class YDL(FakeYDL):
|
||||||
@ -370,5 +371,35 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
'vbr': 10,
|
'vbr': 10,
|
||||||
}), '^\s*10k$')
|
}), '^\s*10k$')
|
||||||
|
|
||||||
|
def test_postprocessors(self):
|
||||||
|
filename = 'post-processor-testfile.mp4'
|
||||||
|
audiofile = filename + '.mp3'
|
||||||
|
|
||||||
|
class SimplePP(PostProcessor):
|
||||||
|
def run(self, info):
|
||||||
|
with open(audiofile, 'wt') as f:
|
||||||
|
f.write('EXAMPLE')
|
||||||
|
info['filepath']
|
||||||
|
return False, info
|
||||||
|
|
||||||
|
def run_pp(params):
|
||||||
|
with open(filename, 'wt') as f:
|
||||||
|
f.write('EXAMPLE')
|
||||||
|
ydl = YoutubeDL(params)
|
||||||
|
ydl.add_post_processor(SimplePP())
|
||||||
|
ydl.post_process(filename, {'filepath': filename})
|
||||||
|
|
||||||
|
run_pp({'keepvideo': True})
|
||||||
|
self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
|
||||||
|
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
||||||
|
os.unlink(filename)
|
||||||
|
os.unlink(audiofile)
|
||||||
|
|
||||||
|
run_pp({'keepvideo': False})
|
||||||
|
self.assertFalse(os.path.exists(filename), '%s exists' % filename)
|
||||||
|
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
||||||
|
os.unlink(audiofile)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self.assertEqual(jsi.call_function('f'), -11)
|
self.assertEqual(jsi.call_function('f'), -11)
|
||||||
|
|
||||||
def test_comments(self):
|
def test_comments(self):
|
||||||
|
'Skipping: Not yet fully implemented'
|
||||||
|
return
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() {
|
function x() {
|
||||||
var x = /* 1 + */ 2;
|
var x = /* 1 + */ 2;
|
||||||
@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x'), 52)
|
self.assertEqual(jsi.call_function('x'), 52)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function f() {
|
||||||
|
var x = "/*";
|
||||||
|
var y = 1 /* comment */ + 2;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 3)
|
||||||
|
|
||||||
def test_precedence(self):
|
def test_precedence(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() {
|
function x() {
|
||||||
|
@ -138,7 +138,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(len(subtitles.keys()), 5)
|
self.assertTrue(len(subtitles.keys()) >= 6)
|
||||||
|
|
||||||
def test_list_subtitles(self):
|
def test_list_subtitles(self):
|
||||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||||
@ -247,7 +247,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
|||||||
def test_subtitles(self):
|
def test_subtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
|
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||||
|
|
||||||
def test_subtitles_lang(self):
|
def test_subtitles_lang(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
@ -334,7 +334,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
|||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(set(subtitles.keys()), set(['cs']))
|
self.assertEqual(set(subtitles.keys()), set(['cs']))
|
||||||
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
|
self.assertTrue(len(subtitles['cs']) > 20000)
|
||||||
|
|
||||||
def test_nosubtitles(self):
|
def test_nosubtitles(self):
|
||||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||||
|
@ -53,6 +53,7 @@ from youtube_dl.utils import (
|
|||||||
version_tuple,
|
version_tuple,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
render_table,
|
render_table,
|
||||||
|
match_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -156,6 +157,9 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
|
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
|
||||||
'20141126')
|
'20141126')
|
||||||
|
self.assertEqual(
|
||||||
|
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
|
||||||
|
'20150202')
|
||||||
|
|
||||||
def test_find_xpath_attr(self):
|
def test_find_xpath_attr(self):
|
||||||
testxml = '''<root>
|
testxml = '''<root>
|
||||||
@ -366,6 +370,10 @@ class TestUtil(unittest.TestCase):
|
|||||||
"playlist":[{"controls":{"all":null}}]
|
"playlist":[{"controls":{"all":null}}]
|
||||||
}''')
|
}''')
|
||||||
|
|
||||||
|
inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
|
||||||
|
json_code = js_to_json(inp)
|
||||||
|
self.assertEqual(json.loads(json_code), json.loads(inp))
|
||||||
|
|
||||||
def test_js_to_json_edgecases(self):
|
def test_js_to_json_edgecases(self):
|
||||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||||
@ -456,6 +464,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
|||||||
'123 4\n'
|
'123 4\n'
|
||||||
'9999 51')
|
'9999 51')
|
||||||
|
|
||||||
|
def test_match_str(self):
|
||||||
|
self.assertRaises(ValueError, match_str, 'xy>foobar', {})
|
||||||
|
self.assertFalse(match_str('xy', {'x': 1200}))
|
||||||
|
self.assertTrue(match_str('!xy', {'x': 1200}))
|
||||||
|
self.assertTrue(match_str('x', {'x': 1200}))
|
||||||
|
self.assertFalse(match_str('!x', {'x': 1200}))
|
||||||
|
self.assertTrue(match_str('x', {'x': 0}))
|
||||||
|
self.assertFalse(match_str('x>0', {'x': 0}))
|
||||||
|
self.assertFalse(match_str('x>0', {}))
|
||||||
|
self.assertTrue(match_str('x>?0', {}))
|
||||||
|
self.assertTrue(match_str('x>1K', {'x': 1200}))
|
||||||
|
self.assertFalse(match_str('x>2K', {'x': 1200}))
|
||||||
|
self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
|
||||||
|
self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
|
||||||
|
self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
|
||||||
|
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
|
||||||
|
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
|
||||||
|
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
|
||||||
|
self.assertFalse(match_str(
|
||||||
|
'like_count > 100 & dislike_count <? 50 & description',
|
||||||
|
{'like_count': 90, 'description': 'foo'}))
|
||||||
|
self.assertTrue(match_str(
|
||||||
|
'like_count > 100 & dislike_count <? 50 & description',
|
||||||
|
{'like_count': 190, 'description': 'foo'}))
|
||||||
|
self.assertFalse(match_str(
|
||||||
|
'like_count > 100 & dislike_count <? 50 & description',
|
||||||
|
{'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
|
||||||
|
self.assertFalse(match_str(
|
||||||
|
'like_count > 100 & dislike_count <? 50 & description',
|
||||||
|
{'like_count': 190, 'dislike_count': 10}))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -8,11 +8,11 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
|
|
||||||
|
from test.helper import FakeYDL
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
from youtube_dl.compat import compat_str, compat_urlretrieve
|
from youtube_dl.compat import compat_str, compat_urlretrieve
|
||||||
|
|
||||||
@ -64,6 +64,12 @@ _TESTS = [
|
|||||||
'js',
|
'js',
|
||||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
|
||||||
|
'js',
|
||||||
|
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
|
||||||
|
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -88,7 +94,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
|
|||||||
if not os.path.exists(fn):
|
if not os.path.exists(fn):
|
||||||
compat_urlretrieve(url, fn)
|
compat_urlretrieve(url, fn)
|
||||||
|
|
||||||
ie = YoutubeIE()
|
ydl = FakeYDL()
|
||||||
|
ie = YoutubeIE(ydl)
|
||||||
if stype == 'js':
|
if stype == 'js':
|
||||||
with io.open(fn, encoding='utf-8') as testf:
|
with io.open(fn, encoding='utf-8') as testf:
|
||||||
jscode = testf.read()
|
jscode = testf.read()
|
||||||
|
@ -199,18 +199,25 @@ class YoutubeDL(object):
|
|||||||
postprocessor.
|
postprocessor.
|
||||||
progress_hooks: A list of functions that get called on download
|
progress_hooks: A list of functions that get called on download
|
||||||
progress, with a dictionary with the entries
|
progress, with a dictionary with the entries
|
||||||
* status: One of "downloading" and "finished".
|
* status: One of "downloading", "error", or "finished".
|
||||||
Check this first and ignore unknown values.
|
Check this first and ignore unknown values.
|
||||||
|
|
||||||
If status is one of "downloading" or "finished", the
|
If status is one of "downloading", or "finished", the
|
||||||
following properties may also be present:
|
following properties may also be present:
|
||||||
* filename: The final filename (always present)
|
* filename: The final filename (always present)
|
||||||
|
* tmpfilename: The filename we're currently writing to
|
||||||
* downloaded_bytes: Bytes on disk
|
* downloaded_bytes: Bytes on disk
|
||||||
* total_bytes: Size of the whole file, None if unknown
|
* total_bytes: Size of the whole file, None if unknown
|
||||||
* tmpfilename: The filename we're currently writing to
|
* total_bytes_estimate: Guess of the eventual file size,
|
||||||
|
None if unavailable.
|
||||||
|
* elapsed: The number of seconds since download started.
|
||||||
* eta: The estimated time in seconds, None if unknown
|
* eta: The estimated time in seconds, None if unknown
|
||||||
* speed: The download speed in bytes/second, None if
|
* speed: The download speed in bytes/second, None if
|
||||||
unknown
|
unknown
|
||||||
|
* fragment_index: The counter of the currently
|
||||||
|
downloaded video fragment.
|
||||||
|
* fragment_count: The number of fragments (= individual
|
||||||
|
files that will be merged)
|
||||||
|
|
||||||
Progress hooks are guaranteed to be called at least once
|
Progress hooks are guaranteed to be called at least once
|
||||||
(with status "finished") if the download is successful.
|
(with status "finished") if the download is successful.
|
||||||
@ -225,10 +232,19 @@ class YoutubeDL(object):
|
|||||||
call_home: Boolean, true iff we are allowed to contact the
|
call_home: Boolean, true iff we are allowed to contact the
|
||||||
youtube-dl servers for debugging.
|
youtube-dl servers for debugging.
|
||||||
sleep_interval: Number of seconds to sleep before each download.
|
sleep_interval: Number of seconds to sleep before each download.
|
||||||
external_downloader: Executable of the external downloader to call.
|
|
||||||
listformats: Print an overview of available video formats and exit.
|
listformats: Print an overview of available video formats and exit.
|
||||||
list_thumbnails: Print a table of all thumbnails and exit.
|
list_thumbnails: Print a table of all thumbnails and exit.
|
||||||
|
match_filter: A function that gets called with the info_dict of
|
||||||
|
every video.
|
||||||
|
If it returns a message, the video is ignored.
|
||||||
|
If it returns None, the video is downloaded.
|
||||||
|
match_filter_func in utils.py is one example for this.
|
||||||
|
no_color: Do not emit color codes in output.
|
||||||
|
|
||||||
|
The following options determine which downloader is picked:
|
||||||
|
external_downloader: Executable of the external downloader to call.
|
||||||
|
None or unset for standard (built-in) downloader.
|
||||||
|
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
@ -485,7 +501,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
if self.params.get('no_warnings'):
|
if self.params.get('no_warnings'):
|
||||||
return
|
return
|
||||||
if self._err_file.isatty() and os.name != 'nt':
|
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
|
||||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header = 'WARNING:'
|
_msg_header = 'WARNING:'
|
||||||
@ -497,7 +513,7 @@ class YoutubeDL(object):
|
|||||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
||||||
in red if stderr is a tty file.
|
in red if stderr is a tty file.
|
||||||
'''
|
'''
|
||||||
if self._err_file.isatty() and os.name != 'nt':
|
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
|
||||||
_msg_header = '\033[0;31mERROR:\033[0m'
|
_msg_header = '\033[0;31mERROR:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header = 'ERROR:'
|
_msg_header = 'ERROR:'
|
||||||
@ -554,7 +570,7 @@ class YoutubeDL(object):
|
|||||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _match_entry(self, info_dict):
|
def _match_entry(self, info_dict, incomplete):
|
||||||
""" Returns None iff the file should be downloaded """
|
""" Returns None iff the file should be downloaded """
|
||||||
|
|
||||||
video_title = info_dict.get('title', info_dict.get('id', 'video'))
|
video_title = info_dict.get('title', info_dict.get('id', 'video'))
|
||||||
@ -583,9 +599,17 @@ class YoutubeDL(object):
|
|||||||
if max_views is not None and view_count > max_views:
|
if max_views is not None and view_count > max_views:
|
||||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||||
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
||||||
return 'Skipping "%s" because it is age restricted' % title
|
return 'Skipping "%s" because it is age restricted' % video_title
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return '%s has already been recorded in archive' % video_title
|
return '%s has already been recorded in archive' % video_title
|
||||||
|
|
||||||
|
if not incomplete:
|
||||||
|
match_filter = self.params.get('match_filter')
|
||||||
|
if match_filter is not None:
|
||||||
|
ret = match_filter(info_dict)
|
||||||
|
if ret is not None:
|
||||||
|
return ret
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -779,7 +803,7 @@ class YoutubeDL(object):
|
|||||||
'extractor_key': ie_result['extractor_key'],
|
'extractor_key': ie_result['extractor_key'],
|
||||||
}
|
}
|
||||||
|
|
||||||
reason = self._match_entry(entry)
|
reason = self._match_entry(entry, incomplete=True)
|
||||||
if reason is not None:
|
if reason is not None:
|
||||||
self.to_screen('[download] ' + reason)
|
self.to_screen('[download] ' + reason)
|
||||||
continue
|
continue
|
||||||
@ -826,27 +850,44 @@ class YoutubeDL(object):
|
|||||||
'!=': operator.ne,
|
'!=': operator.ne,
|
||||||
}
|
}
|
||||||
operator_rex = re.compile(r'''(?x)\s*\[
|
operator_rex = re.compile(r'''(?x)\s*\[
|
||||||
(?P<key>width|height|tbr|abr|vbr|filesize|fps)
|
(?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
|
||||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
||||||
\]$
|
\]$
|
||||||
''' % '|'.join(map(re.escape, OPERATORS.keys())))
|
''' % '|'.join(map(re.escape, OPERATORS.keys())))
|
||||||
m = operator_rex.search(format_spec)
|
m = operator_rex.search(format_spec)
|
||||||
|
if m:
|
||||||
|
try:
|
||||||
|
comparison_value = int(m.group('value'))
|
||||||
|
except ValueError:
|
||||||
|
comparison_value = parse_filesize(m.group('value'))
|
||||||
|
if comparison_value is None:
|
||||||
|
comparison_value = parse_filesize(m.group('value') + 'B')
|
||||||
|
if comparison_value is None:
|
||||||
|
raise ValueError(
|
||||||
|
'Invalid value %r in format specification %r' % (
|
||||||
|
m.group('value'), format_spec))
|
||||||
|
op = OPERATORS[m.group('op')]
|
||||||
|
|
||||||
|
if not m:
|
||||||
|
STR_OPERATORS = {
|
||||||
|
'=': operator.eq,
|
||||||
|
'!=': operator.ne,
|
||||||
|
}
|
||||||
|
str_operator_rex = re.compile(r'''(?x)\s*\[
|
||||||
|
\s*(?P<key>ext|acodec|vcodec|container|protocol)
|
||||||
|
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||||
|
\s*(?P<value>[a-zA-Z0-9_-]+)
|
||||||
|
\s*\]$
|
||||||
|
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||||
|
m = str_operator_rex.search(format_spec)
|
||||||
|
if m:
|
||||||
|
comparison_value = m.group('value')
|
||||||
|
op = STR_OPERATORS[m.group('op')]
|
||||||
|
|
||||||
if not m:
|
if not m:
|
||||||
raise ValueError('Invalid format specification %r' % format_spec)
|
raise ValueError('Invalid format specification %r' % format_spec)
|
||||||
|
|
||||||
try:
|
|
||||||
comparison_value = int(m.group('value'))
|
|
||||||
except ValueError:
|
|
||||||
comparison_value = parse_filesize(m.group('value'))
|
|
||||||
if comparison_value is None:
|
|
||||||
comparison_value = parse_filesize(m.group('value') + 'B')
|
|
||||||
if comparison_value is None:
|
|
||||||
raise ValueError(
|
|
||||||
'Invalid value %r in format specification %r' % (
|
|
||||||
m.group('value'), format_spec))
|
|
||||||
op = OPERATORS[m.group('op')]
|
|
||||||
|
|
||||||
def _filter(f):
|
def _filter(f):
|
||||||
actual_value = f.get(m.group('key'))
|
actual_value = f.get(m.group('key'))
|
||||||
if actual_value is None:
|
if actual_value is None:
|
||||||
@ -920,27 +961,9 @@ class YoutubeDL(object):
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
def _calc_cookies(self, info_dict):
|
def _calc_cookies(self, info_dict):
|
||||||
class _PseudoRequest(object):
|
pr = compat_urllib_request.Request(info_dict['url'])
|
||||||
def __init__(self, url):
|
|
||||||
self.url = url
|
|
||||||
self.headers = {}
|
|
||||||
self.unverifiable = False
|
|
||||||
|
|
||||||
def add_unredirected_header(self, k, v):
|
|
||||||
self.headers[k] = v
|
|
||||||
|
|
||||||
def get_full_url(self):
|
|
||||||
return self.url
|
|
||||||
|
|
||||||
def is_unverifiable(self):
|
|
||||||
return self.unverifiable
|
|
||||||
|
|
||||||
def has_header(self, h):
|
|
||||||
return h in self.headers
|
|
||||||
|
|
||||||
pr = _PseudoRequest(info_dict['url'])
|
|
||||||
self.cookiejar.add_cookie_header(pr)
|
self.cookiejar.add_cookie_header(pr)
|
||||||
return pr.headers.get('Cookie')
|
return pr.get_header('Cookie')
|
||||||
|
|
||||||
def process_video_result(self, info_dict, download=True):
|
def process_video_result(self, info_dict, download=True):
|
||||||
assert info_dict.get('_type', 'video') == 'video'
|
assert info_dict.get('_type', 'video') == 'video'
|
||||||
@ -964,9 +987,11 @@ class YoutubeDL(object):
|
|||||||
thumbnails.sort(key=lambda t: (
|
thumbnails.sort(key=lambda t: (
|
||||||
t.get('preference'), t.get('width'), t.get('height'),
|
t.get('preference'), t.get('width'), t.get('height'),
|
||||||
t.get('id'), t.get('url')))
|
t.get('id'), t.get('url')))
|
||||||
for t in thumbnails:
|
for i, t in enumerate(thumbnails):
|
||||||
if 'width' in t and 'height' in t:
|
if 'width' in t and 'height' in t:
|
||||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||||
|
if t.get('id') is None:
|
||||||
|
t['id'] = '%d' % i
|
||||||
|
|
||||||
if thumbnails and 'thumbnail' not in info_dict:
|
if thumbnails and 'thumbnail' not in info_dict:
|
||||||
info_dict['thumbnail'] = thumbnails[-1]['url']
|
info_dict['thumbnail'] = thumbnails[-1]['url']
|
||||||
@ -1074,7 +1099,8 @@ class YoutubeDL(object):
|
|||||||
else self.params['merge_output_format'])
|
else self.params['merge_output_format'])
|
||||||
selected_format = {
|
selected_format = {
|
||||||
'requested_formats': formats_info,
|
'requested_formats': formats_info,
|
||||||
'format': rf,
|
'format': '%s+%s' % (formats_info[0].get('format'),
|
||||||
|
formats_info[1].get('format')),
|
||||||
'format_id': '%s+%s' % (formats_info[0].get('format_id'),
|
'format_id': '%s+%s' % (formats_info[0].get('format_id'),
|
||||||
formats_info[1].get('format_id')),
|
formats_info[1].get('format_id')),
|
||||||
'width': formats_info[0].get('width'),
|
'width': formats_info[0].get('width'),
|
||||||
@ -1130,7 +1156,7 @@ class YoutubeDL(object):
|
|||||||
if 'format' not in info_dict:
|
if 'format' not in info_dict:
|
||||||
info_dict['format'] = info_dict['ext']
|
info_dict['format'] = info_dict['ext']
|
||||||
|
|
||||||
reason = self._match_entry(info_dict)
|
reason = self._match_entry(info_dict, incomplete=False)
|
||||||
if reason is not None:
|
if reason is not None:
|
||||||
self.to_screen('[download] ' + reason)
|
self.to_screen('[download] ' + reason)
|
||||||
return
|
return
|
||||||
@ -1261,7 +1287,7 @@ class YoutubeDL(object):
|
|||||||
downloaded = []
|
downloaded = []
|
||||||
success = True
|
success = True
|
||||||
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
|
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
|
||||||
if not merger._executable:
|
if not merger.available:
|
||||||
postprocessors = []
|
postprocessors = []
|
||||||
self.report_warning('You have requested multiple '
|
self.report_warning('You have requested multiple '
|
||||||
'formats but ffmpeg or avconv are not installed.'
|
'formats but ffmpeg or avconv are not installed.'
|
||||||
@ -1508,30 +1534,18 @@ class YoutubeDL(object):
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
def list_formats(self, info_dict):
|
def list_formats(self, info_dict):
|
||||||
def line(format, idlen=20):
|
|
||||||
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
|
|
||||||
format['format_id'],
|
|
||||||
format['ext'],
|
|
||||||
self.format_resolution(format),
|
|
||||||
self._format_note(format),
|
|
||||||
))
|
|
||||||
|
|
||||||
formats = info_dict.get('formats', [info_dict])
|
formats = info_dict.get('formats', [info_dict])
|
||||||
idlen = max(len('format code'),
|
table = [
|
||||||
max(len(f['format_id']) for f in formats))
|
[f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
|
||||||
formats_s = [
|
for f in formats
|
||||||
line(f, idlen) for f in formats
|
|
||||||
if f.get('preference') is None or f['preference'] >= -1000]
|
if f.get('preference') is None or f['preference'] >= -1000]
|
||||||
if len(formats) > 1:
|
if len(formats) > 1:
|
||||||
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
|
table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
|
||||||
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
|
||||||
|
|
||||||
header_line = line({
|
header_line = ['format code', 'extension', 'resolution', 'note']
|
||||||
'format_id': 'format code', 'ext': 'extension',
|
|
||||||
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
|
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[info] Available formats for %s:\n%s\n%s' %
|
'[info] Available formats for %s:\n%s' %
|
||||||
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
(info_dict['id'], render_table(header_line, table)))
|
||||||
|
|
||||||
def list_thumbnails(self, info_dict):
|
def list_thumbnails(self, info_dict):
|
||||||
thumbnails = info_dict.get('thumbnails')
|
thumbnails = info_dict.get('thumbnails')
|
||||||
@ -1611,7 +1625,7 @@ class YoutubeDL(object):
|
|||||||
self._write_string('[debug] Python version %s - %s\n' % (
|
self._write_string('[debug] Python version %s - %s\n' % (
|
||||||
platform.python_version(), platform_name()))
|
platform.python_version(), platform_name()))
|
||||||
|
|
||||||
exe_versions = FFmpegPostProcessor.get_versions()
|
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||||
exe_str = ', '.join(
|
exe_str = ', '.join(
|
||||||
'%s %s' % (exe, v)
|
'%s %s' % (exe, v)
|
||||||
|
@ -23,9 +23,10 @@ from .compat import (
|
|||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
DateRange,
|
DateRange,
|
||||||
DEFAULT_OUTTMPL,
|
|
||||||
decodeOption,
|
decodeOption,
|
||||||
|
DEFAULT_OUTTMPL,
|
||||||
DownloadError,
|
DownloadError,
|
||||||
|
match_filter_func,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
read_batch_urls,
|
read_batch_urls,
|
||||||
@ -247,6 +248,9 @@ def _real_main(argv=None):
|
|||||||
xattr # Confuse flake8
|
xattr # Confuse flake8
|
||||||
except ImportError:
|
except ImportError:
|
||||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||||
|
match_filter = (
|
||||||
|
None if opts.match_filter is None
|
||||||
|
else match_filter_func(opts.match_filter))
|
||||||
|
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
'usenetrc': opts.usenetrc,
|
'usenetrc': opts.usenetrc,
|
||||||
@ -344,6 +348,10 @@ def _real_main(argv=None):
|
|||||||
'list_thumbnails': opts.list_thumbnails,
|
'list_thumbnails': opts.list_thumbnails,
|
||||||
'playlist_items': opts.playlist_items,
|
'playlist_items': opts.playlist_items,
|
||||||
'xattr_set_filesize': opts.xattr_set_filesize,
|
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||||
|
'match_filter': match_filter,
|
||||||
|
'no_color': opts.no_color,
|
||||||
|
'ffmpeg_location': opts.ffmpeg_location,
|
||||||
|
'hls_prefer_native': opts.hls_prefer_native,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
from math import ceil
|
from math import ceil
|
||||||
|
|
||||||
@ -329,3 +327,5 @@ def inc(data):
|
|||||||
data[i] = data[i] + 1
|
data[i] = data[i] + 1
|
||||||
break
|
break
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
||||||
|
@ -34,6 +34,9 @@ def get_suitable_downloader(info_dict, params={}):
|
|||||||
if ed.supports(info_dict):
|
if ed.supports(info_dict):
|
||||||
return ed
|
return ed
|
||||||
|
|
||||||
|
if protocol == 'm3u8' and params.get('hls_prefer_native'):
|
||||||
|
return NativeHlsFD
|
||||||
|
|
||||||
return PROTOCOL_MAP.get(protocol, HttpFD)
|
return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import division, unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@ -54,6 +54,7 @@ class FileDownloader(object):
|
|||||||
self.ydl = ydl
|
self.ydl = ydl
|
||||||
self._progress_hooks = []
|
self._progress_hooks = []
|
||||||
self.params = params
|
self.params = params
|
||||||
|
self.add_progress_hook(self.report_progress)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_seconds(seconds):
|
def format_seconds(seconds):
|
||||||
@ -226,42 +227,64 @@ class FileDownloader(object):
|
|||||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||||
self.to_console_title('youtube-dl ' + msg)
|
self.to_console_title('youtube-dl ' + msg)
|
||||||
|
|
||||||
def report_progress(self, percent, data_len_str, speed, eta):
|
def report_progress(self, s):
|
||||||
"""Report download progress."""
|
if s['status'] == 'finished':
|
||||||
if self.params.get('noprogress', False):
|
if self.params.get('noprogress', False):
|
||||||
|
self.to_screen('[download] Download completed')
|
||||||
|
else:
|
||||||
|
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||||
|
if s.get('elapsed') is not None:
|
||||||
|
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||||
|
msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
|
||||||
|
else:
|
||||||
|
msg_template = '100%% of %(_total_bytes_str)s'
|
||||||
|
self._report_progress_status(
|
||||||
|
msg_template % s, is_last_line=True)
|
||||||
|
|
||||||
|
if self.params.get('noprogress'):
|
||||||
return
|
return
|
||||||
if eta is not None:
|
|
||||||
eta_str = self.format_eta(eta)
|
|
||||||
else:
|
|
||||||
eta_str = 'Unknown ETA'
|
|
||||||
if percent is not None:
|
|
||||||
percent_str = self.format_percent(percent)
|
|
||||||
else:
|
|
||||||
percent_str = 'Unknown %'
|
|
||||||
speed_str = self.format_speed(speed)
|
|
||||||
|
|
||||||
msg = ('%s of %s at %s ETA %s' %
|
if s['status'] != 'downloading':
|
||||||
(percent_str, data_len_str, speed_str, eta_str))
|
|
||||||
self._report_progress_status(msg)
|
|
||||||
|
|
||||||
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
|
|
||||||
if self.params.get('noprogress', False):
|
|
||||||
return
|
return
|
||||||
downloaded_str = format_bytes(downloaded_data_len)
|
|
||||||
speed_str = self.format_speed(speed)
|
|
||||||
elapsed_str = FileDownloader.format_seconds(elapsed)
|
|
||||||
msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
|
|
||||||
self._report_progress_status(msg)
|
|
||||||
|
|
||||||
def report_finish(self, data_len_str, tot_time):
|
if s.get('eta') is not None:
|
||||||
"""Report download finished."""
|
s['_eta_str'] = self.format_eta(s['eta'])
|
||||||
if self.params.get('noprogress', False):
|
|
||||||
self.to_screen('[download] Download completed')
|
|
||||||
else:
|
else:
|
||||||
self._report_progress_status(
|
s['_eta_str'] = 'Unknown ETA'
|
||||||
('100%% of %s in %s' %
|
|
||||||
(data_len_str, self.format_seconds(tot_time))),
|
if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
|
||||||
is_last_line=True)
|
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
|
||||||
|
elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
|
||||||
|
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
|
||||||
|
else:
|
||||||
|
if s.get('downloaded_bytes') == 0:
|
||||||
|
s['_percent_str'] = self.format_percent(0)
|
||||||
|
else:
|
||||||
|
s['_percent_str'] = 'Unknown %'
|
||||||
|
|
||||||
|
if s.get('speed') is not None:
|
||||||
|
s['_speed_str'] = self.format_speed(s['speed'])
|
||||||
|
else:
|
||||||
|
s['_speed_str'] = 'Unknown speed'
|
||||||
|
|
||||||
|
if s.get('total_bytes') is not None:
|
||||||
|
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||||
|
msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||||
|
elif s.get('total_bytes_estimate') is not None:
|
||||||
|
s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
|
||||||
|
msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||||
|
else:
|
||||||
|
if s.get('downloaded_bytes') is not None:
|
||||||
|
s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
|
||||||
|
if s.get('elapsed'):
|
||||||
|
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||||
|
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
|
||||||
|
else:
|
||||||
|
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
|
||||||
|
else:
|
||||||
|
msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
|
||||||
|
|
||||||
|
self._report_progress_status(msg_template % s)
|
||||||
|
|
||||||
def report_resuming_byte(self, resume_len):
|
def report_resuming_byte(self, resume_len):
|
||||||
"""Report attempt to resume at given byte."""
|
"""Report attempt to resume at given byte."""
|
||||||
|
@ -75,7 +75,7 @@ class ExternalFD(FileDownloader):
|
|||||||
|
|
||||||
class CurlFD(ExternalFD):
|
class CurlFD(ExternalFD):
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-o', tmpfilename]
|
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._source_address('--interface')
|
cmd += self._source_address('--interface')
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import division, unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
@ -15,7 +15,6 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_pack,
|
struct_pack,
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
format_bytes,
|
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
@ -252,17 +251,6 @@ class F4mFD(FileDownloader):
|
|||||||
requested_bitrate = info_dict.get('tbr')
|
requested_bitrate = info_dict.get('tbr')
|
||||||
self.to_screen('[download] Downloading f4m manifest')
|
self.to_screen('[download] Downloading f4m manifest')
|
||||||
manifest = self.ydl.urlopen(man_url).read()
|
manifest = self.ydl.urlopen(man_url).read()
|
||||||
self.report_destination(filename)
|
|
||||||
http_dl = HttpQuietDownloader(
|
|
||||||
self.ydl,
|
|
||||||
{
|
|
||||||
'continuedl': True,
|
|
||||||
'quiet': True,
|
|
||||||
'noprogress': True,
|
|
||||||
'ratelimit': self.params.get('ratelimit', None),
|
|
||||||
'test': self.params.get('test', False),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
doc = etree.fromstring(manifest)
|
doc = etree.fromstring(manifest)
|
||||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||||
@ -298,39 +286,65 @@ class F4mFD(FileDownloader):
|
|||||||
# For some akamai manifests we'll need to add a query to the fragment url
|
# For some akamai manifests we'll need to add a query to the fragment url
|
||||||
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||||
|
|
||||||
|
self.report_destination(filename)
|
||||||
|
http_dl = HttpQuietDownloader(
|
||||||
|
self.ydl,
|
||||||
|
{
|
||||||
|
'continuedl': True,
|
||||||
|
'quiet': True,
|
||||||
|
'noprogress': True,
|
||||||
|
'ratelimit': self.params.get('ratelimit', None),
|
||||||
|
'test': self.params.get('test', False),
|
||||||
|
}
|
||||||
|
)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||||
|
|
||||||
write_flv_header(dest_stream)
|
write_flv_header(dest_stream)
|
||||||
write_metadata_tag(dest_stream, metadata)
|
write_metadata_tag(dest_stream, metadata)
|
||||||
|
|
||||||
# This dict stores the download progress, it's updated by the progress
|
# This dict stores the download progress, it's updated by the progress
|
||||||
# hook
|
# hook
|
||||||
state = {
|
state = {
|
||||||
|
'status': 'downloading',
|
||||||
'downloaded_bytes': 0,
|
'downloaded_bytes': 0,
|
||||||
'frag_counter': 0,
|
'frag_index': 0,
|
||||||
|
'frag_count': total_frags,
|
||||||
|
'filename': filename,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
}
|
}
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|
||||||
def frag_progress_hook(status):
|
def frag_progress_hook(s):
|
||||||
frag_total_bytes = status.get('total_bytes', 0)
|
if s['status'] not in ('downloading', 'finished'):
|
||||||
estimated_size = (state['downloaded_bytes'] +
|
return
|
||||||
(total_frags - state['frag_counter']) * frag_total_bytes)
|
|
||||||
if status['status'] == 'finished':
|
frag_total_bytes = s.get('total_bytes', 0)
|
||||||
|
if s['status'] == 'finished':
|
||||||
state['downloaded_bytes'] += frag_total_bytes
|
state['downloaded_bytes'] += frag_total_bytes
|
||||||
state['frag_counter'] += 1
|
state['frag_index'] += 1
|
||||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
|
||||||
byte_counter = state['downloaded_bytes']
|
estimated_size = (
|
||||||
|
(state['downloaded_bytes'] + frag_total_bytes)
|
||||||
|
/ (state['frag_index'] + 1) * total_frags)
|
||||||
|
time_now = time.time()
|
||||||
|
state['total_bytes_estimate'] = estimated_size
|
||||||
|
state['elapsed'] = time_now - start
|
||||||
|
|
||||||
|
if s['status'] == 'finished':
|
||||||
|
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||||
else:
|
else:
|
||||||
frag_downloaded_bytes = status['downloaded_bytes']
|
frag_downloaded_bytes = s['downloaded_bytes']
|
||||||
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
|
|
||||||
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
||||||
frag_total_bytes)
|
frag_total_bytes)
|
||||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||||
progress += frag_progress / float(total_frags)
|
progress += frag_progress / float(total_frags)
|
||||||
|
|
||||||
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
|
state['eta'] = self.calc_eta(
|
||||||
self.report_progress(progress, format_bytes(estimated_size),
|
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
|
||||||
status.get('speed'), eta)
|
state['speed'] = s.get('speed')
|
||||||
|
self._hook_progress(state)
|
||||||
|
|
||||||
http_dl.add_progress_hook(frag_progress_hook)
|
http_dl.add_progress_hook(frag_progress_hook)
|
||||||
|
|
||||||
frags_filenames = []
|
frags_filenames = []
|
||||||
@ -354,8 +368,8 @@ class F4mFD(FileDownloader):
|
|||||||
frags_filenames.append(frag_filename)
|
frags_filenames.append(frag_filename)
|
||||||
|
|
||||||
dest_stream.close()
|
dest_stream.close()
|
||||||
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
|
|
||||||
|
|
||||||
|
elapsed = time.time() - start
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
for frag_file in frags_filenames:
|
for frag_file in frags_filenames:
|
||||||
os.remove(frag_file)
|
os.remove(frag_file)
|
||||||
@ -366,6 +380,7 @@ class F4mFD(FileDownloader):
|
|||||||
'total_bytes': fsize,
|
'total_bytes': fsize,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
|
'elapsed': elapsed,
|
||||||
})
|
})
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
@ -23,15 +23,14 @@ class HlsFD(FileDownloader):
|
|||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
|
||||||
ffpp = FFmpegPostProcessor(downloader=self)
|
ffpp = FFmpegPostProcessor(downloader=self)
|
||||||
program = ffpp._executable
|
if not ffpp.available:
|
||||||
if program is None:
|
|
||||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||||
return False
|
return False
|
||||||
ffpp.check_version()
|
ffpp.check_version()
|
||||||
|
|
||||||
args = [
|
args = [
|
||||||
encodeArgument(opt)
|
encodeArgument(opt)
|
||||||
for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||||
args.append(encodeFilename(tmpfilename, True))
|
args.append(encodeFilename(tmpfilename, True))
|
||||||
|
|
||||||
retval = subprocess.call(args)
|
retval = subprocess.call(args)
|
||||||
@ -48,7 +47,7 @@ class HlsFD(FileDownloader):
|
|||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
self.to_stderr('\n')
|
self.to_stderr('\n')
|
||||||
self.report_error('%s exited with code %d' % (program, retval))
|
self.report_error('%s exited with code %d' % (ffpp.basename, retval))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
|
|
||||||
from socket import error as SocketError
|
|
||||||
import errno
|
import errno
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import time
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -15,7 +14,6 @@ from ..utils import (
|
|||||||
ContentTooShortError,
|
ContentTooShortError,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
format_bytes,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -102,7 +100,7 @@ class HttpFD(FileDownloader):
|
|||||||
resume_len = 0
|
resume_len = 0
|
||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
break
|
break
|
||||||
except SocketError as e:
|
except socket.error as e:
|
||||||
if e.errno != errno.ECONNRESET:
|
if e.errno != errno.ECONNRESET:
|
||||||
# Connection reset is no problem, just retry
|
# Connection reset is no problem, just retry
|
||||||
raise
|
raise
|
||||||
@ -137,7 +135,6 @@ class HttpFD(FileDownloader):
|
|||||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
data_len_str = format_bytes(data_len)
|
|
||||||
byte_counter = 0 + resume_len
|
byte_counter = 0 + resume_len
|
||||||
block_size = self.params.get('buffersize', 1024)
|
block_size = self.params.get('buffersize', 1024)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
@ -196,20 +193,19 @@ class HttpFD(FileDownloader):
|
|||||||
# Progress message
|
# Progress message
|
||||||
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
||||||
if data_len is None:
|
if data_len is None:
|
||||||
eta = percent = None
|
eta = None
|
||||||
else:
|
else:
|
||||||
percent = self.calc_percent(byte_counter, data_len)
|
|
||||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||||
self.report_progress(percent, data_len_str, speed, eta)
|
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
|
'status': 'downloading',
|
||||||
'downloaded_bytes': byte_counter,
|
'downloaded_bytes': byte_counter,
|
||||||
'total_bytes': data_len,
|
'total_bytes': data_len,
|
||||||
'tmpfilename': tmpfilename,
|
'tmpfilename': tmpfilename,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'downloading',
|
|
||||||
'eta': eta,
|
'eta': eta,
|
||||||
'speed': speed,
|
'speed': speed,
|
||||||
|
'elapsed': now - start,
|
||||||
})
|
})
|
||||||
|
|
||||||
if is_test and byte_counter == data_len:
|
if is_test and byte_counter == data_len:
|
||||||
@ -221,7 +217,13 @@ class HttpFD(FileDownloader):
|
|||||||
return False
|
return False
|
||||||
if tmpfilename != '-':
|
if tmpfilename != '-':
|
||||||
stream.close()
|
stream.close()
|
||||||
self.report_finish(data_len_str, (time.time() - start))
|
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': byte_counter,
|
||||||
|
'total_bytes': data_len,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
'status': 'error',
|
||||||
|
})
|
||||||
if data_len is not None and byte_counter != data_len:
|
if data_len is not None and byte_counter != data_len:
|
||||||
raise ContentTooShortError(byte_counter, int(data_len))
|
raise ContentTooShortError(byte_counter, int(data_len))
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
@ -235,6 +237,7 @@ class HttpFD(FileDownloader):
|
|||||||
'total_bytes': byte_counter,
|
'total_bytes': byte_counter,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
|
'elapsed': time.time() - start,
|
||||||
})
|
})
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
@ -11,7 +11,6 @@ from ..compat import compat_str
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
check_executable,
|
check_executable,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
format_bytes,
|
|
||||||
get_exe_version,
|
get_exe_version,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -51,23 +50,23 @@ class RtmpFD(FileDownloader):
|
|||||||
if not resume_percent:
|
if not resume_percent:
|
||||||
resume_percent = percent
|
resume_percent = percent
|
||||||
resume_downloaded_data_len = downloaded_data_len
|
resume_downloaded_data_len = downloaded_data_len
|
||||||
eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
|
time_now = time.time()
|
||||||
speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
|
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
|
||||||
|
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
|
||||||
data_len = None
|
data_len = None
|
||||||
if percent > 0:
|
if percent > 0:
|
||||||
data_len = int(downloaded_data_len * 100 / percent)
|
data_len = int(downloaded_data_len * 100 / percent)
|
||||||
data_len_str = '~' + format_bytes(data_len)
|
|
||||||
self.report_progress(percent, data_len_str, speed, eta)
|
|
||||||
cursor_in_new_line = False
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
|
'status': 'downloading',
|
||||||
'downloaded_bytes': downloaded_data_len,
|
'downloaded_bytes': downloaded_data_len,
|
||||||
'total_bytes': data_len,
|
'total_bytes_estimate': data_len,
|
||||||
'tmpfilename': tmpfilename,
|
'tmpfilename': tmpfilename,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'downloading',
|
|
||||||
'eta': eta,
|
'eta': eta,
|
||||||
|
'elapsed': time_now - start,
|
||||||
'speed': speed,
|
'speed': speed,
|
||||||
})
|
})
|
||||||
|
cursor_in_new_line = False
|
||||||
else:
|
else:
|
||||||
# no percent for live streams
|
# no percent for live streams
|
||||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||||
@ -75,15 +74,15 @@ class RtmpFD(FileDownloader):
|
|||||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||||
time_now = time.time()
|
time_now = time.time()
|
||||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||||
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
|
|
||||||
cursor_in_new_line = False
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': downloaded_data_len,
|
'downloaded_bytes': downloaded_data_len,
|
||||||
'tmpfilename': tmpfilename,
|
'tmpfilename': tmpfilename,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'downloading',
|
'status': 'downloading',
|
||||||
|
'elapsed': time_now - start,
|
||||||
'speed': speed,
|
'speed': speed,
|
||||||
})
|
})
|
||||||
|
cursor_in_new_line = False
|
||||||
elif self.params.get('verbose', False):
|
elif self.params.get('verbose', False):
|
||||||
if not cursor_in_new_line:
|
if not cursor_in_new_line:
|
||||||
self.to_screen('')
|
self.to_screen('')
|
||||||
|
@ -6,6 +6,7 @@ from .academicearth import AcademicEarthCourseIE
|
|||||||
from .addanime import AddAnimeIE
|
from .addanime import AddAnimeIE
|
||||||
from .adobetv import AdobeTVIE
|
from .adobetv import AdobeTVIE
|
||||||
from .adultswim import AdultSwimIE
|
from .adultswim import AdultSwimIE
|
||||||
|
from .aftenposten import AftenpostenIE
|
||||||
from .aftonbladet import AftonbladetIE
|
from .aftonbladet import AftonbladetIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
@ -48,11 +49,17 @@ from .brightcove import BrightcoveIE
|
|||||||
from .buzzfeed import BuzzFeedIE
|
from .buzzfeed import BuzzFeedIE
|
||||||
from .byutv import BYUtvIE
|
from .byutv import BYUtvIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
|
from .camdemy import (
|
||||||
|
CamdemyIE,
|
||||||
|
CamdemyFolderIE
|
||||||
|
)
|
||||||
from .canal13cl import Canal13clIE
|
from .canal13cl import Canal13clIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbsnews import CBSNewsIE
|
from .cbsnews import CBSNewsIE
|
||||||
|
from .cbssports import CBSSportsIE
|
||||||
|
from .ccc import CCCIE
|
||||||
from .ceskatelevize import CeskaTelevizeIE
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
from .chilloutzone import ChilloutzoneIE
|
from .chilloutzone import ChilloutzoneIE
|
||||||
@ -73,7 +80,7 @@ from .collegehumor import CollegeHumorIE
|
|||||||
from .collegerama import CollegeRamaIE
|
from .collegerama import CollegeRamaIE
|
||||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||||
from .comcarcoff import ComCarCoffIE
|
from .comcarcoff import ComCarCoffIE
|
||||||
from .commonmistakes import CommonMistakesIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
from .criterion import CriterionIE
|
from .criterion import CriterionIE
|
||||||
@ -115,6 +122,7 @@ from .ellentv import (
|
|||||||
EllenTVClipsIE,
|
EllenTVClipsIE,
|
||||||
)
|
)
|
||||||
from .elpais import ElPaisIE
|
from .elpais import ElPaisIE
|
||||||
|
from .embedly import EmbedlyIE
|
||||||
from .empflix import EMPFlixIE
|
from .empflix import EMPFlixIE
|
||||||
from .engadget import EngadgetIE
|
from .engadget import EngadgetIE
|
||||||
from .eporner import EpornerIE
|
from .eporner import EpornerIE
|
||||||
@ -183,6 +191,7 @@ from .hellporno import HellPornoIE
|
|||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
from .historicfilms import HistoricFilmsIE
|
from .historicfilms import HistoricFilmsIE
|
||||||
|
from .history import HistoryIE
|
||||||
from .hitbox import HitboxIE, HitboxLiveIE
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
from .hornbunny import HornBunnyIE
|
from .hornbunny import HornBunnyIE
|
||||||
from .hostingbulk import HostingBulkIE
|
from .hostingbulk import HostingBulkIE
|
||||||
@ -197,6 +206,7 @@ from .imdb import (
|
|||||||
ImdbIE,
|
ImdbIE,
|
||||||
ImdbListIE
|
ImdbListIE
|
||||||
)
|
)
|
||||||
|
from .imgur import ImgurIE
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE, InstagramUserIE
|
from .instagram import InstagramIE, InstagramUserIE
|
||||||
@ -275,6 +285,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
|
|||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .myvidster import MyVidsterIE
|
from .myvidster import MyVidsterIE
|
||||||
|
from .nationalgeographic import NationalGeographicIE
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import (
|
from .nbc import (
|
||||||
@ -285,6 +296,7 @@ from .ndr import NDRIE
|
|||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .netzkino import NetzkinoIE
|
from .netzkino import NetzkinoIE
|
||||||
from .nerdcubed import NerdCubedFeedIE
|
from .nerdcubed import NerdCubedFeedIE
|
||||||
|
from .nerdist import NerdistIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
from .nextmedia import (
|
from .nextmedia import (
|
||||||
@ -311,6 +323,8 @@ from .nowvideo import NowVideoIE
|
|||||||
from .npo import (
|
from .npo import (
|
||||||
NPOIE,
|
NPOIE,
|
||||||
NPOLiveIE,
|
NPOLiveIE,
|
||||||
|
NPORadioIE,
|
||||||
|
NPORadioFragmentIE,
|
||||||
TegenlichtVproIE,
|
TegenlichtVproIE,
|
||||||
)
|
)
|
||||||
from .nrk import (
|
from .nrk import (
|
||||||
@ -340,7 +354,10 @@ from .playfm import PlayFMIE
|
|||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import (
|
||||||
|
PornHubIE,
|
||||||
|
PornHubPlaylistIE,
|
||||||
|
)
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .pornoxo import PornoXOIE
|
from .pornoxo import PornoXOIE
|
||||||
from .promptfile import PromptFileIE
|
from .promptfile import PromptFileIE
|
||||||
@ -361,7 +378,7 @@ from .rottentomatoes import RottenTomatoesIE
|
|||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
from .rtbf import RTBFIE
|
from .rtbf import RTBFIE
|
||||||
from .rte import RteIE
|
from .rte import RteIE
|
||||||
from .rtlnl import RtlXlIE
|
from .rtlnl import RtlNlIE
|
||||||
from .rtlnow import RTLnowIE
|
from .rtlnow import RTLnowIE
|
||||||
from .rtl2 import RTL2IE
|
from .rtl2 import RTL2IE
|
||||||
from .rtp import RTPIE
|
from .rtp import RTPIE
|
||||||
@ -376,6 +393,7 @@ from .rutube import (
|
|||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
)
|
)
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
|
from .sandia import SandiaIE
|
||||||
from .sapo import SapoIE
|
from .sapo import SapoIE
|
||||||
from .savefrom import SaveFromIE
|
from .savefrom import SaveFromIE
|
||||||
from .sbs import SBSIE
|
from .sbs import SBSIE
|
||||||
@ -426,6 +444,7 @@ from .streamcloud import StreamcloudIE
|
|||||||
from .streamcz import StreamCZIE
|
from .streamcz import StreamCZIE
|
||||||
from .streetvoice import StreetVoiceIE
|
from .streetvoice import StreetVoiceIE
|
||||||
from .sunporno import SunPornoIE
|
from .sunporno import SunPornoIE
|
||||||
|
from .svtplay import SVTPlayIE
|
||||||
from .swrmediathek import SWRMediathekIE
|
from .swrmediathek import SWRMediathekIE
|
||||||
from .syfy import SyfyIE
|
from .syfy import SyfyIE
|
||||||
from .sztvhu import SztvHuIE
|
from .sztvhu import SztvHuIE
|
||||||
@ -474,6 +493,7 @@ from .tutv import TutvIE
|
|||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvp import TvpIE, TvpSeriesIE
|
from .tvp import TvpIE, TvpSeriesIE
|
||||||
from .tvplay import TVPlayIE
|
from .tvplay import TVPlayIE
|
||||||
|
from .tweakers import TweakersIE
|
||||||
from .twentyfourvideo import TwentyFourVideoIE
|
from .twentyfourvideo import TwentyFourVideoIE
|
||||||
from .twitch import (
|
from .twitch import (
|
||||||
TwitchVideoIE,
|
TwitchVideoIE,
|
||||||
@ -553,6 +573,7 @@ from .wimp import WimpIE
|
|||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
from .worldstarhiphop import WorldStarHipHopIE
|
from .worldstarhiphop import WorldStarHipHopIE
|
||||||
from .wrzuta import WrzutaIE
|
from .wrzuta import WrzutaIE
|
||||||
|
from .wsj import WSJIE
|
||||||
from .xbef import XBefIE
|
from .xbef import XBefIE
|
||||||
from .xboxclips import XboxClipsIE
|
from .xboxclips import XboxClipsIE
|
||||||
from .xhamster import XHamsterIE
|
from .xhamster import XHamsterIE
|
||||||
@ -566,6 +587,7 @@ from .yahoo import (
|
|||||||
YahooIE,
|
YahooIE,
|
||||||
YahooSearchIE,
|
YahooSearchIE,
|
||||||
)
|
)
|
||||||
|
from .yam import YamIE
|
||||||
from .yesjapan import YesJapanIE
|
from .yesjapan import YesJapanIE
|
||||||
from .ynet import YnetIE
|
from .ynet import YnetIE
|
||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
|
@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
|
||||||
'title': 'Rick and Morty - Pilot',
|
'title': 'Rick and Morty - Pilot',
|
||||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||||
}
|
}
|
||||||
@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '-t8CamQlQ2aYZ49ItZCFog',
|
||||||
'title': 'American Dad - Putting Francine Out of Business',
|
'title': 'American Dad - Putting Francine Out of Business',
|
||||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||||
},
|
},
|
||||||
|
103
youtube_dl/extractor/aftenposten.py
Normal file
103
youtube_dl/extractor/aftenposten.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
xpath_with_ns,
|
||||||
|
xpath_text,
|
||||||
|
find_xpath_attr,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Extractor for aftenposten.no web TV pages. The page carries a numeric
# xstream id (data-xs-id); metadata and media URLs are read from the xstream
# Atom feed requested for that id.
class AftenpostenIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'

    _TEST = {
        'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
        'md5': 'fd828cd29774a729bf4d4425fe192972',
        'info_dict': {
            'id': '21039',
            'ext': 'mov',
            'title': 'TRAILER: "Sweatshop" - I can´t take any more',
            'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
            'timestamp': 1416927969,
            'upload_date': '20141125',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for an aftenposten.no web TV URL.

        Returns a dict with 'id', 'title', 'description', 'timestamp',
        'formats' and 'thumbnails'.
        """
        # The slug from the URL is only used to label the page download;
        # the real video id is the numeric one embedded in the page.
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        video_id = self._html_search_regex(
            r'data-xs-id="(\d+)"', webpage, 'video id')

        data = self._download_xml(
            'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)

        # Namespace prefixes used in the XPath expressions below.
        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        formats = []
        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
        for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
            media_url = media_content.get('url')
            if not media_url:
                continue
            tbr = int_or_none(media_content.get('bitrate'))
            # RTMP URLs are split into connection URL, application and play path.
            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if mobj:
                # tbr may be None (e.g. no bitrate attribute); '%d' % None
                # would raise TypeError, so fall back to a plain label.
                if tbr is not None:
                    format_id = 'rtmp-%d' % tbr
                else:
                    format_id = 'rtmp'
                formats.append({
                    'url': mobj.group('url'),
                    'play_path': 'mp4:%s' % mobj.group('playpath'),
                    'app': mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    'format_id': format_id,
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })
        self._sort_formats(formats)

        # The 'original' link is appended after sorting, so it stays last in
        # the list regardless of bitrate.
        # NOTE(review): presumably deliberate so the original file is preferred - confirm.
        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        if link is not None:
            formats.append({
                'url': link.get('href'),
                'format_id': link.get('rel'),
            })

        # One thumbnail per xt:splash element of the media group.
        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
@ -1,8 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -21,9 +19,7 @@ class AftonbladetIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.search(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video_id = mobj.group('video_id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
# find internal video meta data
|
# find internal video meta data
|
||||||
|
@ -20,6 +20,7 @@ class AparatIE(InfoExtractor):
|
|||||||
'id': 'wP8On',
|
'id': 'wP8On',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'تیم گلکسی 11 - زومیت',
|
'title': 'تیم گلکسی 11 - زومیت',
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
# 'skip': 'Extremely unreliable',
|
# 'skip': 'Extremely unreliable',
|
||||||
}
|
}
|
||||||
@ -34,7 +35,8 @@ class AparatIE(InfoExtractor):
|
|||||||
video_id + '/vt/frame')
|
video_id + '/vt/frame')
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
|
|
||||||
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
|
video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
|
||||||
|
r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
|
||||||
for i, video_url in enumerate(video_urls):
|
for i, video_url in enumerate(video_urls):
|
||||||
req = HEADRequest(video_url)
|
req = HEADRequest(video_url)
|
||||||
res = self._request_webpage(
|
res = self._request_webpage(
|
||||||
@ -46,7 +48,7 @@ class AparatIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -54,4 +56,5 @@ class AparatIE(InfoExtractor):
|
|||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'age_limit': self._family_friendly_search(webpage),
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,9 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'manofsteel',
|
||||||
|
},
|
||||||
"playlist": [
|
"playlist": [
|
||||||
{
|
{
|
||||||
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
|
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
|
||||||
|
@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor):
|
|||||||
'duration': int(info['length']),
|
'duration': int(info['length']),
|
||||||
'view_count': int(info['views_total']),
|
'view_count': int(info['views_total']),
|
||||||
'uploader': info['username'],
|
'uploader': info['username'],
|
||||||
'uploader_id': info['uid'],
|
'uploader_id': info['owner']['uid'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor):
|
|||||||
|
|
||||||
download_link = m_download.group(1)
|
download_link = m_download.group(1)
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'var TralbumData = {.*?id: (?P<id>\d+),?$',
|
r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
|
||||||
webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
|
webpage, 'video id')
|
||||||
|
|
||||||
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
||||||
# We get the dictionary of the track from some javascript code
|
# We get the dictionary of the track from some javascript code
|
||||||
info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
|
all_info = self._parse_json(self._search_regex(
|
||||||
info = json.loads(info)[0]
|
r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
|
||||||
|
info = all_info[0]
|
||||||
# We pick mp3-320 for now, until format selection can be easily implemented.
|
# We pick mp3-320 for now, until format selection can be easily implemented.
|
||||||
mp3_info = info['downloads']['mp3-320']
|
mp3_info = info['downloads']['mp3-320']
|
||||||
# If we try to use this url it says the link has expired
|
# If we try to use this url it says the link has expired
|
||||||
initial_url = mp3_info['url']
|
initial_url = mp3_info['url']
|
||||||
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
|
m_url = re.match(
|
||||||
m_url = re.match(re_url, initial_url)
|
r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
|
||||||
|
initial_url)
|
||||||
# We build the url we will use to get the final track url
|
# We build the url we will use to get the final track url
|
||||||
# This url is build in Bandcamp in the script download_bunde_*.js
|
# This url is build in Bandcamp in the script download_bunde_*.js
|
||||||
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
|
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
|
||||||
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
|
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
|
||||||
# If we could correctly generate the .rand field the url would be
|
# If we could correctly generate the .rand field the url would be
|
||||||
# in the "download_url" key
|
# in the "download_url" key
|
||||||
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
|
final_url = self._search_regex(
|
||||||
|
r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -106,7 +109,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
|
|
||||||
class BandcampAlbumIE(InfoExtractor):
|
class BandcampAlbumIE(InfoExtractor):
|
||||||
IE_NAME = 'Bandcamp:album'
|
IE_NAME = 'Bandcamp:album'
|
||||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
|
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
@ -130,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
],
|
],
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Jazz Format Mixtape vol.1',
|
'title': 'Jazz Format Mixtape vol.1',
|
||||||
|
'id': 'jazz-format-mixtape-vol-1',
|
||||||
|
'uploader_id': 'blazo',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'playlistend': 2
|
'playlistend': 2
|
||||||
},
|
},
|
||||||
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
'skip': 'Bandcamp imposes download limits.'
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
|
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Hierophany of the Open Grave',
|
'title': 'Hierophany of the Open Grave',
|
||||||
|
'uploader_id': 'nightbringer',
|
||||||
|
'id': 'hierophany-of-the-open-grave',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 9,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://dotscale.bandcamp.com',
|
'url': 'http://dotscale.bandcamp.com',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Loom',
|
'title': 'Loom',
|
||||||
|
'id': 'dotscale',
|
||||||
|
'uploader_id': 'dotscale',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 7,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
playlist_id = mobj.group('subdomain')
|
uploader_id = mobj.group('subdomain')
|
||||||
title = mobj.group('title')
|
album_id = mobj.group('album_id')
|
||||||
display_id = title or playlist_id
|
playlist_id = album_id or uploader_id
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||||
if not tracks_paths:
|
if not tracks_paths:
|
||||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
@ -165,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
|
'uploader_id': uploader_id,
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
@ -273,7 +273,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||||||
formats, subtitles = self._download_media_selector(programme_id)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
return programme_id, title, description, duration, formats, subtitles
|
return programme_id, title, description, duration, formats, subtitles
|
||||||
except ExtractorError as ee:
|
except ExtractorError as ee:
|
||||||
if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# fallback to legacy playlist
|
# fallback to legacy playlist
|
||||||
|
@ -9,7 +9,7 @@ class BeegIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://beeg.com/5416503',
|
'url': 'http://beeg.com/5416503',
|
||||||
'md5': '634526ae978711f6b748fe0dd6c11f57',
|
'md5': '1bff67111adb785c51d1b42959ec10e5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5416503',
|
'id': '5416503',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Sealife',
|
'title': 'Sealife',
|
||||||
|
'id': '3550319591001',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 7,
|
||||||
},
|
},
|
||||||
@ -108,7 +109,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
|
# Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
|
||||||
object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
|
object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
|
||||||
lambda m: m.group(1) + '/>', object_str)
|
lambda m: m.group(1) + '/>', object_str)
|
||||||
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
|
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
|
||||||
object_str = object_str.replace('<--', '<!--')
|
object_str = object_str.replace('<--', '<!--')
|
||||||
@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
playlist_info = json_data['videoList']
|
playlist_info = json_data['videoList']
|
||||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||||
|
|
||||||
return self.playlist_result(videos, playlist_id=playlist_info['id'],
|
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||||
|
|
||||||
def _extract_video_info(self, video_info):
|
def _extract_video_info(self, video_info):
|
||||||
|
@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
'skip_download': True, # Got enough YouTube download tests
|
'skip_download': True, # Got enough YouTube download tests
|
||||||
},
|
},
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'look-at-this-cute-dog-omg',
|
||||||
'description': 're:Munchkin the Teddy Bear is back ?!',
|
'description': 're:Munchkin the Teddy Bear is back ?!',
|
||||||
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
||||||
},
|
},
|
||||||
@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20141124',
|
'upload_date': '20141124',
|
||||||
'uploader_id': 'CindysMunchkin',
|
'uploader_id': 'CindysMunchkin',
|
||||||
'description': 're:© 2014 Munchkin the Shih Tzu',
|
'description': 're:© 2014 Munchkin the',
|
||||||
'uploader': 'Munchkin the Shih Tzu',
|
'uploader': 're:^Munchkin the',
|
||||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
153
youtube_dl/extractor/camdemy.py
Normal file
153
youtube_dl/extractor/camdemy.py
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
parse_iso8601,
|
||||||
|
str_to_int,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Extractor for single camdemy.com media pages. Pages that merely point to an
# external source are delegated to whichever extractor handles that URL;
# otherwise metadata comes from the oEmbed endpoint and the file name from a
# fileList.xml located next to the thumbnail.
class CamdemyIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
    _TESTS = [{
        # single file
        'url': 'http://www.camdemy.com/media/5181/',
        'md5': '5a5562b6a98b37873119102e052e311b',
        'info_dict': {
            'id': '5181',
            'ext': 'mp4',
            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
            # raw string: '\.' is not a valid escape in a normal literal
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': '',
            'creator': 'ss11spring',
            'upload_date': '20130114',
            'timestamp': 1358154556,
            'view_count': int,
        }
    }, {
        # With non-empty description
        'url': 'http://www.camdemy.com/media/13885',
        'md5': '4576a3bb2581f86c61044822adbd1249',
        'info_dict': {
            'id': '13885',
            'ext': 'mp4',
            'title': 'EverCam + Camdemy QuickStart',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
            'creator': 'evercam',
            'upload_date': '20140620',
            'timestamp': 1403271569,
        }
    }, {
        # External source
        'url': 'http://www.camdemy.com/media/14842',
        'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
        'info_dict': {
            'id': '2vsYQzNIsJo',
            'ext': 'mp4',
            'upload_date': '20130211',
            'uploader': 'Hun Kim',
            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
            'uploader_id': 'hunkimtutorials',
            'title': 'Excel 2013 Tutorial - How to add Password Protection',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a camdemy.com media URL.

        Returns a url_result for externally hosted media, otherwise a dict
        with 'id', 'url', 'title', 'thumbnail', 'description', 'creator',
        'duration', 'timestamp' and 'view_count'.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Externally hosted media: hand the source URL over unchanged.
        src_from = self._html_search_regex(
            r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
            'external source', default=None)
        if src_from:
            return self.url_result(src_from)

        oembed_obj = self._download_json(
            'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)

        # The video folder is resolved relative to the thumbnail URL, and the
        # actual file name is listed in fileList.xml inside that folder.
        thumb_url = oembed_obj['thumbnail_url']
        video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
        file_list_doc = self._download_xml(
            compat_urlparse.urljoin(video_folder, 'fileList.xml'),
            video_id, 'Filelist XML')
        file_name = file_list_doc.find('./video/item/fileName').text
        video_url = compat_urlparse.urljoin(video_folder, file_name)

        # The posted time is parsed with a fixed +8h offset.
        # NOTE(review): presumably the site's local time zone - confirm.
        timestamp = parse_iso8601(self._html_search_regex(
            r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
            page, 'creation time', fatal=False),
            delimiter=' ', timezone=datetime.timedelta(hours=8))
        view_count = str_to_int(self._html_search_regex(
            r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
            page, 'view count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': oembed_obj['title'],
            'thumbnail': thumb_url,
            'description': self._html_search_meta('description', page),
            # Optional metadata: like timestamp/view_count above, a missing
            # value must not abort the extraction with a KeyError.
            'creator': oembed_obj.get('author_name'),
            'duration': oembed_obj.get('duration'),
            'timestamp': timestamp,
            'view_count': view_count,
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Playlist extractor for camdemy.com folders: requests the folder page in list
# mode and turns every /media/<id> link found on it into a playlist entry.
class CamdemyFolderIE(InfoExtractor):
    # Dots are escaped (an unescaped '.' matches any character) and 'www.' is
    # optional, matching the pattern used by CamdemyIE.
    _VALID_URL = r'http://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
    _TESTS = [{
        # links with trailing slash
        'url': 'http://www.camdemy.com/folder/450',
        'info_dict': {
            'id': '450',
            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
        },
        'playlist_mincount': 145
    }, {
        # links without trailing slash
        # and multi-page
        'url': 'http://www.camdemy.com/folder/853',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }, {
        # with displayMode parameter. For testing the codes to add parameters
        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }]

    def _real_extract(self, url):
        """Return a playlist result with one entry per media link in the folder."""
        folder_id = self._match_id(url)

        # Add displayMode=list so that all links are displayed in a single page.
        # Any displayMode already present in the URL is overridden; other
        # query parameters are kept.
        parsed_url = list(compat_urlparse.urlparse(url))
        query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
        query['displayMode'] = 'list'
        parsed_url[4] = compat_urllib_parse.urlencode(query)
        final_url = compat_urlparse.urlunparse(parsed_url)

        page = self._download_webpage(final_url, folder_id)
        # Media links appear both with and without a trailing slash.
        matches = re.findall(r"href='(/media/\d+/?)'", page)

        entries = [self.url_result('http://www.camdemy.com' + media_path)
                   for media_path in matches]

        # The folder title is taken from the page's 'keywords' meta tag.
        folder_title = self._html_search_meta('keywords', page)

        return self.playlist_result(entries, folder_id, folder_title)
|
@ -15,12 +15,13 @@ from ..utils import (
|
|||||||
|
|
||||||
class CanalplusIE(InfoExtractor):
|
class CanalplusIE(InfoExtractor):
|
||||||
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
|
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
|
||||||
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
|
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
|
||||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
|
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
|
||||||
_SITE_ID_MAP = {
|
_SITE_ID_MAP = {
|
||||||
'canalplus.fr': 'cplus',
|
'canalplus.fr': 'cplus',
|
||||||
'piwiplus.fr': 'teletoon',
|
'piwiplus.fr': 'teletoon',
|
||||||
'd8.tv': 'd8',
|
'd8.tv': 'd8',
|
||||||
|
'itele.fr': 'itele',
|
||||||
}
|
}
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor):
|
|||||||
'upload_date': '20131108',
|
'upload_date': '20131108',
|
||||||
},
|
},
|
||||||
'skip': 'videos get deleted after a while',
|
'skip': 'videos get deleted after a while',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
|
||||||
|
'md5': '65aa83ad62fe107ce29e564bb8712580',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1213714',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
|
||||||
|
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
|
||||||
|
'upload_date': '20150211',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
real_id = self._search_regex(
|
real_id = self._search_regex(
|
||||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||||
|
30
youtube_dl/extractor/cbssports.py
Normal file
30
youtube_dl/extractor/cbssports.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class CBSSportsIE(InfoExtractor):
    """Extractor for cbssports.com video player pages.

    The page URL carries a section name and a video id.  The section's JSON
    listing is downloaded, the entry whose 'pcid' equals the video id is
    picked, and extraction is handed over to the ThePlatform extractor using
    that entry's 'pid'.
    """

    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
        'info_dict': {
            'id': '_d5_GbO8p1sT',
            'ext': 'flv',
            'title': 'US Open flashbacks: 1990s',
            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the matching ThePlatform video.

        Raises StopIteration when no entry of the section listing has a
        'pcid' equal to the id taken from the URL.
        """
        section, video_id = re.match(self._VALID_URL, url).group('section', 'id')

        listing_url = (
            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section)
        section_videos = self._download_json(listing_url, video_id)

        # The JSON document describes every video of the section, so the
        # requested one has to be looked up by its 'pcid'.
        candidates = (entry for entry in section_videos if entry['pcid'] == video_id)
        video_info = next(candidates)

        platform_url = 'theplatform:%s' % video_info['pid']
        return self.url_result(platform_url, 'ThePlatform')
|
99
youtube_dl/extractor/ccc.py
Normal file
99
youtube_dl/extractor/ccc.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
qualities,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CCCIE(InfoExtractor):
    """Extractor for recordings published on media.ccc.de.

    Title, description, upload date, view count, thumbnail and the list of
    downloadable formats (plus their optional .torrent links) are all scraped
    from the HTML of the recording page.
    """

    IE_NAME = 'media.ccc.de'
    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'

    _TEST = {
        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
        'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
        'info_dict': {
            'id': '20131228183',
            'ext': 'mp4',
            'title': 'Introduction to Processor Design',
            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
            # Raw string: same value as before, without the invalid '\.' escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'view_count': int,
            'upload_date': '20131229',
        }
    }

    def _real_extract(self, url):
        """Scrape the recording page at *url* and return its info dict.

        Returns a dict with the keys 'id', 'title', 'description',
        'thumbnail', 'view_count', 'upload_date' and 'formats'.  Only the
        title lookup is fatal; the other metadata lookups use fatal=False.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Two orderings of the same format ids; which one is used depends on
        # the 'prefer_free_formats' option.  NOTE(review): presumably later
        # entries rank higher -- confirm against utils.qualities.
        if self._downloader.params.get('prefer_free_formats'):
            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
        else:
            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])

        title = self._html_search_regex(
            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r"(?s)<p class='description'>(.*?)</p>",
            webpage, 'description', fatal=False)
        upload_date = unified_strdate(self._html_search_regex(
            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
            webpage, 'upload date', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
            webpage, 'view count', fatal=False))

        # Each match is one file type label, its direct download link and,
        # optionally, a following .torrent link.
        matches = re.finditer(r'''(?xs)
            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
            <a\s+href='(?P<http_url>[^']+)'>\s*
            (?:
                .*?
                <a\s+href='(?P<torrent_url>[^']+\.torrent)'
            )?''', webpage)
        formats = []
        for m in matches:
            format_name = m.group('format')
            # The format id is the last directory component of the download
            # URL; it is None when the URL does not have that shape.
            format_id = self._search_regex(
                r'.*/([a-z0-9_-]+)/[^/]*$',
                m.group('http_url'), 'format id', default=None)

            # format_id may be None (default=None above), so it must be
            # checked before the substring test, which would otherwise raise
            # TypeError and abort the whole extraction.
            if format_id is None:
                vcodec = None
            elif 'h264' in format_id:
                vcodec = 'h264'
            elif format_id in ('mp3', 'opus'):
                vcodec = 'none'
            else:
                vcodec = None

            formats.append({
                'format_id': format_id,
                'format': format_name,
                'url': m.group('http_url'),
                'vcodec': vcodec,
                'preference': preference(format_id),
            })

            if m.group('torrent_url'):
                formats.append({
                    'format_id': 'torrent-%s' % (format_name if format_id is None else format_id),
                    'format': '%s (torrent)' % format_name,
                    'proto': 'torrent',
                    'format_note': '(unsupported; will just download the .torrent file)',
                    'vcodec': vcodec,
                    # Always rank the torrent variant far below the direct download.
                    'preference': -100 + preference(format_id),
                    'url': m.group('torrent_url'),
                })
        self._sort_formats(formats)

        thumbnail = self._html_search_regex(
            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'upload_date': upload_date,
            'formats': formats,
        }
|
@ -27,7 +27,6 @@ from ..utils import (
|
|||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
HEADRequest,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
@ -145,6 +144,7 @@ class InfoExtractor(object):
|
|||||||
thumbnail: Full URL to a video thumbnail image.
|
thumbnail: Full URL to a video thumbnail image.
|
||||||
description: Full video description.
|
description: Full video description.
|
||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
|
creator: The main artist who created the video.
|
||||||
timestamp: UNIX timestamp of the moment the video became available.
|
timestamp: UNIX timestamp of the moment the video became available.
|
||||||
upload_date: Video upload date (YYYYMMDD).
|
upload_date: Video upload date (YYYYMMDD).
|
||||||
If not explicitly set, calculated from timestamp.
|
If not explicitly set, calculated from timestamp.
|
||||||
@ -156,6 +156,7 @@ class InfoExtractor(object):
|
|||||||
view_count: How many users have watched the video on the platform.
|
view_count: How many users have watched the video on the platform.
|
||||||
like_count: Number of positive ratings of the video
|
like_count: Number of positive ratings of the video
|
||||||
dislike_count: Number of negative ratings of the video
|
dislike_count: Number of negative ratings of the video
|
||||||
|
average_rating: Average rating given by users, the scale used depends on the webpage
|
||||||
comment_count: Number of comments on the video
|
comment_count: Number of comments on the video
|
||||||
comments: A list of comments, each with one or more of the following
|
comments: A list of comments, each with one or more of the following
|
||||||
properties (all but one of text or html optional):
|
properties (all but one of text or html optional):
|
||||||
@ -263,8 +264,15 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def extract(self, url):
|
def extract(self, url):
|
||||||
"""Extracts URL information and returns it in list of dicts."""
|
"""Extracts URL information and returns it in list of dicts."""
|
||||||
self.initialize()
|
try:
|
||||||
return self._real_extract(url)
|
self.initialize()
|
||||||
|
return self._real_extract(url)
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
|
except compat_http_client.IncompleteRead as e:
|
||||||
|
raise ExtractorError('A network error has occured.', cause=e, expected=True)
|
||||||
|
except (KeyError, StopIteration) as e:
|
||||||
|
raise ExtractorError('An extractor error has occured.', cause=e)
|
||||||
|
|
||||||
def set_downloader(self, downloader):
|
def set_downloader(self, downloader):
|
||||||
"""Sets the downloader for this IE."""
|
"""Sets the downloader for this IE."""
|
||||||
@ -506,7 +514,7 @@ class InfoExtractor(object):
|
|||||||
if mobj:
|
if mobj:
|
||||||
break
|
break
|
||||||
|
|
||||||
if os.name != 'nt' and sys.stderr.isatty():
|
if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
|
||||||
_name = '\033[0;34m%s\033[0m' % name
|
_name = '\033[0;34m%s\033[0m' % name
|
||||||
else:
|
else:
|
||||||
_name = name
|
_name = name
|
||||||
@ -655,6 +663,21 @@ class InfoExtractor(object):
|
|||||||
}
|
}
|
||||||
return RATING_TABLE.get(rating.lower(), None)
|
return RATING_TABLE.get(rating.lower(), None)
|
||||||
|
|
||||||
|
def _family_friendly_search(self, html):
|
||||||
|
# See http://schema.org/VideoObject
|
||||||
|
family_friendly = self._html_search_meta('isFamilyFriendly', html)
|
||||||
|
|
||||||
|
if not family_friendly:
|
||||||
|
return None
|
||||||
|
|
||||||
|
RATING_TABLE = {
|
||||||
|
'1': 0,
|
||||||
|
'true': 0,
|
||||||
|
'0': 18,
|
||||||
|
'false': 18,
|
||||||
|
}
|
||||||
|
return RATING_TABLE.get(family_friendly.lower(), None)
|
||||||
|
|
||||||
def _twitter_search_player(self, html):
|
def _twitter_search_player(self, html):
|
||||||
return self._html_search_meta('twitter:player', html,
|
return self._html_search_meta('twitter:player', html,
|
||||||
'twitter card player')
|
'twitter card player')
|
||||||
@ -704,15 +727,15 @@ class InfoExtractor(object):
|
|||||||
preference,
|
preference,
|
||||||
f.get('language_preference') if f.get('language_preference') is not None else -1,
|
f.get('language_preference') if f.get('language_preference') is not None else -1,
|
||||||
f.get('quality') if f.get('quality') is not None else -1,
|
f.get('quality') if f.get('quality') is not None else -1,
|
||||||
|
f.get('tbr') if f.get('tbr') is not None else -1,
|
||||||
|
f.get('filesize') if f.get('filesize') is not None else -1,
|
||||||
|
f.get('vbr') if f.get('vbr') is not None else -1,
|
||||||
f.get('height') if f.get('height') is not None else -1,
|
f.get('height') if f.get('height') is not None else -1,
|
||||||
f.get('width') if f.get('width') is not None else -1,
|
f.get('width') if f.get('width') is not None else -1,
|
||||||
ext_preference,
|
ext_preference,
|
||||||
f.get('tbr') if f.get('tbr') is not None else -1,
|
|
||||||
f.get('vbr') if f.get('vbr') is not None else -1,
|
|
||||||
f.get('abr') if f.get('abr') is not None else -1,
|
f.get('abr') if f.get('abr') is not None else -1,
|
||||||
audio_ext_preference,
|
audio_ext_preference,
|
||||||
f.get('fps') if f.get('fps') is not None else -1,
|
f.get('fps') if f.get('fps') is not None else -1,
|
||||||
f.get('filesize') if f.get('filesize') is not None else -1,
|
|
||||||
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
|
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
|
||||||
f.get('source_preference') if f.get('source_preference') is not None else -1,
|
f.get('source_preference') if f.get('source_preference') is not None else -1,
|
||||||
f.get('format_id'),
|
f.get('format_id'),
|
||||||
@ -729,9 +752,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _is_valid_url(self, url, video_id, item='video'):
|
def _is_valid_url(self, url, video_id, item='video'):
|
||||||
try:
|
try:
|
||||||
self._request_webpage(
|
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||||
HEADRequest(url), video_id,
|
|
||||||
'Checking %s URL' % item)
|
|
||||||
return True
|
return True
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError):
|
if isinstance(e.cause, compat_HTTPError):
|
||||||
@ -764,7 +785,7 @@ class InfoExtractor(object):
|
|||||||
self.to_screen(msg)
|
self.to_screen(msg)
|
||||||
time.sleep(timeout)
|
time.sleep(timeout)
|
||||||
|
|
||||||
def _extract_f4m_formats(self, manifest_url, video_id):
|
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
|
||||||
manifest = self._download_xml(
|
manifest = self._download_xml(
|
||||||
manifest_url, video_id, 'Downloading f4m manifest',
|
manifest_url, video_id, 'Downloading f4m manifest',
|
||||||
'Unable to download f4m manifest')
|
'Unable to download f4m manifest')
|
||||||
@ -777,26 +798,28 @@ class InfoExtractor(object):
|
|||||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||||
for i, media_el in enumerate(media_nodes):
|
for i, media_el in enumerate(media_nodes):
|
||||||
if manifest_version == '2.0':
|
if manifest_version == '2.0':
|
||||||
manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
|
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
|
||||||
|
+ (media_el.attrib.get('href') or media_el.attrib.get('url')))
|
||||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||||
format_id = 'f4m-%d' % (i if tbr is None else tbr)
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
|
||||||
'url': manifest_url,
|
'url': manifest_url,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'width': int_or_none(media_el.attrib.get('width')),
|
'width': int_or_none(media_el.attrib.get('width')),
|
||||||
'height': int_or_none(media_el.attrib.get('height')),
|
'height': int_or_none(media_el.attrib.get('height')),
|
||||||
|
'preference': preference,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||||
entry_protocol='m3u8', preference=None):
|
entry_protocol='m3u8', preference=None,
|
||||||
|
m3u8_id=None):
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'm3u8-meta',
|
'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
|
||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'protocol': 'm3u8',
|
'protocol': 'm3u8',
|
||||||
@ -815,6 +838,7 @@ class InfoExtractor(object):
|
|||||||
note='Downloading m3u8 information',
|
note='Downloading m3u8 information',
|
||||||
errnote='Failed to download m3u8 information')
|
errnote='Failed to download m3u8 information')
|
||||||
last_info = None
|
last_info = None
|
||||||
|
last_media = None
|
||||||
kv_rex = re.compile(
|
kv_rex = re.compile(
|
||||||
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
||||||
for line in m3u8_doc.splitlines():
|
for line in m3u8_doc.splitlines():
|
||||||
@ -825,6 +849,13 @@ class InfoExtractor(object):
|
|||||||
if v.startswith('"'):
|
if v.startswith('"'):
|
||||||
v = v[1:-1]
|
v = v[1:-1]
|
||||||
last_info[m.group('key')] = v
|
last_info[m.group('key')] = v
|
||||||
|
elif line.startswith('#EXT-X-MEDIA:'):
|
||||||
|
last_media = {}
|
||||||
|
for m in kv_rex.finditer(line):
|
||||||
|
v = m.group('val')
|
||||||
|
if v.startswith('"'):
|
||||||
|
v = v[1:-1]
|
||||||
|
last_media[m.group('key')] = v
|
||||||
elif line.startswith('#') or not line.strip():
|
elif line.startswith('#') or not line.strip():
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
@ -832,9 +863,8 @@ class InfoExtractor(object):
|
|||||||
formats.append({'url': format_url(line)})
|
formats.append({'url': format_url(line)})
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||||
|
|
||||||
f = {
|
f = {
|
||||||
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
|
'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
|
||||||
'url': format_url(line.strip()),
|
'url': format_url(line.strip()),
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
@ -854,16 +884,22 @@ class InfoExtractor(object):
|
|||||||
width_str, height_str = resolution.split('x')
|
width_str, height_str = resolution.split('x')
|
||||||
f['width'] = int(width_str)
|
f['width'] = int(width_str)
|
||||||
f['height'] = int(height_str)
|
f['height'] = int(height_str)
|
||||||
|
if last_media is not None:
|
||||||
|
f['m3u8_media'] = last_media
|
||||||
|
last_media = None
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
last_info = {}
|
last_info = {}
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
# TODO: improve extraction
|
# TODO: improve extraction
|
||||||
def _extract_smil_formats(self, smil_url, video_id):
|
def _extract_smil_formats(self, smil_url, video_id, fatal=True):
|
||||||
smil = self._download_xml(
|
smil = self._download_xml(
|
||||||
smil_url, video_id, 'Downloading SMIL file',
|
smil_url, video_id, 'Downloading SMIL file',
|
||||||
'Unable to download SMIL file')
|
'Unable to download SMIL file', fatal=fatal)
|
||||||
|
if smil is False:
|
||||||
|
assert not fatal
|
||||||
|
return []
|
||||||
|
|
||||||
base = smil.find('./head/meta').get('base')
|
base = smil.find('./head/meta').get('base')
|
||||||
|
|
||||||
|
@ -24,6 +24,23 @@ class CommonMistakesIE(InfoExtractor):
|
|||||||
'That doesn\'t make any sense. '
|
'That doesn\'t make any sense. '
|
||||||
'Simply remove the parameter in your command or configuration.'
|
'Simply remove the parameter in your command or configuration.'
|
||||||
) % url
|
) % url
|
||||||
if self._downloader.params.get('verbose'):
|
if not self._downloader.params.get('verbose'):
|
||||||
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
|
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
|
||||||
raise ExtractorError(msg, expected=True)
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
|
|
||||||
|
class UnicodeBOMIE(InfoExtractor):
    """Handle URLs that start with a Unicode byte order mark (U+FEFF).

    The BOM is stripped, a warning is reported, and the remainder of the
    string is passed on as a new URL result.
    """

    IE_DESC = False
    _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'

    _TESTS = [{
        'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Warn about the leading BOM and return a url_result for the rest."""
        # The 'id' group of _VALID_URL is everything after the BOM.
        bomless_url = self._match_id(url)
        warning = (
            'Your URL starts with a Byte Order Mark (BOM). '
            'Removing the BOM and looking for "%s" ...' % bomless_url)
        self.report_warning(warning)
        return self.url_result(bomless_url)
|
||||||
|
@ -194,6 +194,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'SPORT',
|
'title': 'SPORT',
|
||||||
|
'id': 'xv4bw_nqtv_sport',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 20,
|
'playlist_mincount': 20,
|
||||||
}]
|
}]
|
||||||
|
@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor):
|
|||||||
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Videoinstallation für eine Kaufhausfassade'
|
'title': 'Videoinstallation für eine Kaufhausfassade'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,13 +1,14 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DotsubIE(InfoExtractor):
|
class DotsubIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||||
'md5': '0914d4d69605090f623b7ac329fea66e',
|
'md5': '0914d4d69605090f623b7ac329fea66e',
|
||||||
@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor):
|
|||||||
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
|
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
|
||||||
|
'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
|
||||||
|
'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
||||||
|
'duration': 3169,
|
||||||
'uploader': '4v4l0n42',
|
'uploader': '4v4l0n42',
|
||||||
'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
|
'timestamp': 1292248482.625,
|
||||||
'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
|
|
||||||
'upload_date': '20101213',
|
'upload_date': '20101213',
|
||||||
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
|
info = self._download_json(
|
||||||
info = self._download_json(info_url, video_id)
|
'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
|
||||||
date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in miliseconds
|
video_url = info.get('mediaURI')
|
||||||
|
|
||||||
|
if not video_url:
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': info['mediaURI'],
|
'url': video_url,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'thumbnail': info['screenshotURI'],
|
'description': info.get('description'),
|
||||||
'description': info['description'],
|
'thumbnail': info.get('screenshotURI'),
|
||||||
'uploader': info['user'],
|
'duration': int_or_none(info.get('duration'), 1000),
|
||||||
'view_count': info['numberOfViews'],
|
'uploader': info.get('user'),
|
||||||
'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
|
'timestamp': float_or_none(info.get('dateCreated'), 1000),
|
||||||
|
'view_count': int_or_none(info.get('numberOfViews')),
|
||||||
}
|
}
|
||||||
|
@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor):
|
|||||||
'id': '1740434',
|
'id': '1740434',
|
||||||
'display_id': 'hot-perky-blonde-naked-golf',
|
'display_id': 'hot-perky-blonde-naked-golf',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Hot Perky Blonde Naked Golf',
|
'title': 'hot perky blonde naked golf',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor):
|
|||||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
|
[r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
|
||||||
|
webpage, 'title')
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'poster="([^"]+)"',
|
r'poster="([^"]+)"',
|
||||||
|
16
youtube_dl/extractor/embedly.py
Normal file
16
youtube_dl/extractor/embedly.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_unquote
|
||||||
|
|
||||||
|
|
||||||
|
class EmbedlyIE(InfoExtractor):
    """Extractor for embedly.com media widget URLs.

    The widget URL carries the real media URL, percent-encoded, in its 'url'
    query parameter; that URL is decoded and handed on as a new URL result.
    """

    # The optional host prefix is grouped as (?:(?:www|cdn)\.)? so that the
    # dot belongs to both alternatives.  The previous form (?:www|cdn\.)?
    # rejected "www.embedly.com" and accepted "wwwembedly.com".
    _VALID_URL = r'https?://(?:(?:www|cdn)\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the decoded value of the 'url' parameter."""
        return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
|
@ -1,18 +1,17 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
js_to_json,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class EscapistIE(InfoExtractor):
|
class EscapistIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
|
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||||
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
||||||
@ -20,31 +19,30 @@ class EscapistIE(InfoExtractor):
|
|||||||
'id': '6618',
|
'id': '6618',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||||
'uploader': 'the-escapist-presents',
|
'uploader_id': 'the-escapist-presents',
|
||||||
|
'uploader': 'The Escapist Presents',
|
||||||
'title': "Breaking Down Baldur's Gate",
|
'title': "Breaking Down Baldur's Gate",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
showName = mobj.group('showname')
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
videoDesc = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r'<meta name="description" content="([^"]*)"',
|
r"<h1 class='headline'><a href='/videos/view/(.*?)'",
|
||||||
webpage, 'description', fatal=False)
|
webpage, 'uploader ID', fatal=False)
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r"<h1 class='headline'>(.*?)</a>",
|
||||||
|
webpage, 'uploader', fatal=False)
|
||||||
|
description = self._html_search_meta('description', webpage)
|
||||||
|
|
||||||
playerUrl = self._og_search_video_url(webpage, name='player URL')
|
raw_title = self._html_search_meta('title', webpage, fatal=True)
|
||||||
|
title = raw_title.partition(' : ')[2]
|
||||||
|
|
||||||
title = self._html_search_regex(
|
player_url = self._og_search_video_url(webpage, name='player URL')
|
||||||
r'<meta name="title" content="([^"]*)"',
|
config_url = compat_urllib_parse.unquote(self._search_regex(
|
||||||
webpage, 'title').split(' : ')[-1]
|
r'config=(.*)$', player_url, 'config URL'))
|
||||||
|
|
||||||
configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
|
|
||||||
configUrl = compat_urllib_parse.unquote(configUrl)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
@ -53,18 +51,21 @@ class EscapistIE(InfoExtractor):
|
|||||||
cfgurl, video_id,
|
cfgurl, video_id,
|
||||||
'Downloading ' + name + ' configuration',
|
'Downloading ' + name + ' configuration',
|
||||||
'Unable to download ' + name + ' configuration',
|
'Unable to download ' + name + ' configuration',
|
||||||
transform_source=lambda s: s.replace("'", '"'))
|
transform_source=js_to_json)
|
||||||
|
|
||||||
playlist = config['playlist']
|
playlist = config['playlist']
|
||||||
|
video_url = next(
|
||||||
|
p['url'] for p in playlist
|
||||||
|
if p.get('eventCategory') == 'Video')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': playlist[1]['url'],
|
'url': video_url,
|
||||||
'format_id': name,
|
'format_id': name,
|
||||||
'quality': quality,
|
'quality': quality,
|
||||||
})
|
})
|
||||||
|
|
||||||
_add_format('normal', configUrl, quality=0)
|
_add_format('normal', config_url, quality=0)
|
||||||
hq_url = (configUrl +
|
hq_url = (config_url +
|
||||||
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
|
('&hq=1' if '?' in config_url else config_url + '?hq=1'))
|
||||||
try:
|
try:
|
||||||
_add_format('hq', hq_url, quality=1)
|
_add_format('hq', hq_url, quality=1)
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
@ -75,9 +76,10 @@ class EscapistIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': showName,
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': videoDesc,
|
'description': description,
|
||||||
'player_url': playerUrl,
|
'player_url': player_url,
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id)
|
page = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_meta('twitter:title', page, 'title')
|
|
||||||
|
title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
|
||||||
description = self._html_search_meta('twitter:description', page, 'title')
|
description = self._html_search_meta('twitter:description', page, 'title')
|
||||||
|
|
||||||
data = self._download_xml(
|
data = self._download_xml(
|
||||||
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
|
|||||||
'height': int(details.find('./height').text.strip()),
|
'height': int(details.find('./height').text.strip()),
|
||||||
} for details in item.findall('./source/file_details') if details.find('./file').text
|
} for details in item.findall('./source/file_details') if details.find('./file').text
|
||||||
]
|
]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -1,52 +1,71 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class FirstTVIE(InfoExtractor):
|
class FirstTVIE(InfoExtractor):
|
||||||
IE_NAME = 'firsttv'
|
IE_NAME = '1tv'
|
||||||
IE_DESC = 'Видеоархив - Первый канал'
|
IE_DESC = 'Первый канал'
|
||||||
_VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
|
_VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.1tv.ru/videoarchive/73390',
|
'url': 'http://www.1tv.ru/videoarchive/73390',
|
||||||
'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
|
'md5': '777f525feeec4806130f4f764bc18a4f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '73390',
|
'id': '73390',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Олимпийские канатные дороги',
|
'title': 'Олимпийские канатные дороги',
|
||||||
'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
|
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||||
'duration': 149,
|
'duration': 149,
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
},
|
},
|
||||||
'skip': 'Only works from Russia',
|
'skip': 'Only works from Russia',
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
|
||||||
|
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '35930',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Наедине со всеми. Людмила Сенчина',
|
||||||
|
'description': 'md5:89553aed1d641416001fe8d450f06cb9',
|
||||||
|
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||||
|
'duration': 2694,
|
||||||
|
},
|
||||||
|
'skip': 'Only works from Russia',
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
|
r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
|
||||||
|
webpage, 'video URL')
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
|
[r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||||
|
r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
|
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||||
|
webpage, 'description', default=None) or self._html_search_meta(
|
||||||
|
'description', webpage, 'description')
|
||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
|
duration = self._og_search_property(
|
||||||
|
'video:duration', webpage,
|
||||||
|
'video duration', fatal=False)
|
||||||
|
|
||||||
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
|
like_count = self._html_search_regex(
|
||||||
webpage, 'like count', fatal=False)
|
r'title="Понравилось".*?/></label> \[(\d+)\]',
|
||||||
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
webpage, 'like count', default=None)
|
||||||
webpage, 'dislike count', fatal=False)
|
dislike_count = self._html_search_regex(
|
||||||
|
r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
||||||
|
webpage, 'dislike count', default=None)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
|
|||||||
IE_NAME = '5min'
|
IE_NAME = '5min'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
||||||
|
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
|
||||||
5min:)
|
5min:)
|
||||||
(?P<id>\d+)
|
(?P<id>\d+)
|
||||||
'''
|
'''
|
||||||
|
@ -1,77 +1,69 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class FranceCultureIE(InfoExtractor):
|
class FranceCultureIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
|
'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4795174',
|
'id': '4795174',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Rendez-vous au pays des geeks',
|
'title': 'Rendez-vous au pays des geeks',
|
||||||
|
'alt_title': 'Carnet nomade | 13-14',
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
'uploader': 'Colette Fellous',
|
|
||||||
'upload_date': '20140301',
|
'upload_date': '20140301',
|
||||||
'duration': 3601,
|
|
||||||
'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
|
'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
|
||||||
'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
|
'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats',
|
||||||
|
'timestamp': 1393700400,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
baseurl = mobj.group('baseurl')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
params_code = self._search_regex(
|
|
||||||
r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
|
video_path = self._search_regex(
|
||||||
webpage, 'parameter code')
|
r'<a id="player".*?href="([^"]+)"', webpage, 'video path')
|
||||||
params = compat_parse_qs(params_code)
|
video_url = compat_urlparse.urljoin(url, video_path)
|
||||||
video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
|
timestamp = int_or_none(self._search_regex(
|
||||||
|
r'<a id="player".*?data-date="([0-9]+)"',
|
||||||
|
webpage, 'upload date', fatal=False))
|
||||||
|
thumbnail = self._search_regex(
|
||||||
|
r'<a id="player".*?>\s+<img src="([^"]+)"',
|
||||||
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
|
r'<span class="title-diffusion">(.*?)</span>', webpage, 'title')
|
||||||
|
alt_title = self._html_search_regex(
|
||||||
|
r'<span class="title">(.*?)</span>',
|
||||||
|
webpage, 'alt_title', fatal=False)
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'<span class="description">(.*?)</span>',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', default=None)
|
||||||
thumbnail_part = self._html_search_regex(
|
vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
|
||||||
r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
|
|
||||||
'thumbnail', fatal=False)
|
|
||||||
if thumbnail_part is None:
|
|
||||||
thumbnail = None
|
|
||||||
else:
|
|
||||||
thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
|
|
||||||
|
|
||||||
info = json.loads(params['infoData'][0])[0]
|
|
||||||
duration = info.get('media_length')
|
|
||||||
upload_date_candidate = info.get('media_section5')
|
|
||||||
upload_date = (
|
|
||||||
upload_date_candidate
|
|
||||||
if (upload_date_candidate is not None and
|
|
||||||
re.match(r'[0-9]{8}$', upload_date_candidate))
|
|
||||||
else None)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
|
'vcodec': vcodec,
|
||||||
'duration': duration,
|
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'upload_date': upload_date,
|
'timestamp': timestamp,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'alt_title': alt_title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'description': description,
|
'description': description,
|
||||||
}
|
}
|
||||||
|
@ -1,41 +1,67 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
xpath_text,
|
||||||
|
xpath_with_ns,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class GamekingsIE(InfoExtractor):
|
class GamekingsIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
_VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||||
# MD5 is flaky, seems to change regularly
|
# MD5 is flaky, seems to change regularly
|
||||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '20130811',
|
'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||||
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
|
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
|
||||||
}
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
},
|
||||||
|
}, {
|
||||||
|
# vimeo video
|
||||||
|
'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
|
||||||
|
'md5': '12bf04dfd238e70058046937657ea68d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'the-legend-of-zelda-majoras-mask',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Legend of Zelda: Majora’s Mask',
|
||||||
|
'description': 'md5:9917825fe0e9f4057601fe1e38860de3',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
webpage = self._download_webpage(url, video_id)
|
||||||
name = mobj.group('name')
|
|
||||||
webpage = self._download_webpage(url, name)
|
|
||||||
video_url = self._og_search_video_url(webpage)
|
|
||||||
|
|
||||||
video = re.search(r'[0-9]+', video_url)
|
playlist_id = self._search_regex(
|
||||||
video_id = video.group(0)
|
r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
|
||||||
|
|
||||||
# Todo: add medium format
|
playlist = self._download_xml(
|
||||||
video_url = video_url.replace(video_id, 'large/' + video_id)
|
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
NS_MAP = {
|
||||||
|
'jwplayer': 'http://rss.jwpcdn.com/'
|
||||||
|
}
|
||||||
|
|
||||||
|
item = playlist.find('./channel/item')
|
||||||
|
|
||||||
|
thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail')
|
||||||
|
video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'ext': 'mp4',
|
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
@ -140,6 +140,19 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Ooyala'],
|
'add_ie': ['Ooyala'],
|
||||||
},
|
},
|
||||||
|
# multiple ooyala embeds on SBN network websites
|
||||||
|
{
|
||||||
|
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||||
|
'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': ['Ooyala'],
|
||||||
|
},
|
||||||
# google redirect
|
# google redirect
|
||||||
{
|
{
|
||||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||||
@ -460,6 +473,7 @@ class GenericIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '1986',
|
||||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
@ -511,6 +525,28 @@ class GenericIE(InfoExtractor):
|
|||||||
'upload_date': '20150126',
|
'upload_date': '20150126',
|
||||||
},
|
},
|
||||||
'add_ie': ['Viddler'],
|
'add_ie': ['Viddler'],
|
||||||
|
},
|
||||||
|
# jwplayer YouTube
|
||||||
|
{
|
||||||
|
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Mrj4DVp2zeA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20150212',
|
||||||
|
'uploader': 'The National Archives UK',
|
||||||
|
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
|
||||||
|
'uploader_id': 'NationalArchives08',
|
||||||
|
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# rtl.nl embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'aanslagen-kopenhagen',
|
||||||
|
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -756,6 +792,13 @@ class GenericIE(InfoExtractor):
|
|||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Look for embedded rtl.nl player
|
||||||
|
matches = re.findall(
|
||||||
|
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
|
||||||
|
webpage)
|
||||||
|
if matches:
|
||||||
|
return _playlist_from_matches(matches, ie='RtlNl')
|
||||||
|
|
||||||
# Look for embedded (iframe) Vimeo player
|
# Look for embedded (iframe) Vimeo player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||||
@ -763,7 +806,6 @@ class GenericIE(InfoExtractor):
|
|||||||
player_url = unescapeHTML(mobj.group('url'))
|
player_url = unescapeHTML(mobj.group('url'))
|
||||||
surl = smuggle_url(player_url, {'Referer': url})
|
surl = smuggle_url(player_url, {'Referer': url})
|
||||||
return self.url_result(surl)
|
return self.url_result(surl)
|
||||||
|
|
||||||
# Look for embedded (swf embed) Vimeo player
|
# Look for embedded (swf embed) Vimeo player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||||
@ -882,10 +924,19 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for Ooyala videos
|
# Look for Ooyala videos
|
||||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||||
|
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||||
|
|
||||||
|
# Look for multiple Ooyala embeds on SBN network websites
|
||||||
|
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
|
||||||
|
if embeds:
|
||||||
|
return _playlist_from_matches(
|
||||||
|
embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
|
||||||
|
|
||||||
# Look for Aparat videos
|
# Look for Aparat videos
|
||||||
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
@ -1012,7 +1063,12 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Look for embedded sbs.com.au player
|
# Look for embedded sbs.com.au player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
|
r'''(?x)
|
||||||
|
(?:
|
||||||
|
<meta\s+property="og:video"\s+content=|
|
||||||
|
<iframe[^>]+?src=
|
||||||
|
)
|
||||||
|
(["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
|
||||||
webpage)
|
webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'SBS')
|
return self.url_result(mobj.group('url'), 'SBS')
|
||||||
@ -1043,6 +1099,8 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(mobj.group('url'), 'Livestream')
|
return self.url_result(mobj.group('url'), 'Livestream')
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
|
if YoutubeIE.suitable(vurl):
|
||||||
|
return True
|
||||||
vpath = compat_urlparse.urlparse(vurl).path
|
vpath = compat_urlparse.urlparse(vurl).path
|
||||||
vext = determine_ext(vpath)
|
vext = determine_ext(vpath)
|
||||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
|
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
|
||||||
@ -1060,7 +1118,8 @@ class GenericIE(InfoExtractor):
|
|||||||
JWPlayerOptions|
|
JWPlayerOptions|
|
||||||
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
|
||||||
)
|
)
|
||||||
.*?file\s*:\s*["\'](.*?)["\']''', webpage))
|
.*?
|
||||||
|
['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
# Broaden the search a little bit
|
# Broaden the search a little bit
|
||||||
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
|
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
|
||||||
|
@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor):
|
|||||||
duration = parse_duration(self._html_search_regex(
|
duration = parse_duration(self._html_search_regex(
|
||||||
r'<span class="duration">\s*-?\s*(.*?)</span>',
|
r'<span class="duration">\s*-?\s*(.*?)</span>',
|
||||||
webpage, 'duration', fatal=False))
|
webpage, 'duration', fatal=False))
|
||||||
family_friendly = self._html_search_meta(
|
|
||||||
'isFamilyFriendly', webpage, default='false')
|
|
||||||
|
|
||||||
flashvars = compat_parse_qs(self._html_search_regex(
|
flashvars = compat_parse_qs(self._html_search_regex(
|
||||||
r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
|
r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
|
||||||
@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'age_limit': 0 if family_friendly == 'true' else 18,
|
'age_limit': self._family_friendly_search(webpage),
|
||||||
}
|
}
|
||||||
|
31
youtube_dl/extractor/history.py
Normal file
31
youtube_dl/extractor/history.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
|
class HistoryIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
||||||
|
'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'bLx5Dv5Aka1G',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Bet You Didn't Know: Valentine's Day",
|
||||||
|
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||||
|
},
|
||||||
|
'add_ie': ['ThePlatform'],
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
|
||||||
|
webpage, 'video url')
|
||||||
|
|
||||||
|
return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))
|
@ -34,6 +34,9 @@ class IGNIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
|
},
|
||||||
'playlist': [
|
'playlist': [
|
||||||
{
|
{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
97
youtube_dl/extractor/imgur.py
Normal file
97
youtube_dl/extractor/imgur.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ImgurIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'A61SaA1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'MRW gifv is up and running without any bugs',
|
||||||
|
'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://imgur.com/A61SaA1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'A61SaA1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'MRW gifv is up and running without any bugs',
|
||||||
|
'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
width = int_or_none(self._search_regex(
|
||||||
|
r'<param name="width" value="([0-9]+)"',
|
||||||
|
webpage, 'width', fatal=False))
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'<param name="height" value="([0-9]+)"',
|
||||||
|
webpage, 'height', fatal=False))
|
||||||
|
|
||||||
|
video_elements = self._search_regex(
|
||||||
|
r'(?s)<div class="video-elements">(.*?)</div>',
|
||||||
|
webpage, 'video elements', default=None)
|
||||||
|
if not video_elements:
|
||||||
|
raise ExtractorError(
|
||||||
|
'No sources found for video %s. Maybe an image?' % video_id,
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
|
||||||
|
formats.append({
|
||||||
|
'format_id': m.group('type').partition('/')[2],
|
||||||
|
'url': self._proto_relative_url(m.group('src')),
|
||||||
|
'ext': mimetype2ext(m.group('type')),
|
||||||
|
'acodec': 'none',
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'http_headers': {
|
||||||
|
'User-Agent': 'youtube-dl (like wget)',
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
gif_json = self._search_regex(
|
||||||
|
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
|
||||||
|
webpage, 'GIF code', fatal=False)
|
||||||
|
if gif_json:
|
||||||
|
gifd = self._parse_json(
|
||||||
|
gif_json, video_id, transform_source=js_to_json)
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'gif',
|
||||||
|
'preference': -10,
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'ext': 'gif',
|
||||||
|
'acodec': 'none',
|
||||||
|
'vcodec': 'gif',
|
||||||
|
'container': 'gif',
|
||||||
|
'url': self._proto_relative_url(gifd['gifUrl']),
|
||||||
|
'filesize': gifd.get('size'),
|
||||||
|
'http_headers': {
|
||||||
|
'User-Agent': 'youtube-dl (like wget)',
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
}
|
@ -80,9 +80,6 @@ class IzleseneIE(InfoExtractor):
|
|||||||
r'comment_count\s*=\s*\'([^\']+)\';',
|
r'comment_count\s*=\s*\'([^\']+)\';',
|
||||||
webpage, 'comment_count', fatal=False)
|
webpage, 'comment_count', fatal=False)
|
||||||
|
|
||||||
family_friendly = self._html_search_meta(
|
|
||||||
'isFamilyFriendly', webpage, 'age limit', fatal=False)
|
|
||||||
|
|
||||||
content_url = self._html_search_meta(
|
content_url = self._html_search_meta(
|
||||||
'contentURL', webpage, 'content URL', fatal=False)
|
'contentURL', webpage, 'content URL', fatal=False)
|
||||||
ext = determine_ext(content_url, 'mp4')
|
ext = determine_ext(content_url, 'mp4')
|
||||||
@ -120,6 +117,6 @@ class IzleseneIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': int_or_none(view_count),
|
'view_count': int_or_none(view_count),
|
||||||
'comment_count': int_or_none(comment_count),
|
'comment_count': int_or_none(comment_count),
|
||||||
'age_limit': 18 if family_friendly == 'False' else 0,
|
'age_limit': self._family_friendly_search(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor):
|
|||||||
'url': 'http://new.livestream.com/tedx/cityenglish',
|
'url': 'http://new.livestream.com/tedx/cityenglish',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'TEDCity2.0 (English)',
|
'title': 'TEDCity2.0 (English)',
|
||||||
|
'id': '2245590',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}, {
|
}, {
|
||||||
@ -148,7 +149,8 @@ class LivestreamIE(InfoExtractor):
|
|||||||
if is_relevant(video_data, video_id)]
|
if is_relevant(video_data, video_id)]
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
# This is an event page:
|
# This is an event page:
|
||||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
return self.playlist_result(
|
||||||
|
videos, '%s' % info['id'], info['full_name'])
|
||||||
else:
|
else:
|
||||||
if not videos:
|
if not videos:
|
||||||
raise ExtractorError('Cannot find video %s' % video_id)
|
raise ExtractorError('Cannot find video %s' % video_id)
|
||||||
|
@ -18,7 +18,7 @@ class MixcloudIE(InfoExtractor):
|
|||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
|
||||||
IE_NAME = 'mixcloud'
|
IE_NAME = 'mixcloud'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'dholbach-cryptkeeper',
|
'id': 'dholbach-cryptkeeper',
|
||||||
@ -33,7 +33,20 @@ class MixcloudIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': 'Electric Relaxation vol. 3',
|
||||||
|
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
||||||
|
'uploader': 'Daniel Drumz',
|
||||||
|
'uploader_id': 'gillespeterson',
|
||||||
|
'thumbnail': 're:https?://.*\.jpg',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _get_url(self, track_id, template_url):
|
def _get_url(self, track_id, template_url):
|
||||||
server_count = 30
|
server_count = 30
|
||||||
@ -60,7 +73,7 @@ class MixcloudIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, track_id)
|
webpage = self._download_webpage(url, track_id)
|
||||||
|
|
||||||
preview_url = self._search_regex(
|
preview_url = self._search_regex(
|
||||||
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
|
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
||||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||||
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
||||||
final_song_url = self._get_url(track_id, template_url)
|
final_song_url = self._get_url(track_id, template_url)
|
||||||
|
38
youtube_dl/extractor/nationalgeographic.py
Normal file
38
youtube_dl/extractor/nationalgeographic.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
smuggle_url,
|
||||||
|
url_basename,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NationalGeographicIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4DmDACA6Qtk_',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Mating Crabs Busted by Sharks',
|
||||||
|
'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
|
||||||
|
},
|
||||||
|
'add_ie': ['ThePlatform'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
name = url_basename(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, name)
|
||||||
|
feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
|
||||||
|
guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
|
||||||
|
|
||||||
|
feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
|
||||||
|
content = feed.find('.//{http://search.yahoo.com/mrss/}content')
|
||||||
|
theplatform_id = url_basename(content.attrib.get('url'))
|
||||||
|
|
||||||
|
return self.url_result(smuggle_url(
|
||||||
|
'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
|
||||||
|
# For some reason, the normal links don't work and we must force the use of f4m
|
||||||
|
{'force_smil_url': True}))
|
@ -1,7 +1,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -19,13 +18,13 @@ class NBCIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
|
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||||
# md5 checksum is not stable
|
# md5 checksum is not stable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bTmnLCvIbaaH',
|
'id': 'c9xnCo0YPOPH',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'I Am a Firefighter',
|
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||||
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
|
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -52,9 +51,9 @@ class NBCIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NBCNewsIE(InfoExtractor):
|
class NBCNewsIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
|
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||||
((video/.+?/(?P<id>\d+))|
|
(?:video/.+?/(?P<id>\d+)|
|
||||||
(feature/[^/]+/(?P<title>.+)))
|
(?:feature|nightly-news)/[^/]+/(?P<title>.+))
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@ -89,6 +88,16 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
|
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
||||||
|
'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sekXqyTVnmN3',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||||
|
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -107,13 +116,13 @@ class NBCNewsIE(InfoExtractor):
|
|||||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
# "feature" pages use theplatform.com
|
# "feature" and "nightly-news" pages use theplatform.com
|
||||||
title = mobj.group('title')
|
title = mobj.group('title')
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
bootstrap_json = self._search_regex(
|
bootstrap_json = self._search_regex(
|
||||||
r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
|
r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
|
||||||
flags=re.MULTILINE)
|
webpage, 'bootstrap json', flags=re.MULTILINE)
|
||||||
bootstrap = json.loads(bootstrap_json)
|
bootstrap = self._parse_json(bootstrap_json, video_id)
|
||||||
info = bootstrap['results'][0]['video']
|
info = bootstrap['results'][0]['video']
|
||||||
mpxid = info['mpxId']
|
mpxid = info['mpxId']
|
||||||
|
|
||||||
|
80
youtube_dl/extractor/nerdist.py
Normal file
80
youtube_dl/extractor/nerdist.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
parse_iso8601,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NerdistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w',
|
||||||
|
'md5': '3698ed582931b90d9e81e02e26e89f23',
|
||||||
|
'info_dict': {
|
||||||
|
'display_id': 'exclusive-which-dc-characters-w',
|
||||||
|
'id': 'RPHpvJyr',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Your TEEN TITANS Revealed! Who\'s on the show?',
|
||||||
|
'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$',
|
||||||
|
'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!',
|
||||||
|
'uploader': 'Eric Diaz',
|
||||||
|
'upload_date': '20150202',
|
||||||
|
'timestamp': 1422892808,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'''(?x)<script\s+(?:type="text/javascript"\s+)?
|
||||||
|
src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''',
|
||||||
|
webpage, 'video ID')
|
||||||
|
timestamp = parse_iso8601(self._html_search_meta(
|
||||||
|
'shareaholic:article_published_time', webpage, 'upload date'))
|
||||||
|
uploader = self._html_search_meta(
|
||||||
|
'shareaholic:article_author_name', webpage, 'article author')
|
||||||
|
|
||||||
|
doc = self._download_xml(
|
||||||
|
'http://content.nerdist.com/jw6/%s.xml' % video_id, video_id)
|
||||||
|
video_info = doc.find('.//item')
|
||||||
|
title = xpath_text(video_info, './title', fatal=True)
|
||||||
|
description = xpath_text(video_info, './description')
|
||||||
|
thumbnail = xpath_text(
|
||||||
|
video_info, './{http://rss.jwpcdn.com/}image', 'thumbnail')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for source in video_info.findall('./{http://rss.jwpcdn.com/}source'):
|
||||||
|
vurl = source.attrib['file']
|
||||||
|
ext = determine_ext(vurl)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
vurl, video_id, entry_protocol='m3u8_native', ext='mp4',
|
||||||
|
preference=0))
|
||||||
|
elif ext == 'smil':
|
||||||
|
formats.extend(self._extract_smil_formats(
|
||||||
|
vurl, video_id, fatal=False
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': ext,
|
||||||
|
'url': vurl,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'formats': formats,
|
||||||
|
'uploader': uploader,
|
||||||
|
}
|
@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
|
|||||||
'timestamp': 1344858571,
|
'timestamp': 1344858571,
|
||||||
'age_limit': 12,
|
'age_limit': 12,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'Download only works from Germany',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -46,7 +46,18 @@ class NFLIE(InfoExtractor):
|
|||||||
'timestamp': 1388354455,
|
'timestamp': 1388354455,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0ap3000000467607',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Frustrations flare on the field',
|
||||||
|
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
|
||||||
|
'timestamp': 1422850320,
|
||||||
|
'upload_date': '20150202',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -80,7 +91,11 @@ class NFLIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
config_url = NFLIE.prepend_host(host, self._search_regex(
|
config_url = NFLIE.prepend_host(host, self._search_regex(
|
||||||
r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL'))
|
r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL',
|
||||||
|
default='static/content/static/config/video/config.json'))
|
||||||
|
# For articles, the id in the url is not the video id
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id)
|
||||||
config = self._download_json(config_url, video_id,
|
config = self._download_json(config_url, video_id,
|
||||||
note='Downloading player config')
|
note='Downloading player config')
|
||||||
url_template = NFLIE.prepend_host(
|
url_template = NFLIE.prepend_host(
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -11,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NormalbootsIE(InfoExtractor):
|
class NormalbootsIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
|
_VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://normalboots.com/video/home-alone-games-jontron/',
|
'url': 'http://normalboots.com/video/home-alone-games-jontron/',
|
||||||
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
|
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
|
||||||
@ -30,19 +28,22 @@ class NormalbootsIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('videoid')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
|
||||||
webpage, 'uploader')
|
|
||||||
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
|
||||||
webpage, 'date')
|
|
||||||
video_upload_date = unified_strdate(raw_upload_date)
|
|
||||||
|
|
||||||
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
video_uploader = self._html_search_regex(
|
||||||
|
r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
||||||
|
webpage, 'uploader', fatal=False)
|
||||||
|
video_upload_date = unified_strdate(self._html_search_regex(
|
||||||
|
r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||||
|
webpage, 'date', fatal=False))
|
||||||
|
|
||||||
|
player_url = self._html_search_regex(
|
||||||
|
r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"',
|
||||||
|
webpage, 'player url')
|
||||||
player_page = self._download_webpage(player_url, video_id)
|
player_page = self._download_webpage(player_url, video_id)
|
||||||
video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
|
video_url = self._html_search_regex(
|
||||||
|
r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
@ -11,7 +12,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NPOBaseIE(InfoExtractor):
|
class NPOBaseIE(SubtitlesInfoExtractor):
|
||||||
def _get_token(self, video_id):
|
def _get_token(self, video_id):
|
||||||
token_page = self._download_webpage(
|
token_page = self._download_webpage(
|
||||||
'http://ida.omroep.nl/npoplayer/i.js',
|
'http://ida.omroep.nl/npoplayer/i.js',
|
||||||
@ -22,7 +23,7 @@ class NPOBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class NPOIE(NPOBaseIE):
|
class NPOIE(NPOBaseIE):
|
||||||
IE_NAME = 'npo.nl'
|
IE_NAME = 'npo.nl'
|
||||||
_VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
|
_VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -161,6 +162,16 @@ class NPOIE(NPOBaseIE):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
if metadata.get('tt888') == 'ja':
|
||||||
|
subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id
|
||||||
|
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
|
subtitles = self.extract_subtitles(video_id, subtitles)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': metadata['titel'],
|
'title': metadata['titel'],
|
||||||
@ -169,12 +180,13 @@ class NPOIE(NPOBaseIE):
|
|||||||
'upload_date': unified_strdate(metadata.get('gidsdatum')),
|
'upload_date': unified_strdate(metadata.get('gidsdatum')),
|
||||||
'duration': parse_duration(metadata.get('tijdsduur')),
|
'duration': parse_duration(metadata.get('tijdsduur')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class NPOLiveIE(NPOBaseIE):
|
class NPOLiveIE(NPOBaseIE):
|
||||||
IE_NAME = 'npo.nl:live'
|
IE_NAME = 'npo.nl:live'
|
||||||
_VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)'
|
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.npo.nl/live/npo-1',
|
'url': 'http://www.npo.nl/live/npo-1',
|
||||||
@ -249,6 +261,84 @@ class NPOLiveIE(NPOBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NPORadioIE(InfoExtractor):
|
||||||
|
IE_NAME = 'npo.nl:radio'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.npo.nl/radio/radio-1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'radio-1',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _html_get_attribute_regex(attribute):
|
||||||
|
return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
self._html_get_attribute_regex('data-channel'), webpage, 'title')
|
||||||
|
|
||||||
|
stream = self._parse_json(
|
||||||
|
self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
codec = stream.get('codec')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': stream['url'],
|
||||||
|
'title': self._live_title(title),
|
||||||
|
'acodec': codec,
|
||||||
|
'ext': codec,
|
||||||
|
'is_live': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NPORadioFragmentIE(InfoExtractor):
|
||||||
|
IE_NAME = 'npo.nl:radio:fragment'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
|
||||||
|
'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '174356',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Jubileumconcert Willeke Alberti',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
audio_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, audio_id)
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
|
||||||
|
webpage, 'title')
|
||||||
|
|
||||||
|
audio_url = self._search_regex(
|
||||||
|
r"data-streams='([^']+)'", webpage, 'audio url')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': audio_id,
|
||||||
|
'url': audio_url,
|
||||||
|
'title': title,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class TegenlichtVproIE(NPOIE):
|
class TegenlichtVproIE(NPOIE):
|
||||||
IE_NAME = 'tegenlicht.vpro.nl'
|
IE_NAME = 'tegenlicht.vpro.nl'
|
||||||
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
|
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
|
||||||
|
@ -3,7 +3,9 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unescapeHTML
|
clean_html,
|
||||||
|
xpath_text,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -14,73 +16,63 @@ class NTVRuIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.ntv.ru/novosti/863142/',
|
'url': 'http://www.ntv.ru/novosti/863142/',
|
||||||
|
'md5': 'ba7ea172a91cb83eb734cad18c10e723',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '746000',
|
'id': '746000',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
|
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
|
||||||
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
|
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
|
||||||
|
'thumbnail': 're:^http://.*\.jpg',
|
||||||
'duration': 136,
|
'duration': 136,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.ntv.ru/video/novosti/750370/',
|
'url': 'http://www.ntv.ru/video/novosti/750370/',
|
||||||
|
'md5': 'adecff79691b4d71e25220a191477124',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '750370',
|
'id': '750370',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
||||||
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
|
||||||
|
'thumbnail': 're:^http://.*\.jpg',
|
||||||
'duration': 172,
|
'duration': 172,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||||
|
'md5': '82dbd49b38e3af1d00df16acbeab260c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '747480',
|
'id': '747480',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
|
'title': '«Сегодня». 21 марта 2014 года. 16:00',
|
||||||
'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
|
'description': '«Сегодня». 21 марта 2014 года. 16:00',
|
||||||
|
'thumbnail': 're:^http://.*\.jpg',
|
||||||
'duration': 1496,
|
'duration': 1496,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.ntv.ru/kino/Koma_film',
|
'url': 'http://www.ntv.ru/kino/Koma_film',
|
||||||
|
'md5': 'f825770930937aa7e5aca0dc0d29319a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '758100',
|
'id': '1007609',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Остросюжетный фильм «Кома»',
|
'title': 'Остросюжетный фильм «Кома»',
|
||||||
'description': 'Остросюжетный фильм «Кома»',
|
'description': 'Остросюжетный фильм «Кома»',
|
||||||
|
'thumbnail': 're:^http://.*\.jpg',
|
||||||
'duration': 5592,
|
'duration': 5592,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
|
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
|
||||||
|
'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '751482',
|
'id': '751482',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': '«Дело врачей»: «Деревце жизни»',
|
'title': '«Дело врачей»: «Деревце жизни»',
|
||||||
'description': '«Дело врачей»: «Деревце жизни»',
|
'description': '«Дело врачей»: «Деревце жизни»',
|
||||||
|
'thumbnail': 're:^http://.*\.jpg',
|
||||||
'duration': 2590,
|
'duration': 2590,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -92,45 +84,36 @@ class NTVRuIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
page = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
|
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
|
||||||
title = unescapeHTML(player.find('./data/title').text)
|
|
||||||
description = unescapeHTML(player.find('./data/description').text)
|
player = self._download_xml(
|
||||||
|
'http://www.ntv.ru/vi%s/' % video_id,
|
||||||
|
video_id, 'Downloading video XML')
|
||||||
|
title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
|
||||||
|
description = clean_html(xpath_text(player, './data/description', 'description'))
|
||||||
|
|
||||||
video = player.find('./data/video')
|
video = player.find('./data/video')
|
||||||
video_id = video.find('./id').text
|
video_id = xpath_text(video, './id', 'video id')
|
||||||
thumbnail = video.find('./splash').text
|
thumbnail = xpath_text(video, './splash', 'thumbnail')
|
||||||
duration = int(video.find('./totaltime').text)
|
duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
|
||||||
view_count = int(video.find('./views').text)
|
view_count = int_or_none(xpath_text(video, './views', 'view count'))
|
||||||
puid22 = video.find('./puid22').text
|
|
||||||
|
|
||||||
apps = {
|
token = self._download_webpage(
|
||||||
'4': 'video1',
|
'http://stat.ntv.ru/services/access/token',
|
||||||
'7': 'video2',
|
video_id, 'Downloading access token')
|
||||||
}
|
|
||||||
|
|
||||||
app = apps.get(puid22, apps['4'])
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id in ['', 'hi', 'webm']:
|
for format_id in ['', 'hi', 'webm']:
|
||||||
file = video.find('./%sfile' % format_id)
|
file_ = video.find('./%sfile' % format_id)
|
||||||
if file is None:
|
if file_ is None:
|
||||||
continue
|
continue
|
||||||
size = video.find('./%ssize' % format_id)
|
size = video.find('./%ssize' % format_id)
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': 'rtmp://media.ntv.ru/%s' % app,
|
'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
|
||||||
'app': app,
|
'filesize': int_or_none(size.text if size is not None else None),
|
||||||
'play_path': file.text,
|
|
||||||
'rtmp_conn': 'B:1',
|
|
||||||
'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
|
|
||||||
'page_url': 'http://www.ntv.ru',
|
|
||||||
'flash_version': 'LNX 11,2,202,341',
|
|
||||||
'rtmp_live': True,
|
|
||||||
'ext': 'flv',
|
|
||||||
'filesize': int(size.text),
|
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -1,9 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
js_to_json,
|
js_to_json,
|
||||||
@ -11,7 +8,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class PatreonIE(InfoExtractor):
|
class PatreonIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
|
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.patreon.com/creation?hid=743933',
|
'url': 'http://www.patreon.com/creation?hid=743933',
|
||||||
@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*$',
|
'thumbnail': 're:^https?://.*$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.patreon.com/creation?hid=1682498',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'SU4fj_aEMVw',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'I\'m on Patreon!',
|
||||||
|
'uploader': 'TraciJHines',
|
||||||
|
'thumbnail': 're:^https?://.*$',
|
||||||
|
'upload_date': '20150211',
|
||||||
|
'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
|
||||||
|
'uploader_id': 'TraciJHines',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True,
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
# Currently Patreon exposes download URL via hidden CSS, so login is not
|
# Currently Patreon exposes download URL via hidden CSS, so login is not
|
||||||
@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._og_search_title(webpage).strip()
|
title = self._og_search_title(webpage).strip()
|
||||||
|
|
||||||
attach_fn = self._html_search_regex(
|
attach_fn = self._html_search_regex(
|
||||||
r'<div class="attach"><a target="_blank" href="([^"]+)">',
|
r'<div class="attach"><a target="_blank" href="([^"]+)">',
|
||||||
webpage, 'attachment URL', default=None)
|
webpage, 'attachment URL', default=None)
|
||||||
|
embed = self._html_search_regex(
|
||||||
|
r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
|
||||||
|
webpage, 'embedded URL', default=None)
|
||||||
|
|
||||||
if attach_fn is not None:
|
if attach_fn is not None:
|
||||||
video_url = 'http://www.patreon.com' + attach_fn
|
video_url = 'http://www.patreon.com' + attach_fn
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
|
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
|
||||||
|
elif embed is not None:
|
||||||
|
return self.url_result(embed)
|
||||||
else:
|
else:
|
||||||
playlist_js = self._search_regex(
|
playlist = self._parse_json(self._search_regex(
|
||||||
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
|
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
|
||||||
webpage, 'playlist JSON')
|
webpage, 'playlist JSON'),
|
||||||
playlist_json = js_to_json(playlist_js)
|
video_id, transform_source=js_to_json)
|
||||||
playlist = json.loads(playlist_json)
|
|
||||||
data = playlist[0]
|
data = playlist[0]
|
||||||
video_url = self._proto_relative_url(data['mp3'])
|
video_url = self._proto_relative_url(data['mp3'])
|
||||||
thumbnail = self._proto_relative_url(data.get('cover'))
|
thumbnail = self._proto_relative_url(data.get('cover'))
|
||||||
|
@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor):
|
|||||||
|
|
||||||
quality = qualities(['sd', 'hd'])
|
quality = qualities(['sd', 'hd'])
|
||||||
sources = json.loads(js_to_json(self._search_regex(
|
sources = json.loads(js_to_json(self._search_regex(
|
||||||
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
|
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
|
||||||
|
webpage, 'sources')))
|
||||||
formats = []
|
formats = []
|
||||||
for container, s in sources.items():
|
for qname, video_url in sources.items():
|
||||||
for qname, video_url in s.items():
|
if not video_url:
|
||||||
formats.append({
|
continue
|
||||||
'url': video_url,
|
formats.append({
|
||||||
'container': container,
|
'url': video_url,
|
||||||
'format_id': '%s-%s' % (container, qname),
|
'format_id': qname,
|
||||||
'quality': quality(qname),
|
'quality': quality(qname),
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
|
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||||
if thumbnail:
|
if thumbnail:
|
||||||
@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class PornHubPlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.pornhub.com/playlist/6201671',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6201671',
|
||||||
|
'title': 'P0p4',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 35,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
|
||||||
|
for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
|
||||||
|
]
|
||||||
|
|
||||||
|
playlist = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
|
||||||
|
playlist_id)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist.get('title'), playlist.get('description'))
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -10,13 +8,13 @@ class RadioDeIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
|
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://ndr2.radio.de/',
|
'url': 'http://ndr2.radio.de/',
|
||||||
'md5': '3b4cdd011bc59174596b6145cda474a4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ndr2',
|
'id': 'ndr2',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': 'md5:591c49c702db1a33751625ebfb67f273',
|
'description': 'md5:591c49c702db1a33751625ebfb67f273',
|
||||||
'thumbnail': 're:^https?://.*\.png',
|
'thumbnail': 're:^https?://.*\.png',
|
||||||
|
'is_live': True,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -25,16 +23,15 @@ class RadioDeIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
radio_id = self._match_id(url)
|
radio_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, radio_id)
|
webpage = self._download_webpage(url, radio_id)
|
||||||
|
jscode = self._search_regex(
|
||||||
|
r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
|
||||||
|
webpage, 'broadcast')
|
||||||
|
|
||||||
broadcast = json.loads(self._search_regex(
|
broadcast = self._parse_json(jscode, radio_id)
|
||||||
r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
|
|
||||||
webpage, 'broadcast'))
|
|
||||||
|
|
||||||
title = self._live_title(broadcast['name'])
|
title = self._live_title(broadcast['name'])
|
||||||
description = broadcast.get('description') or broadcast.get('shortDescription')
|
description = broadcast.get('description') or broadcast.get('shortDescription')
|
||||||
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
|
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': stream['streamUrl'],
|
'url': stream['streamUrl'],
|
||||||
|
@ -1,16 +1,25 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import parse_duration
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RtlXlIE(InfoExtractor):
|
class RtlNlIE(InfoExtractor):
|
||||||
IE_NAME = 'rtlxl.nl'
|
IE_NAME = 'rtl.nl'
|
||||||
_VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
|
IE_DESC = 'rtl.nl and rtlxl.nl'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(www\.)?
|
||||||
|
(?:
|
||||||
|
rtlxl\.nl/\#!/[^/]+/|
|
||||||
|
rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
|
||||||
|
)
|
||||||
|
(?P<id>[0-9a-f-]+)'''
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||||
'md5': 'cc16baa36a6c169391f0764fa6b16654',
|
'md5': 'cc16baa36a6c169391f0764fa6b16654',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -22,21 +31,30 @@ class RtlXlIE(InfoExtractor):
|
|||||||
'upload_date': '20140814',
|
'upload_date': '20140814',
|
||||||
'duration': 576.880,
|
'duration': 576.880,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
|
||||||
|
'md5': 'dea7474214af1271d91ef332fb8be7ea',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'timestamp': 1424039400,
|
||||||
|
'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
|
||||||
|
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
|
||||||
|
'upload_date': '20150215',
|
||||||
|
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
uuid = self._match_id(url)
|
||||||
uuid = mobj.group('uuid')
|
|
||||||
|
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
||||||
uuid)
|
uuid)
|
||||||
|
|
||||||
material = info['material'][0]
|
material = info['material'][0]
|
||||||
episode_info = info['episodes'][0]
|
|
||||||
|
|
||||||
progname = info['abstracts'][0]['name']
|
progname = info['abstracts'][0]['name']
|
||||||
subtitle = material['title'] or info['episodes'][0]['name']
|
subtitle = material['title'] or info['episodes'][0]['name']
|
||||||
|
description = material.get('synopsis') or info['episodes'][0]['synopsis']
|
||||||
|
|
||||||
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
|
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
|
||||||
videopath = material['videopath'].replace('.f4m', '.m3u8')
|
videopath = material['videopath'].replace('.f4m', '.m3u8')
|
||||||
@ -58,14 +76,29 @@ class RtlXlIE(InfoExtractor):
|
|||||||
'quality': 0,
|
'quality': 0,
|
||||||
}
|
}
|
||||||
])
|
])
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
meta = info.get('meta', {})
|
||||||
|
for p in ('poster_base_url', '"thumb_base_url"'):
|
||||||
|
if not meta.get(p):
|
||||||
|
continue
|
||||||
|
|
||||||
|
thumbnails.append({
|
||||||
|
'url': self._proto_relative_url(meta[p] + uuid),
|
||||||
|
'width': int_or_none(self._search_regex(
|
||||||
|
r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
|
||||||
|
'height': int_or_none(self._search_regex(
|
||||||
|
r'/sz=[0-9]+x([0-9]+)',
|
||||||
|
meta[p], 'thumbnail height', fatal=False))
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': uuid,
|
'id': uuid,
|
||||||
'title': '%s - %s' % (progname, subtitle),
|
'title': '%s - %s' % (progname, subtitle),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'timestamp': material['original_date'],
|
'timestamp': material['original_date'],
|
||||||
'description': episode_info['synopsis'],
|
'description': description,
|
||||||
'duration': parse_duration(material.get('duration')),
|
'duration': parse_duration(material.get('duration')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
}
|
}
|
||||||
|
@ -91,6 +91,15 @@ class RTLnowIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '188729',
|
||||||
|
'ext': 'flv',
|
||||||
|
'upload_date': '20150204',
|
||||||
|
'description': 'md5:5e1ce23095e61a79c166d134b683cecc',
|
||||||
|
'title': 'Der Bachelor - Folge 4',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
|
'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
@ -134,9 +143,18 @@ class RTLnowIE(InfoExtractor):
|
|||||||
'player_url': video_page_url + 'includes/vodplayer.swf',
|
'player_url': video_page_url + 'includes/vodplayer.swf',
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
fmt = {
|
mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
|
||||||
'url': filename.text,
|
if mobj:
|
||||||
}
|
fmt = {
|
||||||
|
'url': 'rtmpe://fmspay-fra2.rtl.de/' + mobj.group('hoster'),
|
||||||
|
'play_path': 'mp4:' + mobj.group('play_path'),
|
||||||
|
'page_url': url,
|
||||||
|
'player_url': video_page_url + 'includes/vodplayer.swf',
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
fmt = {
|
||||||
|
'url': filename.text,
|
||||||
|
}
|
||||||
fmt.update({
|
fmt.update({
|
||||||
'width': int_or_none(filename.get('width')),
|
'width': int_or_none(filename.get('width')),
|
||||||
'height': int_or_none(filename.get('height')),
|
'height': int_or_none(filename.get('height')),
|
||||||
|
@ -1,16 +1,16 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import js_to_json
|
|
||||||
|
|
||||||
|
|
||||||
class RTPIE(InfoExtractor):
|
class RTPIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
|
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
|
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
|
||||||
|
'md5': 'e736ce0c665e459ddb818546220b4ef8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'e174042',
|
'id': 'e174042',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
@ -18,9 +18,6 @@ class RTPIE(InfoExtractor):
|
|||||||
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': True, # RTMP download
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
|
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -37,20 +34,48 @@ class RTPIE(InfoExtractor):
|
|||||||
|
|
||||||
player_config = self._search_regex(
|
player_config = self._search_regex(
|
||||||
r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
|
r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
|
||||||
config = json.loads(js_to_json(player_config))
|
config = self._parse_json(player_config, video_id)
|
||||||
|
|
||||||
path, ext = config.get('file').rsplit('.', 1)
|
path, ext = config.get('file').rsplit('.', 1)
|
||||||
formats = [{
|
formats = [{
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
'ext': ext,
|
||||||
|
'vcodec': config.get('type') == 'audio' and 'none' or None,
|
||||||
|
'preference': -2,
|
||||||
|
'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
|
||||||
'app': config.get('application'),
|
'app': config.get('application'),
|
||||||
'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
|
'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
|
||||||
'page_url': url,
|
'page_url': url,
|
||||||
'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
|
|
||||||
'rtmp_live': config.get('live', False),
|
'rtmp_live': config.get('live', False),
|
||||||
'ext': ext,
|
|
||||||
'vcodec': config.get('type') == 'audio' and 'none' or None,
|
|
||||||
'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
|
'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
|
||||||
|
'rtmp_real_time': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
# Construct regular HTTP download URLs
|
||||||
|
replacements = {
|
||||||
|
'audio': {
|
||||||
|
'format_id': 'mp3',
|
||||||
|
'pattern': r'^nas2\.share/wavrss/',
|
||||||
|
'repl': 'http://rsspod.rtp.pt/podcasts/',
|
||||||
|
'vcodec': 'none',
|
||||||
|
},
|
||||||
|
'video': {
|
||||||
|
'format_id': 'mp4_h264',
|
||||||
|
'pattern': r'^nas2\.share/h264/',
|
||||||
|
'repl': 'http://rsspod.rtp.pt/videocasts/',
|
||||||
|
'vcodec': 'h264',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
r = replacements[config['type']]
|
||||||
|
if re.match(r['pattern'], config['file']) is not None:
|
||||||
|
formats.append({
|
||||||
|
'format_id': r['format_id'],
|
||||||
|
'url': re.sub(r['pattern'], r['repl'], config['file']),
|
||||||
|
'vcodec': r['vcodec'],
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -6,12 +6,14 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -159,11 +161,27 @@ class RTSIE(InfoExtractor):
|
|||||||
return int_or_none(self._search_regex(
|
return int_or_none(self._search_regex(
|
||||||
r'-([0-9]+)k\.', url, 'bitrate', default=None))
|
r'-([0-9]+)k\.', url, 'bitrate', default=None))
|
||||||
|
|
||||||
formats = [{
|
formats = []
|
||||||
'format_id': fid,
|
for format_id, format_url in info['streams'].items():
|
||||||
'url': furl,
|
if format_url.endswith('.f4m'):
|
||||||
'tbr': extract_bitrate(furl),
|
token = self._download_xml(
|
||||||
} for fid, furl in info['streams'].items()]
|
'http://tp.srgssr.ch/token/akahd.xml?stream=%s/*' % compat_urllib_parse_urlparse(format_url).path,
|
||||||
|
video_id, 'Downloading %s token' % format_id)
|
||||||
|
auth_params = xpath_text(token, './/authparams', 'auth params')
|
||||||
|
if not auth_params:
|
||||||
|
continue
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
'%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params),
|
||||||
|
video_id, f4m_id=format_id))
|
||||||
|
elif format_url.endswith('.m3u8'):
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', m3u8_id=format_id))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': format_url,
|
||||||
|
'tbr': extract_bitrate(format_url),
|
||||||
|
})
|
||||||
|
|
||||||
if 'media' in info:
|
if 'media' in info:
|
||||||
formats.extend([{
|
formats.extend([{
|
||||||
|
117
youtube_dl/extractor/sandia.py
Normal file
117
youtube_dl/extractor/sandia.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SandiaIE(InfoExtractor):
|
||||||
|
IE_DESC = 'Sandia National Laboratories'
|
||||||
|
_VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
|
||||||
|
'md5': '9422edc9b9a60151727e4b6d8bef393d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '24aace4429fc450fb5b38cdbf424a66e1d',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Xyce Software Training - Section 1',
|
||||||
|
'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
|
||||||
|
'upload_date': '20120904',
|
||||||
|
'duration': 7794,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
|
||||||
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
js_path = self._search_regex(
|
||||||
|
r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
|
||||||
|
webpage, 'JS code URL')
|
||||||
|
js_url = compat_urlparse.urljoin(url, js_path)
|
||||||
|
|
||||||
|
js_code = self._download_webpage(
|
||||||
|
js_url, video_id, note='Downloading player')
|
||||||
|
|
||||||
|
def extract_str(key, **args):
|
||||||
|
return self._search_regex(
|
||||||
|
r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
|
||||||
|
js_code, key, **args)
|
||||||
|
|
||||||
|
def extract_data(key, **args):
|
||||||
|
data_json = extract_str(key, **args)
|
||||||
|
if data_json is None:
|
||||||
|
return data_json
|
||||||
|
return self._parse_json(
|
||||||
|
data_json, video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for i in itertools.count():
|
||||||
|
fd = extract_data('VideoUrls[%d]' % i, default=None)
|
||||||
|
if fd is None:
|
||||||
|
break
|
||||||
|
formats.append({
|
||||||
|
'format_id': '%s' % i,
|
||||||
|
'format_note': fd['MimeType'].partition('/')[2],
|
||||||
|
'ext': mimetype2ext(fd['MimeType']),
|
||||||
|
'url': fd['Location'],
|
||||||
|
'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
slide_baseurl = compat_urlparse.urljoin(
|
||||||
|
url, extract_data('SlideBaseUrl'))
|
||||||
|
slide_template = slide_baseurl + re.sub(
|
||||||
|
r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
|
||||||
|
slides = []
|
||||||
|
last_slide_time = 0
|
||||||
|
for i in itertools.count(1):
|
||||||
|
sd = extract_str('Slides[%d]' % i, default=None)
|
||||||
|
if sd is None:
|
||||||
|
break
|
||||||
|
timestamp = int_or_none(self._search_regex(
|
||||||
|
r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
|
||||||
|
sd, 'slide %s timestamp' % i, fatal=False))
|
||||||
|
slides.append({
|
||||||
|
'url': slide_template % i,
|
||||||
|
'duration': timestamp - last_slide_time,
|
||||||
|
})
|
||||||
|
last_slide_time = timestamp
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'slides',
|
||||||
|
'protocol': 'slideshow',
|
||||||
|
'url': json.dumps(slides),
|
||||||
|
'preference': -10000, # Downloader not yet written
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = extract_data('Title')
|
||||||
|
description = extract_data('Description', fatal=False)
|
||||||
|
duration = int_or_none(extract_data(
|
||||||
|
'Duration', fatal=False), scale=1000)
|
||||||
|
upload_date = unified_strdate(extract_data('AirDate', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'formats': formats,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor):
|
|||||||
'id': '437BE28B89D799D7',
|
'id': '437BE28B89D799D7',
|
||||||
'title': 'big_buck_bunny_720p_surround.avi',
|
'title': 'big_buck_bunny_720p_surround.avi',
|
||||||
'ext': 'avi',
|
'ext': 'avi',
|
||||||
'thumbnail': 're:^http://.*\.jpg$',
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor):
|
|||||||
''', webpage, 'hash')
|
''', webpage, 'hash')
|
||||||
|
|
||||||
fields = {
|
fields = {
|
||||||
"hash": confirm_hash,
|
"hash": confirm_hash.encode('utf-8'),
|
||||||
"confirm": "Continue as Free User"
|
"confirm": "Continue as Free User"
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor):
|
|||||||
webpage, 'title', default=None)
|
webpage, 'title', default=None)
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'<img\s+src="([^"]*)".+?name="bg"',
|
r'<img\s+src="([^"]*)".+?name="bg"',
|
||||||
webpage, 'thumbnail')
|
webpage, 'thumbnail', default=None)
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
|
@ -1,80 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
HEADRequest,
|
|
||||||
urlhandle_detect_ext,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SoulAnimeWatchingIE(InfoExtractor):
|
|
||||||
IE_NAME = "soulanime:watching"
|
|
||||||
IE_DESC = "SoulAnime video"
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
|
|
||||||
'md5': '05fae04abf72298098b528e98abf4298',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'seirei-tsukai-no-blade-dance-episode-9',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'seirei-tsukai-no-blade-dance-episode-9',
|
|
||||||
'description': 'seirei-tsukai-no-blade-dance-episode-9'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
domain = mobj.group('domain')
|
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url_encoded = self._html_search_regex(
|
|
||||||
r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
|
|
||||||
video_url = "http://www.soul-anime." + domain + video_url_encoded
|
|
||||||
|
|
||||||
ext_req = HEADRequest(video_url)
|
|
||||||
ext_handle = self._request_webpage(
|
|
||||||
ext_req, video_id, note='Determining extension')
|
|
||||||
ext = urlhandle_detect_ext(ext_handle)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'ext': ext,
|
|
||||||
'title': video_id,
|
|
||||||
'description': video_id
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class SoulAnimeSeriesIE(InfoExtractor):
|
|
||||||
IE_NAME = "soulanime:series"
|
|
||||||
IE_DESC = "SoulAnime Series"
|
|
||||||
|
|
||||||
_VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
|
|
||||||
|
|
||||||
_EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'black-rock-shooter-tv'
|
|
||||||
},
|
|
||||||
'playlist_count': 8
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
series_id = mobj.group('id')
|
|
||||||
domain = mobj.group('domain')
|
|
||||||
|
|
||||||
pattern = re.compile(self._EPISODE_REGEX)
|
|
||||||
|
|
||||||
page = self._download_webpage(url, series_id, "Downloading series page")
|
|
||||||
mobj = pattern.findall(page)
|
|
||||||
|
|
||||||
entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
|
|
||||||
|
|
||||||
return self.playlist_result(entries, series_id)
|
|
@ -1,14 +1,30 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_api_key(api_path):
|
||||||
|
if api_path.endswith('?'):
|
||||||
|
api_path = api_path[:-1]
|
||||||
|
|
||||||
|
api_key = 'fb5f58a820353bd7095de526253c14fd'
|
||||||
|
a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
|
||||||
|
return hashlib.md5(a.encode('ascii')).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
class StreamCZIE(InfoExtractor):
|
class StreamCZIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
|
||||||
|
_API_URL = 'http://www.stream.cz/API'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
|
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
|
||||||
@ -36,8 +52,11 @@ class StreamCZIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
data = self._download_json(
|
api_path = '/episode/%s' % video_id
|
||||||
'http://www.stream.cz/API/episode/%s' % video_id, video_id)
|
|
||||||
|
req = compat_urllib_request.Request(self._API_URL + api_path)
|
||||||
|
req.add_header('Api-Password', _get_api_key(api_path))
|
||||||
|
data = self._download_json(req, video_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for quality, video in enumerate(data['video_qualities']):
|
for quality, video in enumerate(data['video_qualities']):
|
||||||
|
@ -52,7 +52,7 @@ class SunPornoIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
quality = qualities(['mp4', 'flv'])
|
quality = qualities(['mp4', 'flv'])
|
||||||
for video_url in re.findall(r'<source src="([^"]+)"', webpage):
|
for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
|
||||||
video_ext = determine_ext(video_url)
|
video_ext = determine_ext(video_url)
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
56
youtube_dl/extractor/svtplay.py
Normal file
56
youtube_dl/extractor/svtplay.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SVTPlayIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
|
||||||
|
'md5': 'f4a184968bc9c802a9b41316657aaa80',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2609989',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'SM veckan vinter, Örebro - Rally, final',
|
||||||
|
'duration': 4500,
|
||||||
|
'thumbnail': 're:^https?://.*[\.-]jpg$',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
info = self._download_json(
|
||||||
|
'http://www.svtplay.se/video/%s?output=json' % video_id, video_id)
|
||||||
|
|
||||||
|
title = info['context']['title']
|
||||||
|
thumbnail = info['context'].get('thumbnailImage')
|
||||||
|
|
||||||
|
video_info = info['video']
|
||||||
|
formats = []
|
||||||
|
for vr in video_info['videoReferences']:
|
||||||
|
vurl = vr['url']
|
||||||
|
if determine_ext(vurl) == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
vurl, video_id,
|
||||||
|
ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=vr.get('playerType')))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': vr.get('playerType'),
|
||||||
|
'url': vurl,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
duration = video_info.get('materialLength')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
@ -15,7 +15,8 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
'id': '80187',
|
'id': '80187',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
||||||
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
|
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
|
||||||
|
'age_limit': 0,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
||||||
@ -24,10 +25,16 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
'id': '19705',
|
'id': '19705',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
||||||
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
"title": "Louis C.K. Interview Pt. 1 11/3/11",
|
||||||
|
'age_limit': 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
_VIDEO_ID_REGEXES = (
|
||||||
|
r'"eVar42"\s*:\s*(\d+)',
|
||||||
|
r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
|
||||||
|
r'"id_not"\s*:\s*(\d+)'
|
||||||
|
)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -38,8 +45,7 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
video_id = mobj.group("video_id")
|
video_id = mobj.group("video_id")
|
||||||
if not video_id:
|
if not video_id:
|
||||||
video_id = self._html_search_regex(
|
video_id = self._html_search_regex(
|
||||||
r'<div\s+class="player".*?data-id="(\d+?)"',
|
self._VIDEO_ID_REGEXES, webpage, 'video id')
|
||||||
webpage, 'video id')
|
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||||
data = self._download_xml(
|
data = self._download_xml(
|
||||||
@ -83,4 +89,5 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
|
'age_limit': self._family_friendly_search(webpage),
|
||||||
}
|
}
|
||||||
|
@ -4,11 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
|
||||||
|
|
||||||
|
|
||||||
class TheOnionIE(InfoExtractor):
|
class TheOnionIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
|
_VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
|
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
|
||||||
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
|
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
|
||||||
@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
article_id = mobj.group('article_id')
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, article_id)
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'"videoId":\s(\d+),', webpage, 'video ID')
|
r'"videoId":\s(\d+),', webpage, 'video ID')
|
||||||
@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
|
|||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
|
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
|
||||||
if not sources:
|
|
||||||
raise ExtractorError(
|
|
||||||
'No sources found for video %s' % video_id, expected=True)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for src, type_ in sources:
|
for src, type_ in sources:
|
||||||
if type_ == 'video/mp4':
|
if type_ == 'video/mp4':
|
||||||
@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
elif type_ == 'application/x-mpegURL':
|
elif type_ == 'application/x-mpegURL':
|
||||||
formats.extend(
|
formats.extend(
|
||||||
self._extract_m3u8_formats(src, video_id, preference=-1))
|
self._extract_m3u8_formats(src, display_id, preference=-1))
|
||||||
else:
|
else:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Encountered unexpected format: %s' % type_)
|
'Encountered unexpected format: %s' % type_)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -2,6 +2,11 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
|
import hmac
|
||||||
|
import binascii
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -11,6 +16,7 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
unsmuggle_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
||||||
@ -18,7 +24,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
|
|||||||
|
|
||||||
class ThePlatformIE(SubtitlesInfoExtractor):
|
class ThePlatformIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
|
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||||
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|
||||||
|theplatform:)(?P<id>[^/\?&]+)'''
|
|theplatform:)(?P<id>[^/\?&]+)'''
|
||||||
|
|
||||||
@ -38,18 +44,48 @@ class ThePlatformIE(SubtitlesInfoExtractor):
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
|
||||||
|
flags = '10' if include_qs else '00'
|
||||||
|
expiration_date = '%x' % (int(time.time()) + life)
|
||||||
|
|
||||||
|
def str_to_hex(str):
|
||||||
|
return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
|
||||||
|
|
||||||
|
def hex_to_str(hex):
|
||||||
|
return binascii.a2b_hex(hex)
|
||||||
|
|
||||||
|
relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
|
||||||
|
clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
|
||||||
|
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
|
||||||
|
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
|
||||||
|
return '%s&sig=%s' % (url, sig)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
provider_id = mobj.group('provider_id')
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
if mobj.group('config'):
|
|
||||||
|
if not provider_id:
|
||||||
|
provider_id = 'dJ5BDC'
|
||||||
|
|
||||||
|
if smuggled_data.get('force_smil_url', False):
|
||||||
|
smil_url = url
|
||||||
|
elif mobj.group('config'):
|
||||||
config_url = url + '&form=json'
|
config_url = url + '&form=json'
|
||||||
config_url = config_url.replace('swf/', 'config/')
|
config_url = config_url.replace('swf/', 'config/')
|
||||||
config_url = config_url.replace('onsite/', 'onsite/config/')
|
config_url = config_url.replace('onsite/', 'onsite/config/')
|
||||||
config = self._download_json(config_url, video_id, 'Downloading config')
|
config = self._download_json(config_url, video_id, 'Downloading config')
|
||||||
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||||
else:
|
else:
|
||||||
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
|
||||||
'format=smil&mbr=true'.format(video_id))
|
'format=smil&mbr=true'.format(provider_id, video_id))
|
||||||
|
|
||||||
|
sig = smuggled_data.get('sig')
|
||||||
|
if sig:
|
||||||
|
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
|
||||||
|
|
||||||
meta = self._download_xml(smil_url, video_id)
|
meta = self._download_xml(smil_url, video_id)
|
||||||
try:
|
try:
|
||||||
@ -62,7 +98,7 @@ class ThePlatformIE(SubtitlesInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
raise ExtractorError(error_msg, expected=True)
|
raise ExtractorError(error_msg, expected=True)
|
||||||
|
|
||||||
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
|
info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
|
||||||
info_json = self._download_webpage(info_url, video_id)
|
info_json = self._download_webpage(info_url, video_id)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
|
|
||||||
|
@ -1,40 +1,55 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class TriluliluIE(InfoExtractor):
|
class TriluliluIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
|
'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
|
||||||
|
'md5': 'c1450a00da251e2769b74b9005601cac',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'big-buck-bunny-1',
|
'id': 'ae2899e124140b',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Big Buck Bunny',
|
'title': 'Big Buck Bunny',
|
||||||
'description': ':) pentru copilul din noi',
|
'description': ':) pentru copilul din noi',
|
||||||
},
|
},
|
||||||
# Server ignores Range headers (--test)
|
|
||||||
'params': {
|
|
||||||
'skip_download': True
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage):
|
||||||
|
raise ExtractorError(
|
||||||
|
'This video is not available in your country.', expected=True)
|
||||||
|
elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage):
|
||||||
|
raise ExtractorError('This video is private.', expected=True)
|
||||||
|
|
||||||
|
flashvars_str = self._search_regex(
|
||||||
|
r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None)
|
||||||
|
|
||||||
|
if flashvars_str:
|
||||||
|
flashvars = self._parse_json(flashvars_str, display_id)
|
||||||
|
else:
|
||||||
|
raise ExtractorError(
|
||||||
|
'This page does not contain videos', expected=True)
|
||||||
|
|
||||||
|
if flashvars['isMP3'] == 'true':
|
||||||
|
raise ExtractorError(
|
||||||
|
'Audio downloads are currently not supported', expected=True)
|
||||||
|
|
||||||
|
video_id = flashvars['hash']
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage, default=None)
|
||||||
|
|
||||||
log_str = self._search_regex(
|
|
||||||
r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info')
|
|
||||||
log = json.loads(log_str)
|
|
||||||
|
|
||||||
format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
||||||
'video-formats2' % log)
|
'video-formats2' % flashvars)
|
||||||
format_doc = self._download_xml(
|
format_doc = self._download_xml(
|
||||||
format_url, video_id,
|
format_url, video_id,
|
||||||
note='Downloading formats',
|
note='Downloading formats',
|
||||||
@ -44,10 +59,10 @@ class TriluliluIE(InfoExtractor):
|
|||||||
'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
||||||
'&source=site&hash=%(hash)s&username=%(userid)s&'
|
'&source=site&hash=%(hash)s&username=%(userid)s&'
|
||||||
'key=ministhebest&format=%%s&sig=&exp=' %
|
'key=ministhebest&format=%%s&sig=&exp=' %
|
||||||
log)
|
flashvars)
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'format': fnode.text,
|
'format_id': fnode.text.partition('-')[2],
|
||||||
'url': video_url_template % fnode.text,
|
'url': video_url_template % fnode.text,
|
||||||
'ext': fnode.text.partition('-')[0]
|
'ext': fnode.text.partition('-')[0]
|
||||||
}
|
}
|
||||||
@ -56,8 +71,8 @@ class TriluliluIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
@ -11,7 +13,7 @@ from ..utils import (
|
|||||||
class TvigleIE(InfoExtractor):
|
class TvigleIE(InfoExtractor):
|
||||||
IE_NAME = 'tvigle'
|
IE_NAME = 'tvigle'
|
||||||
IE_DESC = 'Интернет-телевидение Tvigle.ru'
|
IE_DESC = 'Интернет-телевидение Tvigle.ru'
|
||||||
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
|
_VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -38,16 +40,22 @@ class TvigleIE(InfoExtractor):
|
|||||||
'duration': 186.080,
|
'duration': 186.080,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
},
|
}, {
|
||||||
|
'url': 'https://cloud.tvigle.ru/video/5267604/',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
if not video_id:
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_id = self._html_search_regex(
|
video_id = self._html_search_regex(
|
||||||
r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id')
|
r'<li class="video-preview current_playing" id="(\d+)">',
|
||||||
|
webpage, 'video id')
|
||||||
|
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
|
'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
|
||||||
|
65
youtube_dl/extractor/tweakers.py
Normal file
65
youtube_dl/extractor/tweakers.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
xpath_text,
|
||||||
|
xpath_with_ns,
|
||||||
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TweakersIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
|
||||||
|
'md5': '1b5afa817403bb5baa08359dca31e6df',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9926',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'New Nintendo 3DS XL - Op alle fronten beter',
|
||||||
|
'description': 'md5:f97324cc71e86e11c853f0763820e3ba',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||||
|
'duration': 386,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
playlist = self._download_xml(
|
||||||
|
'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
NS_MAP = {
|
||||||
|
'xspf': 'http://xspf.org/ns/0/',
|
||||||
|
's1': 'http://static.streamone.nl/player/ns/0',
|
||||||
|
}
|
||||||
|
|
||||||
|
track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP))
|
||||||
|
|
||||||
|
title = xpath_text(
|
||||||
|
track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
|
||||||
|
description = xpath_text(
|
||||||
|
track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
|
||||||
|
thumbnail = xpath_text(
|
||||||
|
track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
|
||||||
|
duration = float_or_none(
|
||||||
|
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'),
|
||||||
|
1000)
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': location.text,
|
||||||
|
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||||
|
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||||
|
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||||
|
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -349,6 +349,13 @@ class TwitchStreamIE(TwitchBaseIE):
|
|||||||
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
|
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
|
||||||
channel_id, 'mp4')
|
channel_id, 'mp4')
|
||||||
|
|
||||||
|
# prefer the 'source' stream, the others are limited to 30 fps
|
||||||
|
def _sort_source(f):
|
||||||
|
if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
|
||||||
|
return 1
|
||||||
|
return 0
|
||||||
|
formats = sorted(formats, key=_sort_source)
|
||||||
|
|
||||||
view_count = stream.get('viewers')
|
view_count = stream.get('viewers')
|
||||||
timestamp = parse_iso8601(stream.get('created_at'))
|
timestamp = parse_iso8601(stream.get('created_at'))
|
||||||
|
|
||||||
|
@ -49,15 +49,31 @@ class VideoLecturesNetIE(InfoExtractor):
|
|||||||
thumbnail = (
|
thumbnail = (
|
||||||
None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
|
None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
|
||||||
|
|
||||||
formats = [{
|
formats = []
|
||||||
'url': v.attrib['src'],
|
for v in switch.findall('./video'):
|
||||||
'width': int_or_none(v.attrib.get('width')),
|
proto = v.attrib.get('proto')
|
||||||
'height': int_or_none(v.attrib.get('height')),
|
if proto not in ['http', 'rtmp']:
|
||||||
'filesize': int_or_none(v.attrib.get('size')),
|
continue
|
||||||
'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
|
f = {
|
||||||
'ext': v.attrib.get('ext'),
|
'width': int_or_none(v.attrib.get('width')),
|
||||||
} for v in switch.findall('./video')
|
'height': int_or_none(v.attrib.get('height')),
|
||||||
if v.attrib.get('proto') == 'http']
|
'filesize': int_or_none(v.attrib.get('size')),
|
||||||
|
'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
|
||||||
|
'ext': v.attrib.get('ext'),
|
||||||
|
}
|
||||||
|
src = v.attrib['src']
|
||||||
|
if proto == 'http':
|
||||||
|
if self._is_valid_url(src, video_id):
|
||||||
|
f['url'] = src
|
||||||
|
formats.append(f)
|
||||||
|
elif proto == 'rtmp':
|
||||||
|
f.update({
|
||||||
|
'url': v.attrib['streamer'],
|
||||||
|
'play_path': src,
|
||||||
|
'rtmp_real_time': True,
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -18,6 +18,7 @@ from ..utils import (
|
|||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
|
smuggle_url,
|
||||||
std_headers,
|
std_headers,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
@ -174,7 +175,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
def _verify_video_password(self, url, video_id, webpage):
|
def _verify_video_password(self, url, video_id, webpage):
|
||||||
password = self._downloader.params.get('videopassword', None)
|
password = self._downloader.params.get('videopassword', None)
|
||||||
if password is None:
|
if password is None:
|
||||||
raise ExtractorError('This video is protected by a password, use the --video-password option')
|
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||||
data = compat_urllib_parse.urlencode({
|
data = compat_urllib_parse.urlencode({
|
||||||
'password': password,
|
'password': password,
|
||||||
@ -188,9 +189,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
password_request = compat_urllib_request.Request(pass_url + '/password', data)
|
password_request = compat_urllib_request.Request(pass_url + '/password', data)
|
||||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
password_request.add_header('Cookie', 'xsrft=%s' % token)
|
password_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||||
self._download_webpage(password_request, video_id,
|
return self._download_webpage(
|
||||||
'Verifying the password',
|
password_request, video_id,
|
||||||
'Wrong password')
|
'Verifying the password', 'Wrong password')
|
||||||
|
|
||||||
def _verify_player_video_password(self, url, video_id):
|
def _verify_player_video_password(self, url, video_id):
|
||||||
password = self._downloader.params.get('videopassword', None)
|
password = self._downloader.params.get('videopassword', None)
|
||||||
@ -266,9 +267,12 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
|
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
|
||||||
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
|
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
|
||||||
|
|
||||||
if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
|
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
|
||||||
|
if data and '_video_password_verified' in data:
|
||||||
|
raise ExtractorError('video password verification failed!')
|
||||||
self._verify_video_password(url, video_id, webpage)
|
self._verify_video_password(url, video_id, webpage)
|
||||||
return self._real_extract(url)
|
return self._real_extract(
|
||||||
|
smuggle_url(url, {'_video_password_verified': 'verified'}))
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unable to extract info section',
|
raise ExtractorError('Unable to extract info section',
|
||||||
cause=e)
|
cause=e)
|
||||||
@ -401,6 +405,7 @@ class VimeoChannelIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vimeo.com/channels/tributes',
|
'url': 'http://vimeo.com/channels/tributes',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'tributes',
|
||||||
'title': 'Vimeo Tributes',
|
'title': 'Vimeo Tributes',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 25,
|
'playlist_mincount': 25,
|
||||||
@ -412,12 +417,47 @@ class VimeoChannelIE(InfoExtractor):
|
|||||||
def _extract_list_title(self, webpage):
|
def _extract_list_title(self, webpage):
|
||||||
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
||||||
|
|
||||||
|
def _login_list_password(self, page_url, list_id, webpage):
|
||||||
|
login_form = self._search_regex(
|
||||||
|
r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
|
||||||
|
webpage, 'login form', default=None)
|
||||||
|
if not login_form:
|
||||||
|
return webpage
|
||||||
|
|
||||||
|
password = self._downloader.params.get('videopassword', None)
|
||||||
|
if password is None:
|
||||||
|
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
||||||
|
fields = dict(re.findall(r'''(?x)<input\s+
|
||||||
|
type="hidden"\s+
|
||||||
|
name="([^"]+)"\s+
|
||||||
|
value="([^"]*)"
|
||||||
|
''', login_form))
|
||||||
|
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||||
|
fields['token'] = token
|
||||||
|
fields['password'] = password
|
||||||
|
post = compat_urllib_parse.urlencode(fields)
|
||||||
|
password_path = self._search_regex(
|
||||||
|
r'action="([^"]+)"', login_form, 'password URL')
|
||||||
|
password_url = compat_urlparse.urljoin(page_url, password_path)
|
||||||
|
password_request = compat_urllib_request.Request(password_url, post)
|
||||||
|
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
|
self._set_cookie('vimeo.com', 'xsrft', token)
|
||||||
|
|
||||||
|
return self._download_webpage(
|
||||||
|
password_request, list_id,
|
||||||
|
'Verifying the password', 'Wrong password')
|
||||||
|
|
||||||
def _extract_videos(self, list_id, base_url):
|
def _extract_videos(self, list_id, base_url):
|
||||||
video_ids = []
|
video_ids = []
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
|
page_url = self._page_url(base_url, pagenum)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
self._page_url(base_url, pagenum), list_id,
|
page_url, list_id,
|
||||||
'Downloading page %s' % pagenum)
|
'Downloading page %s' % pagenum)
|
||||||
|
|
||||||
|
if pagenum == 1:
|
||||||
|
webpage = self._login_list_password(page_url, list_id, webpage)
|
||||||
|
|
||||||
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
|
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||||
break
|
break
|
||||||
@ -444,6 +484,7 @@ class VimeoUserIE(VimeoChannelIE):
|
|||||||
'url': 'http://vimeo.com/nkistudio/videos',
|
'url': 'http://vimeo.com/nkistudio/videos',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Nki',
|
'title': 'Nki',
|
||||||
|
'id': 'nkistudio',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 66,
|
'playlist_mincount': 66,
|
||||||
}]
|
}]
|
||||||
@ -461,17 +502,28 @@ class VimeoAlbumIE(VimeoChannelIE):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vimeo.com/album/2632481',
|
'url': 'http://vimeo.com/album/2632481',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '2632481',
|
||||||
'title': 'Staff Favorites: November 2013',
|
'title': 'Staff Favorites: November 2013',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 13,
|
'playlist_mincount': 13,
|
||||||
|
}, {
|
||||||
|
'note': 'Password-protected album',
|
||||||
|
'url': 'https://vimeo.com/album/3253534',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'test',
|
||||||
|
'id': '3253534',
|
||||||
|
},
|
||||||
|
'playlist_count': 1,
|
||||||
|
'params': {
|
||||||
|
'videopassword': 'youtube-dl',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _page_url(self, base_url, pagenum):
|
def _page_url(self, base_url, pagenum):
|
||||||
return '%s/page:%d/' % (base_url, pagenum)
|
return '%s/page:%d/' % (base_url, pagenum)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
album_id = self._match_id(url)
|
||||||
album_id = mobj.group('id')
|
|
||||||
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
|
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
|
||||||
|
|
||||||
|
|
||||||
@ -481,6 +533,7 @@ class VimeoGroupsIE(VimeoAlbumIE):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vimeo.com/groups/rolexawards',
|
'url': 'http://vimeo.com/groups/rolexawards',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'rolexawards',
|
||||||
'title': 'Rolex Awards for Enterprise',
|
'title': 'Rolex Awards for Enterprise',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 73,
|
'playlist_mincount': 73,
|
||||||
@ -563,6 +616,7 @@ class VimeoLikesIE(InfoExtractor):
|
|||||||
'url': 'https://vimeo.com/user755559/likes/',
|
'url': 'https://vimeo.com/user755559/likes/',
|
||||||
'playlist_mincount': 293,
|
'playlist_mincount': 293,
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
'id': 'user755559_likes',
|
||||||
"description": "See all the videos urza likes",
|
"description": "See all the videos urza likes",
|
||||||
"title": 'Videos urza likes',
|
"title": 'Videos urza likes',
|
||||||
},
|
},
|
||||||
|
@ -217,6 +217,9 @@ class VKUserVideosIE(InfoExtractor):
|
|||||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://vk.com/videos205387401',
|
'url': 'http://vk.com/videos205387401',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '205387401',
|
||||||
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
|
|||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
story_filename = self._search_regex(
|
embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
|
||||||
r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
|
r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
|
||||||
speaker_id = self._search_regex(
|
webpage, 'embed params').split(',')]
|
||||||
r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
|
|
||||||
story_id = self._search_regex(
|
(
|
||||||
r'\.storyId\((\d+)\)', webpage, 'story ID')
|
_, speaker_id, story_id, story_duration,
|
||||||
speaker_type = self._search_regex(
|
speaker_type, great_life, _thumbnail, _has_subtitles,
|
||||||
r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
|
story_filename, _story_order) = embed_params
|
||||||
great_life = self._search_regex(
|
|
||||||
r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
|
|
||||||
is_great_life_series = great_life == 'true'
|
is_great_life_series = great_life == 'true'
|
||||||
duration = int_or_none(self._search_regex(
|
duration = int_or_none(story_duration)
|
||||||
r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
|
|
||||||
|
|
||||||
# URL building, see: http://www.webofstories.com/scripts/player.js
|
# URL building, see: http://www.webofstories.com/scripts/player.js
|
||||||
ms_prefix = ''
|
ms_prefix = ''
|
||||||
|
89
youtube_dl/extractor/wsj.py
Normal file
89
youtube_dl/extractor/wsj.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WSJIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P<id>[a-zA-Z0-9-]+)'
|
||||||
|
IE_DESC = 'Wall Street Journal'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||||
|
'md5': '9747d7a6ebc2f4df64b981e1dde9efa9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20150202',
|
||||||
|
'uploader_id': 'jdesai',
|
||||||
|
'creator': 'jdesai',
|
||||||
|
'categories': list, # a long list
|
||||||
|
'duration': 90,
|
||||||
|
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
bitrates = [128, 174, 264, 320, 464, 664, 1264]
|
||||||
|
api_url = (
|
||||||
|
'http://video-api.wsj.com/api-video/find_all_videos.asp?'
|
||||||
|
'type=guid&count=1&query=%s&'
|
||||||
|
'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,'
|
||||||
|
'author,description,name,linkURL,videoStillURL,duration,videoURL,'
|
||||||
|
'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,'
|
||||||
|
'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,'
|
||||||
|
'allthingsd-subsection,sm-section,sm-subsection,provider,'
|
||||||
|
'formattedCreationDate,keywords,keywordsOmniture,column,editor,'
|
||||||
|
'emailURL,emailPartnerID,showName,omnitureProgramName,'
|
||||||
|
'omnitureVideoFormat,linkRelativeURL,touchCastID,'
|
||||||
|
'omniturePublishDate,%s') % (
|
||||||
|
video_id, ','.join('video%dkMP4Url' % br for br in bitrates))
|
||||||
|
info = self._download_json(api_url, video_id)['items'][0]
|
||||||
|
|
||||||
|
# Thumbnails are conveniently in the correct format already
|
||||||
|
thumbnails = info.get('thumbnailList')
|
||||||
|
creator = info.get('author')
|
||||||
|
uploader_id = info.get('editor')
|
||||||
|
categories = info.get('keywords')
|
||||||
|
duration = int_or_none(info.get('duration'))
|
||||||
|
upload_date = unified_strdate(
|
||||||
|
info.get('formattedCreationDate'), day_first=False)
|
||||||
|
title = info.get('name', info.get('titletag'))
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'f4m',
|
||||||
|
'format_note': 'f4m (meta URL)',
|
||||||
|
'url': info['videoURL'],
|
||||||
|
}]
|
||||||
|
if info.get('hls'):
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
info['hls'], video_id, ext='mp4',
|
||||||
|
preference=0, entry_protocol='m3u8_native'))
|
||||||
|
for br in bitrates:
|
||||||
|
field = 'video%dkMP4Url' % br
|
||||||
|
if info.get(field):
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'mp4-%d' % br,
|
||||||
|
'container': 'mp4',
|
||||||
|
'tbr': br,
|
||||||
|
'url': info[field],
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'creator': creator,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'duration': duration,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'categories': categories,
|
||||||
|
}
|
@ -22,7 +22,7 @@ class XTubeIE(InfoExtractor):
|
|||||||
'id': 'kVTUy_G222_',
|
'id': 'kVTUy_G222_',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'strange erotica',
|
'title': 'strange erotica',
|
||||||
'description': 'http://www.xtube.com an ET kind of thing',
|
'description': 'contains:an ET kind of thing',
|
||||||
'uploader': 'greenshowers',
|
'uploader': 'greenshowers',
|
||||||
'duration': 450,
|
'duration': 450,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@ -24,7 +24,6 @@ class YahooIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||||
'md5': '4962b075c08be8690a922ee026d05e69',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
|
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
81
youtube_dl/extractor/yam.py
Normal file
81
youtube_dl/extractor/yam.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
month_by_abbreviation,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class YamIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://mymedia.yam.com/m/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
# An audio hosted on Yam
|
||||||
|
'url': 'http://mymedia.yam.com/m/2283921',
|
||||||
|
'md5': 'c011b8e262a52d5473d9c2e3c9963b9c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2283921',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '發現 - 趙薇 京華煙雲主題曲',
|
||||||
|
'uploader_id': 'princekt',
|
||||||
|
'upload_date': '20080807',
|
||||||
|
'duration': 313.0,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# An external video hosted on YouTube
|
||||||
|
'url': 'http://mymedia.yam.com/m/3598173',
|
||||||
|
'md5': '0238ceec479c654e8c2f1223755bf3e9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pJ2Deys283c',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20150202',
|
||||||
|
'uploader': '新莊社大瑜伽社',
|
||||||
|
'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
|
||||||
|
'uploader_id': '2323agoy',
|
||||||
|
'title': '外婆的澎湖灣KTV-潘安邦',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
page = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
# Is it hosted externally on YouTube?
|
||||||
|
youtube_url = self._html_search_regex(
|
||||||
|
r'<embed src="(http://www.youtube.com/[^"]+)"',
|
||||||
|
page, 'YouTube url', default=None)
|
||||||
|
if youtube_url:
|
||||||
|
return self.url_result(youtube_url, 'Youtube')
|
||||||
|
|
||||||
|
api_page = self._download_webpage(
|
||||||
|
'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
|
||||||
|
note='Downloading API page')
|
||||||
|
api_result_obj = compat_urlparse.parse_qs(api_page)
|
||||||
|
|
||||||
|
uploader_id = self._html_search_regex(
|
||||||
|
r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
|
||||||
|
page, 'uploader id', fatal=False)
|
||||||
|
mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2}) ' +
|
||||||
|
r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
|
||||||
|
if mobj:
|
||||||
|
upload_date = '%s%02d%02d' % (
|
||||||
|
mobj.group('year'),
|
||||||
|
month_by_abbreviation(mobj.group('mon')),
|
||||||
|
int(mobj.group('day')))
|
||||||
|
else:
|
||||||
|
upload_date = None
|
||||||
|
duration = float_or_none(api_result_obj['totaltime'][0], scale=1000)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': api_result_obj['mp3file'][0],
|
||||||
|
'title': self._html_search_meta('description', page),
|
||||||
|
'duration': duration,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
}
|
@ -25,6 +25,7 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -540,26 +541,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if cache_spec is not None:
|
if cache_spec is not None:
|
||||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||||
|
|
||||||
|
download_note = (
|
||||||
|
'Downloading player %s' % player_url
|
||||||
|
if self._downloader.params.get('verbose') else
|
||||||
|
'Downloading %s player %s' % (player_type, player_id)
|
||||||
|
)
|
||||||
if player_type == 'js':
|
if player_type == 'js':
|
||||||
code = self._download_webpage(
|
code = self._download_webpage(
|
||||||
player_url, video_id,
|
player_url, video_id,
|
||||||
note='Downloading %s player %s' % (player_type, player_id),
|
note=download_note,
|
||||||
errnote='Download of %s failed' % player_url)
|
errnote='Download of %s failed' % player_url)
|
||||||
res = self._parse_sig_js(code)
|
res = self._parse_sig_js(code)
|
||||||
elif player_type == 'swf':
|
elif player_type == 'swf':
|
||||||
urlh = self._request_webpage(
|
urlh = self._request_webpage(
|
||||||
player_url, video_id,
|
player_url, video_id,
|
||||||
note='Downloading %s player %s' % (player_type, player_id),
|
note=download_note,
|
||||||
errnote='Download of %s failed' % player_url)
|
errnote='Download of %s failed' % player_url)
|
||||||
code = urlh.read()
|
code = urlh.read()
|
||||||
res = self._parse_sig_swf(code)
|
res = self._parse_sig_swf(code)
|
||||||
else:
|
else:
|
||||||
assert False, 'Invalid player type %r' % player_type
|
assert False, 'Invalid player type %r' % player_type
|
||||||
|
|
||||||
if cache_spec is None:
|
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
||||||
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
cache_res = res(test_string)
|
||||||
cache_res = res(test_string)
|
cache_spec = [ord(c) for c in cache_res]
|
||||||
cache_spec = [ord(c) for c in cache_res]
|
|
||||||
|
|
||||||
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||||||
return res
|
return res
|
||||||
@ -780,8 +785,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
fo for fo in formats
|
fo for fo in formats
|
||||||
if fo['format_id'] == format_id)
|
if fo['format_id'] == format_id)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
f.update(self._formats.get(format_id, {}).items())
|
full_info = self._formats.get(format_id, {}).copy()
|
||||||
formats.append(f)
|
full_info.update(f)
|
||||||
|
formats.append(full_info)
|
||||||
else:
|
else:
|
||||||
existing_format.update(f)
|
existing_format.update(f)
|
||||||
return formats
|
return formats
|
||||||
@ -1123,6 +1129,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'dislike_count': dislike_count,
|
'dislike_count': dislike_count,
|
||||||
|
'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user