Compare commits
296 commits: 2015.07.28...2015.08.28
Commits in this range (SHA1):

a4962b80d6, 5307c33232, 1b660cce12, 8df8c278b6, d7e8264517, f11c316347, f62e02c24f, 70113c38c9,
3d8132f5e2, 39affb5aa4, a882c5f474, 61a7ff1622, 42e7373bd3, e269d3ae7d, e7ddaef5bd, 62984e4584,
3c53455d15, bbb43a39fd, 43e7d3c945, 2f72e83bbd, 57179b4ca1, 4bc8eec4eb, baf510bf8c, 6d53cdd6ce,
ebbf078c7d, 95e431e9ec, 11addc50ff, e4df2f98cc, e7c14660d3, 90076b6172, 28b83495d8, 551c7837ac,
59e6acc757, 9990c960f2, 2006a06eff, 2b6bda1ed8, 468083d2f5, 483fc223bb, 66ce97024d, 8c97f81943,
d7c1630570, 5e1a5ac8de, 9eb4ab6ad9, 4932a817a0, 5d003e29b1, dc95bd503e, f738dd7b7c, f908b74fa3,
c687ac745b, 912e0b7e46, 03bc7237ad, dd565ac1ad, 5cdefc4625, ce00af8767, 51047444aa, aa6cd05ed8,
dac14bf311, 05fe2594e4, 26e1c3514f, 22c83245c5, 7900aede14, f877c6ae5a, ca681f7041, a01da8bbf8,
f3a65d9636, 559f4c550f, 03c635a4b5, 34a4cd0a34, 3b9b32f404, 9c724a9802, 7a6e8a1b17, 369c12e038,
0fa5795b85, c00c7c0af0, cbaed4bb5e, f74a7348f6, 8626b23e4e, 0086874277, 7fc18d9309, 974f1a385a,
6900b4f6f5, d90e4bdb74, 276c989772, ea99110d24, 221a59fe6f, eaa5646483, 041bc3adc5, e64b756943,
201ea3ee8e, 9303ce3e69, 06c085ab6e, c576ef1e7c, 11bed5827d, fab83e2456, 1d25e9d173, 9c21f22923,
3aa697f993, 8b9848ac56, 8b8c1093b6, d0d6c097fc, 6be5e46994, 45694b504a, 41dbc50f9c, 4d2ad866f3,
3cafca04aa, 594f51b859, fb56131dd9, a34e19629c, 3c12a027d4, cb28e03386, 7393746da2, 6828c809e4,
28479149cc, 237c03c8ea, e73c85cb23, b6b2711298, 3b7130439a, 2c919adb74, 60231c65b9, f196047832,
240ca32e57, fa37c26c4d, d7dbfc7cc1, d9ab5262b1, fb124e3741, 479bf783d2, f0f3a6c99d, f57b7835e2,
1df3186e0e, 0b7c27828d, 0a19d4ccd6, 9f3da13860, bf812ef714, b1ac38fadc, fb0d12c6cb, 34952f09e1,
34a7de2970, 0ff827419e, b29440aee6, 11b5605815, 844587669e, f6c3664d71, c5864a8ce6, 27c7114af6,
0791ac1b44, 1de5cd3ba5, 729accb482, 942acef594, fb2f339fec, 98044462b1, 0dcb318f62, f32143469f,
3a30508b94, e0b9d78fab, 8d6765cf48, 12bb392a0f, 08df685fe7, c8d1be772d, 887e9bc7b5, 9f2e7c2f34,
d7bb8884af, 464e792496, 18c3281f9e, 8e2b1be127, b61b7787cb, b465083f45, 154655a85a, 59e89e62d7,
d5d7bdaeb5, b2f82948ee, 428e4e4a85, 1e83741c9a, 621d6a9516, 3550821fb4, 5b0c40da24, e0ac521438,
c29458f3ec, bf94d763ba, 8a37aa1517, f3d24df6f2, fd5d8270dc, be612d9e0c, 4a7434d0b0, ad2141be2d,
f94639fadf, 89faae660f, 0f422256d6, acc1adbe7a, 8002ac9e0a, 6d30cf04db, 430b092a5f, 3eb5fdb581,
9663bd3abb, 5a4d9ddb21, 3be3c622dc, cd6b555e19, 671302b5c0, 4f34cdb0a8, bd690a9f93, 51f267d9d4,
47f53ad958, c73cdd800f, f535ec8278, 238755752f, c71a3195af, 54a9328b20, 3e48522477, 251a44b776,
be7a8379b4, defce60385, 354b4b8604, 5b7dab2dd6, 8a5601e42f, 232541df44, a346b1ff57, d96d604e53,
e704f87f86, 8f5639afcb, 03950c90f7, 47a8b7c14a, 2a04d2c799, 8de922724b, 67b8a28a2f, 51a575159a,
524229a297, 754e70cf3e, 84bc4dcb0f, 10eaa8ef1d, c3124c3085, 8d5b8b477e, d7d2a9a3db, 25a4c5a9ed,
5c45bbe57b, d41d04c0f5, e422d7f4f7, cdc682d5a4, 9cc93c64aa, fa7a1cc5ef, 17712eeb19, 41c3a5a7be,
8765222d22, 645f814544, 308cfe0ab3, e5e8d20a3a, a107193e4b, 55eae65b39, 3f125c8c70, 75e8b2ac87,
ee114368ad, 525a87f58e, 44cae2fb2e, 30a453884e, 3b58d94f71, 8abb86fec4, 16a089780e, 09b6468d30,
80fb6d4aa4, 1f04873517, 799207e838, 34866b4836, be530dfea2, d12a1a47d5, 8d42e3501e, 2711e41bcd,
5e1eddb939, 23e7f53bd3, 000b6b5ae5, 864f24bd2c, 5d8df28d27, f9a5affad9, ab81ef8fa7, 95d8f7ea12,
5316bf7487, a6f774e901, f171bc8b59, cb23bcba29, 2c7ed24796, dfaba1ab95, a62fd1af27, d8f0a9ecea,
cf7e015f25, 1a117a7728, 3c07a729a6, 84c0ed50a5, 02c126a7c2, 114ed20e64, 9d681c2bb3, 3af1fac7b0,
dc48695ab9, 0a31a35098, bb8e553662, f5f4a27a96, cf2ac6df68, 0130afb76e, 5acfa126c8, 67134eaba1
**.travis.yml**

```diff
@@ -5,9 +5,7 @@ python:
   - "3.2"
   - "3.3"
   - "3.4"
-before_install:
-  - sudo apt-get update -qq
-  - sudo apt-get install -yqq rtmpdump
+sudo: false
 script: nosetests test --verbose
 notifications:
   email:
```
**AUTHORS** (5 changed lines)

```diff
@@ -135,3 +135,8 @@ Bernhard Minks
 sceext
 Zach Bruggeman
 Tjark Saul
+slangangular
+Behrouz Abbasi
+ngld
+nyuszika7h
+Shaun Walbridge
```
**CONTRIBUTING.md**

````diff
@@ -125,7 +125,7 @@ If you want to add support for a new site, you can follow this quick list (assum
 ```
 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
-7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
+7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
````
**README.md** (24 changed lines)
```diff
@@ -108,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
     --playlist-reverse               Download playlist videos in reverse order
     --xattr-set-filesize             Set file xattribute ytdl.filesize with expected filesize (experimental)
     --hls-prefer-native              Use the native HLS downloader instead of ffmpeg (experimental)
-    --external-downloader COMMAND    Use the specified external downloader. Currently supports aria2c,curl,httpie,wget
+    --external-downloader COMMAND    Use the specified external downloader. Currently supports aria2c,axel,curl,httpie,wget
     --external-downloader-args ARGS  Give these arguments to the external downloader
 
 ## Filesystem Options:
```
````diff
@@ -236,7 +236,14 @@ which means you can modify it, redistribute it or use it however you like.
 
 # CONFIGURATION
 
-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
+You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime and use a proxy:
+```
+--extract-audio
+--no-mtime
+--proxy 127.0.0.1:3128
+```
+
+You can use `--ignore-config` if you want to disable the configuration file for a particular youtube-dl run.
 
 ### Authentication with `.netrc` file ###
````
```diff
@@ -272,6 +279,7 @@ The `-o` option allows users to indicate a template for the output file names. T
 - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
 - `playlist`: The name or the id of the playlist that contains the video.
 - `playlist_index`: The index of the video in the playlist, a five-digit number.
+- `format_id`: The sequence will be replaced by the format code specified by `--format`.
 
 The current default template is `%(title)s-%(id)s.%(ext)s`.
```
```diff
@@ -289,7 +297,7 @@ youtube-dl_test_video_.mp4 # A simple file name
 By default youtube-dl tries to download the best quality, but sometimes you may want to download other format.
 The simplest case is requesting a specific format, for example `-f 22`. You can get the list of available formats using `--list-formats`, you can also use a file extension (currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the special names `best`, `bestvideo`, `bestaudio` and `worst`.
 
-If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`.
+If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
 
 Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
```
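The grouped selector added above can also be exercised through the embedding API documented later in this README. A minimal sketch (not part of the diff; the options and sample URL are illustrative):

```python
# Pick the best mp4 or webm format below 480p, using the new
# parenthesized format selector described above.
import youtube_dl

ydl_opts = {
    'format': '(mp4,webm)[height<480]',
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```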
```diff
@@ -439,6 +447,12 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
     youtube-dl -- -wNyEUrxzFU
     youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
 
+### How do I pass cookies to youtube-dl?
+
+Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that cookies file must be in Mozilla/Netscape format and the first line of cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in cookies file and convert newlines if necessary to correspond your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
+
+Passing cookies to youtube-dl is a good way to workaround login when particular extractor does not implement it explicitly.
+
 ### Can you add support for this anime video site, or site which shows current movies for free?
 
 As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
```
````diff
@@ -538,7 +552,7 @@ If you want to add support for a new site, you can follow this quick list (assum
 ```
 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
-7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
+7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
````
````diff
@@ -566,7 +580,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
     ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
 ```
 
-Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
+Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L117-L265). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
 
 Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
````
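A minimal sketch of the `logger` hook mentioned in the changed paragraph (not part of the diff; the class name is arbitrary — youtube-dl only calls `debug`, `warning` and `error` on the object):

```python
import youtube_dl

class MyLogger(object):
    def debug(self, msg):
        pass  # suppress routine progress output

    def warning(self, msg):
        pass

    def error(self, msg):
        print(msg)  # surface only errors

with youtube_dl.YoutubeDL({'logger': MyLogger()}) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```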
**docs/supportedsites.md**

```diff
@@ -86,7 +86,7 @@
 - **chirbit:profile**
 - **Cinchcast**
 - **Cinemassacre**
-- **clipfish**
+- **Clipfish**
 - **cliphunter**
 - **Clipsyndicate**
 - **Cloudy**
@@ -116,6 +116,7 @@
 - **DailymotionCloud**
 - **daum.net**
 - **DBTV**
+- **DCN**
 - **DctpTv**
 - **DeezerPlaylist**
 - **defense.gouv.fr**
@@ -149,6 +150,7 @@
 - **EroProfile**
 - **Escapist**
 - **ESPN** (Currently broken)
+- **EsriVideo**
 - **EveryonesMixtape**
 - **exfm**: ex.fm
 - **ExpoTV**
@@ -164,7 +166,7 @@
 - **Folketinget**: Folketinget (ft.dk; Danish parliament)
 - **FootyRoom**
 - **Foxgay**
-- **FoxNews**
+- **FoxNews**: Fox News and Fox Business Video
 - **FoxSports**
 - **france2.fr:generation-quoi**
 - **FranceCulture**
@@ -219,6 +221,8 @@
 - **imdb:list**: Internet Movie Database lists
 - **Imgur**
 - **Ina**
+- **Indavideo**
+- **IndavideoEmbed**
 - **InfoQ**
 - **Instagram**
 - **instagram:user**: Instagram user profile
@@ -297,6 +301,7 @@
 - **Moviezine**
 - **movshare**: MovShare
 - **MPORA**
+- **MSNBC**
 - **MTV**
 - **mtviggy.com**
 - **mtvservices:embedded**
@@ -304,6 +309,7 @@
 - **MusicPlayOn**
 - **MusicVault**
 - **muzu.tv**
+- **Mwave**
 - **MySpace**
 - **MySpace:album**
 - **MySpass**
@@ -351,7 +357,6 @@
 - **NowTV**
 - **nowvideo**: NowVideo
 - **npo**: npo.nl and ntr.nl
-- **npo**: npo.nl and ntr.nl
 - **npo.nl:live**
 - **npo.nl:radio**
 - **npo.nl:radio:fragment**
@@ -377,6 +382,7 @@
 - **parliamentlive.tv**: UK parliament videos
 - **Patreon**
 - **PBS**
+- **Periscope**: Periscope
 - **PhilharmonieDeParis**: Philharmonie de Paris
 - **Phoenix**
 - **Photobucket**
@@ -385,8 +391,11 @@
 - **PlanetaPlay**
 - **play.fm**
 - **played.to**
+- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
 - **Playvid**
 - **Playwire**
+- **pluralsight**
+- **pluralsight:course**
 - **plus.google**: Google Plus
 - **pluzz.francetv.fr**
 - **podomatic**
@@ -406,6 +415,7 @@
 - **qqmusic:playlist**: QQ音乐 - 歌单
 - **qqmusic:singer**: QQ音乐 - 歌手
 - **qqmusic:toplist**: QQ音乐 - 排行榜
+- **Quickscope**: Quick Scope
 - **QuickVid**
 - **R7**
 - **radio.de**
@@ -430,6 +440,7 @@
 - **rtve.es:alacarta**: RTVE a la carta
 - **rtve.es:infantil**: RTVE infantil
 - **rtve.es:live**: RTVE.es live streams
+- **RTVNH**
 - **RUHD**
 - **rutube**: Rutube videos
 - **rutube:channel**: Rutube channels
@@ -453,7 +464,8 @@
 - **ServingSys**
 - **Sexu**
 - **SexyKarma**: Sexy Karma and Watch Indian Porn
-- **Shared**
+- **Shahid**
+- **Shared**: shared.sx and vivo.sx
 - **ShareSix**
 - **Sina**
 - **Slideshare**
@@ -518,6 +530,7 @@
 - **ted**
 - **TeleBruxelles**
 - **telecinco.es**
+- **Telegraaf**
 - **TeleMB**
 - **TeleTask**
 - **TenPlay**
@@ -525,6 +538,7 @@
 - **TF1**
 - **TheOnion**
 - **ThePlatform**
+- **ThePlatformFeed**
 - **TheSixtyOne**
 - **ThisAmericanLife**
 - **ThisAV**
@@ -590,7 +604,6 @@
 - **Viddler**
 - **video.google:search**: Google Video search
 - **video.mit.edu**
-- **VideoBam**
 - **VideoDetective**
 - **videofy.me**
 - **videolectures.net**
@@ -621,6 +634,7 @@
 - **Vodlocker**
 - **VoiceRepublic**
 - **Vporn**
+- **vpro**: npo.nl and ntr.nl
 - **VRT**
 - **vube**: Vube.com
 - **VuClip**
```
**test/helper.py**

```diff
@@ -133,8 +133,8 @@ def expect_info_dict(self, got_dict, expected_dict):
     elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
         got = got_dict.get(info_field)
         self.assertTrue(
-            isinstance(got, list),
-            'Expected field %s to be a list, but it is of type %s' % (
+            isinstance(got, (list, dict)),
+            'Expected field %s to be a list or a dict, but it is of type %s' % (
                 info_field, type(got).__name__))
         expected_num = int(expected.partition(':')[2])
         assertGreaterEqual(
@@ -160,7 +160,7 @@ def expect_info_dict(self, got_dict, expected_dict):
     # Are checkable fields missing from the test case definition?
     test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                           for key, value in got_dict.items()
-                          if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+                          if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit'))
     missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
     if missing_keys:
         def _repr(v):
```
**test/test_YoutubeDL.py**

```diff
@@ -15,7 +15,7 @@ from youtube_dl import YoutubeDL
 from youtube_dl.compat import compat_str
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.postprocessor.common import PostProcessor
-from youtube_dl.utils import match_filter_func
+from youtube_dl.utils import ExtractorError, match_filter_func
 
 TEST_URL = 'http://localhost/sample.mp4'
 
@@ -105,6 +105,7 @@ class TestFormatSelection(unittest.TestCase):
     def test_format_selection(self):
         formats = [
             {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+            {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
             {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
             {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
             {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
@@ -136,6 +137,11 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], '35')
 
+        ydl = YDL({'format': 'example-with-dashes'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'example-with-dashes')
+
     def test_format_selection_audio(self):
         formats = [
             {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
@@ -229,21 +235,70 @@ class TestFormatSelection(unittest.TestCase):
             '141', '172', '140', '171', '139',
         ]
 
-        for f1id, f2id in zip(order, order[1:]):
-            f1 = YoutubeIE._formats[f1id].copy()
-            f1['format_id'] = f1id
-            f1['url'] = 'url:' + f1id
-            f2 = YoutubeIE._formats[f2id].copy()
-            f2['format_id'] = f2id
-            f2['url'] = 'url:' + f2id
+        def format_info(f_id):
+            info = YoutubeIE._formats[f_id].copy()
+            info['format_id'] = f_id
+            info['url'] = 'url:' + f_id
+            return info
+        formats_order = [format_info(f_id) for f_id in order]
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': 'bestvideo+bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], '137+141')
+        self.assertEqual(downloaded['ext'], 'mp4')
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], '38')
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': 'bestvideo/best,bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['137', '141'])
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['137+141', '248+141'])
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['136+141', '247+141'])
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['248+141'])
+
+        for f1, f2 in zip(formats_order, formats_order[1:]):
             info_dict = _make_result([f1, f2], extractor='youtube')
             ydl = YDL({'format': 'best/bestvideo'})
             yie = YoutubeIE(ydl)
             yie._sort_formats(info_dict['formats'])
             ydl.process_ie_result(info_dict)
             downloaded = ydl.downloaded_info_dicts[0]
-            self.assertEqual(downloaded['format_id'], f1id)
+            self.assertEqual(downloaded['format_id'], f1['format_id'])
 
             info_dict = _make_result([f2, f1], extractor='youtube')
             ydl = YDL({'format': 'best/bestvideo'})
@@ -251,7 +306,18 @@ class TestFormatSelection(unittest.TestCase):
             yie._sort_formats(info_dict['formats'])
             ydl.process_ie_result(info_dict)
             downloaded = ydl.downloaded_info_dicts[0]
-            self.assertEqual(downloaded['format_id'], f1id)
+            self.assertEqual(downloaded['format_id'], f1['format_id'])
+
+    def test_invalid_format_specs(self):
+        def assert_syntax_error(format_spec):
+            ydl = YDL({'format': format_spec})
+            info_dict = _make_result([{'format_id': 'foo', 'url': TEST_URL}])
+            self.assertRaises(SyntaxError, ydl.process_ie_result, info_dict)
+
+        assert_syntax_error('bestvideo,,best')
+        assert_syntax_error('+bestaudio')
+        assert_syntax_error('bestvideo+')
+        assert_syntax_error('/')
 
     def test_format_filtering(self):
         formats = [
@@ -308,6 +374,18 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'G')
 
+        ydl = YDL({'format': 'all[width>=400][width<=600]'})
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['B', 'C', 'D'])
+
+        ydl = YDL({'format': 'best[height<40]'})
+        try:
+            ydl.process_ie_result(info_dict)
+        except ExtractorError:
+            pass
+        self.assertEqual(ydl.downloaded_info_dicts, [])
+
 
 class TestYoutubeDL(unittest.TestCase):
     def test_subtitles(self):
```
**test/test_download.py**

```diff
@@ -136,7 +136,9 @@ def generator(test_case):
             # We're not using .download here sine that is just a shim
             # for outside error handling, and returns the exit code
             # instead of the result dict.
-            res_dict = ydl.extract_info(test_case['url'])
+            res_dict = ydl.extract_info(
+                test_case['url'],
+                force_generic_extractor=params.get('force_generic_extractor', False))
         except (DownloadError, ExtractorError) as err:
             # Check if the exception is not a network related one
             if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
```
**test/test_subtitles.py**

```diff
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
     RaiIE,
     VikiIE,
     ThePlatformIE,
+    ThePlatformFeedIE,
     RTVEALaCartaIE,
     FunnyOrDieIE,
 )
@@ -307,6 +308,18 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
 
 
+class TestThePlatformFeedSubtitles(BaseTestSubtitles):
+    url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
+    IE = ThePlatformFeedIE
+
+    def test_allsubtitles(self):
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['en']))
+        self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
+
+
 class TestRtveSubtitles(BaseTestSubtitles):
     url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
     IE = RTVEALaCartaIE
```
**test/test_utils.py**

```diff
@@ -235,12 +235,21 @@ class TestUtil(unittest.TestCase):
                 <node x="a"/>
                 <node x="a" y="c" />
                 <node x="b" y="d" />
+                <node x="" />
             </root>'''
         doc = xml.etree.ElementTree.fromstring(testxml)
+
+        self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
         self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
+        self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None)
+        self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None)
+        self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1])
         self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
+        self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3])
+        self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2])
         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3])
+        self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4])
 
     def test_xpath_with_ns(self):
         testxml = '''<root xmlns:media="http://example.com/">
```
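The new assertions pin down two behaviors: `find_xpath_attr` with the value argument omitted matches any node that merely has the attribute, and an explicit `''` matches an empty attribute value. A rough stand-in implementation for illustration only (the real helper lives in `youtube_dl/utils.py` and may differ):

```python
import xml.etree.ElementTree as ET

def find_xpath_attr(node, xpath, key, val=None):
    # val=None means "attribute present, any value"; '' must match exactly.
    for f in node.findall(xpath):
        if key not in f.attrib:
            continue
        if val is None or f.attrib[key] == val:
            return f
    return None

doc = ET.fromstring('<root><node/><node x="a"/><node x=""/></root>')
assert find_xpath_attr(doc, './/node', 'x') is doc[1]      # any value of x
assert find_xpath_attr(doc, './/node', 'x', '') is doc[2]  # empty value of x
```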
**youtube_dl/YoutubeDL.py**

```diff
@@ -21,24 +21,24 @@ import subprocess
 import socket
 import sys
 import time
+import tokenize
 import traceback
 
 if os.name == 'nt':
     import ctypes
 
 from .compat import (
-    compat_basestring,
     compat_cookiejar,
     compat_expanduser,
     compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
     compat_str,
+    compat_tokenize_tokenize,
    compat_urllib_error,
     compat_urllib_request,
 )
 from .utils import (
-    escape_url,
     ContentTooShortError,
     date_from_str,
     DateRange,
@@ -49,7 +49,6 @@ from .utils import (
     ExtractorError,
     format_bytes,
     formatSeconds,
-    HEADRequest,
     locked_file,
     make_HTTPS_handler,
     MaxDownloadsReached,
```
```diff
@@ -853,8 +852,8 @@ class YoutubeDL(object):
         else:
             raise Exception('Invalid result type: %s' % result_type)
 
-    def _apply_format_filter(self, format_spec, available_formats):
-        " Returns a tuple of the remaining format_spec and filtered formats "
+    def _build_format_filter(self, filter_spec):
+        " Returns a function to filter the formats according to the filter_spec "
 
         OPERATORS = {
             '<': operator.lt,
@@ -864,13 +863,13 @@ class YoutubeDL(object):
             '=': operator.eq,
             '!=': operator.ne,
         }
-        operator_rex = re.compile(r'''(?x)\s*\[
+        operator_rex = re.compile(r'''(?x)\s*
             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
-            \]$
+            $
             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
-        m = operator_rex.search(format_spec)
+        m = operator_rex.search(filter_spec)
         if m:
             try:
                 comparison_value = int(m.group('value'))
@@ -881,7 +880,7 @@ class YoutubeDL(object):
                 if comparison_value is None:
                     raise ValueError(
                         'Invalid value %r in format specification %r' % (
-                            m.group('value'), format_spec))
+                            m.group('value'), filter_spec))
                 op = OPERATORS[m.group('op')]
 
         if not m:
@@ -889,85 +888,283 @@ class YoutubeDL(object):
             '=': operator.eq,
             '!=': operator.ne,
         }
-        str_operator_rex = re.compile(r'''(?x)\s*\[
+        str_operator_rex = re.compile(r'''(?x)
             \s*(?P<key>ext|acodec|vcodec|container|protocol)
             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
             \s*(?P<value>[a-zA-Z0-9_-]+)
-            \s*\]$
+            \s*$
             ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
-        m = str_operator_rex.search(format_spec)
+        m = str_operator_rex.search(filter_spec)
         if m:
             comparison_value = m.group('value')
             op = STR_OPERATORS[m.group('op')]
 
         if not m:
-            raise ValueError('Invalid format specification %r' % format_spec)
+            raise ValueError('Invalid filter specification %r' % filter_spec)
 
         def _filter(f):
             actual_value = f.get(m.group('key'))
             if actual_value is None:
                 return m.group('none_inclusive')
             return op(actual_value, comparison_value)
-        new_formats = [f for f in available_formats if _filter(f)]
-
-        new_format_spec = format_spec[:-len(m.group(0))]
-        if not new_format_spec:
-            new_format_spec = 'best'
-
-        return (new_format_spec, new_formats)
-
-    def select_format(self, format_spec, available_formats):
-        while format_spec.endswith(']'):
-            format_spec, available_formats = self._apply_format_filter(
-                format_spec, available_formats)
-        if not available_formats:
-            return None
-
-        if format_spec in ['best', 'worst', None]:
-            format_idx = 0 if format_spec == 'worst' else -1
-            audiovideo_formats = [
-                f for f in available_formats
-                if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
-            if audiovideo_formats:
-                return audiovideo_formats[format_idx]
-            # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
-            elif (all(f.get('acodec') != 'none' for f in available_formats) or
-                  all(f.get('vcodec') != 'none' for f in available_formats)):
-                return available_formats[format_idx]
-        elif format_spec == 'bestaudio':
-            audio_formats = [
-                f for f in available_formats
-                if f.get('vcodec') == 'none']
-            if audio_formats:
-                return audio_formats[-1]
-        elif format_spec == 'worstaudio':
-            audio_formats = [
-                f for f in available_formats
-                if f.get('vcodec') == 'none']
-            if audio_formats:
-                return audio_formats[0]
-        elif format_spec == 'bestvideo':
-            video_formats = [
-                f for f in available_formats
-                if f.get('acodec') == 'none']
-            if video_formats:
-                return video_formats[-1]
-        elif format_spec == 'worstvideo':
-            video_formats = [
-                f for f in available_formats
-                if f.get('acodec') == 'none']
-            if video_formats:
-                return video_formats[0]
-        else:
-            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
-            if format_spec in extensions:
-                filter_f = lambda f: f['ext'] == format_spec
-            else:
-                filter_f = lambda f: f['format_id'] == format_spec
-            matches = list(filter(filter_f, available_formats))
-            if matches:
-                return matches[-1]
-        return None
+        return _filter
+
+    def build_format_selector(self, format_spec):
+        def syntax_error(note, start):
+            message = (
+                'Invalid format specification: '
+                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
+            return SyntaxError(message)
+
+        PICKFIRST = 'PICKFIRST'
+        MERGE = 'MERGE'
+        SINGLE = 'SINGLE'
+        GROUP = 'GROUP'
+        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+
+        def _parse_filter(tokens):
+            filter_parts = []
+            for type, string, start, _, _ in tokens:
+                if type == tokenize.OP and string == ']':
+                    return ''.join(filter_parts)
+                else:
+                    filter_parts.append(string)
+
+        def _remove_unused_ops(tokens):
+            # Remove operators that we don't use and join them with the sourrounding strings
+            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+            ALLOWED_OPS = ('/', '+', ',', '(', ')')
+            last_string, last_start, last_end, last_line = None, None, None, None
+            for type, string, start, end, line in tokens:
+                if type == tokenize.OP and string == '[':
+                    if last_string:
+                        yield tokenize.NAME, last_string, last_start, last_end, last_line
+                        last_string = None
+                    yield type, string, start, end, line
+                    # everything inside brackets will be handled by _parse_filter
+                    for type, string, start, end, line in tokens:
+                        yield type, string, start, end, line
+                        if type == tokenize.OP and string == ']':
+                            break
+                elif type == tokenize.OP and string in ALLOWED_OPS:
+                    if last_string:
+                        yield tokenize.NAME, last_string, last_start, last_end, last_line
+                        last_string = None
+                    yield type, string, start, end, line
+                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
+                    if not last_string:
+                        last_string = string
+                        last_start = start
+                        last_end = end
+                    else:
+                        last_string += string
+            if last_string:
+                yield tokenize.NAME, last_string, last_start, last_end, last_line
+
+        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
+            selectors = []
+            current_selector = None
+            for type, string, start, _, _ in tokens:
+                # ENCODING is only defined in python 3.x
+                if type == getattr(tokenize, 'ENCODING', None):
+                    continue
+                elif type in [tokenize.NAME, tokenize.NUMBER]:
+                    current_selector = FormatSelector(SINGLE, string, [])
+                elif type == tokenize.OP:
+                    if string == ')':
+                        if not inside_group:
+                            # ')' will be handled by the parentheses group
+                            tokens.restore_last_token()
+                        break
+                    elif inside_merge and string in ['/', ',']:
+                        tokens.restore_last_token()
+                        break
+                    elif inside_choice and string == ',':
+                        tokens.restore_last_token()
+                        break
+                    elif string == ',':
+                        if not current_selector:
+                            raise syntax_error('"," must follow a format selector', start)
+                        selectors.append(current_selector)
+                        current_selector = None
+                    elif string == '/':
+                        if not current_selector:
+                            raise syntax_error('"/" must follow a format selector', start)
+                        first_choice = current_selector
+                        second_choice = _parse_format_selection(tokens, inside_choice=True)
+                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
+                    elif string == '[':
+                        if not current_selector:
+                            current_selector = FormatSelector(SINGLE, 'best', [])
+                        format_filter = _parse_filter(tokens)
+                        current_selector.filters.append(format_filter)
+                    elif string == '(':
+                        if current_selector:
+                            raise syntax_error('Unexpected "("', start)
+                        group = _parse_format_selection(tokens, inside_group=True)
+                        current_selector = FormatSelector(GROUP, group, [])
+                    elif string == '+':
+                        video_selector = current_selector
+                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
+                        if not video_selector or not audio_selector:
+                            raise syntax_error('"+" must be between two format selectors', start)
+                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
+                    else:
+                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
+                elif type == tokenize.ENDMARKER:
+                    break
+            if current_selector:
+                selectors.append(current_selector)
+            return selectors
+
+        def _build_selector_function(selector):
+            if isinstance(selector, list):
+                fs = [_build_selector_function(s) for s in selector]
+
+                def selector_function(formats):
+                    for f in fs:
+                        for format in f(formats):
+                            yield format
+                return selector_function
+            elif selector.type == GROUP:
+                selector_function = _build_selector_function(selector.selector)
+            elif selector.type == PICKFIRST:
+                fs = [_build_selector_function(s) for s in selector.selector]
+
+                def selector_function(formats):
+                    for f in fs:
+                        picked_formats = list(f(formats))
+                        if picked_formats:
+                            return picked_formats
+                    return []
+            elif selector.type == SINGLE:
+                format_spec = selector.selector
+
+                def selector_function(formats):
+                    formats = list(formats)
+                    if not formats:
+                        return
+                    if format_spec == 'all':
+                        for f in formats:
+                            yield f
+                    elif format_spec in ['best', 'worst', None]:
+                        format_idx = 0 if format_spec == 'worst' else -1
+                        audiovideo_formats = [
+                            f for f in formats
+                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+                        if audiovideo_formats:
+                            yield audiovideo_formats[format_idx]
+                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
+                        elif (all(f.get('acodec') != 'none' for f in formats) or
+                              all(f.get('vcodec') != 'none' for f in formats)):
+                            yield formats[format_idx]
+                    elif format_spec == 'bestaudio':
+                        audio_formats = [
+                            f for f in formats
+                            if f.get('vcodec') == 'none']
+                        if audio_formats:
+                            yield audio_formats[-1]
+                    elif format_spec == 'worstaudio':
+                        audio_formats = [
+                            f for f in formats
+                            if f.get('vcodec') == 'none']
+                        if audio_formats:
+                            yield audio_formats[0]
+                    elif format_spec == 'bestvideo':
+                        video_formats = [
+                            f for f in formats
+                            if f.get('acodec') == 'none']
+                        if video_formats:
+                            yield video_formats[-1]
+                    elif format_spec == 'worstvideo':
+                        video_formats = [
+                            f for f in formats
+                            if f.get('acodec') == 'none']
+                        if video_formats:
+                            yield video_formats[0]
+                    else:
+                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
+                        if format_spec in extensions:
+                            filter_f = lambda f: f['ext'] == format_spec
+                        else:
+                            filter_f = lambda f: f['format_id'] == format_spec
+                        matches = list(filter(filter_f, formats))
+                        if matches:
+                            yield matches[-1]
+            elif selector.type == MERGE:
+                def _merge(formats_info):
+                    format_1, format_2 = [f['format_id'] for f in formats_info]
+                    # The first format must contain the video and the
+                    # second the audio
+                    if formats_info[0].get('vcodec') == 'none':
+                        self.report_error('The first format must '
+                                          'contain the video, try using '
+                                          '"-f %s+%s"' % (format_2, format_1))
+                        return
+                    output_ext = (
+                        formats_info[0]['ext']
+                        if self.params.get('merge_output_format') is None
+                        else self.params['merge_output_format'])
+                    return {
+                        'requested_formats': formats_info,
+                        'format': '%s+%s' % (formats_info[0].get('format'),
+                                             formats_info[1].get('format')),
+                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
+                                                formats_info[1].get('format_id')),
+                        'width': formats_info[0].get('width'),
+                        'height': formats_info[0].get('height'),
+                        'resolution': formats_info[0].get('resolution'),
+                        'fps': formats_info[0].get('fps'),
+                        'vcodec': formats_info[0].get('vcodec'),
+                        'vbr': formats_info[0].get('vbr'),
+                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
+                        'acodec': formats_info[1].get('acodec'),
+                        'abr': formats_info[1].get('abr'),
+                        'ext': output_ext,
+                    }
+                video_selector, audio_selector = map(_build_selector_function, selector.selector)
+
+                def selector_function(formats):
+                    formats = list(formats)
+                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
+                        yield _merge(pair)
+
+            filters = [self._build_format_filter(f) for f in selector.filters]
+
+            def final_selector(formats):
+                for _filter in filters:
+                    formats = list(filter(_filter, formats))
+                return selector_function(formats)
+            return final_selector
+
+        stream = io.BytesIO(format_spec.encode('utf-8'))
+        try:
+            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
+        except tokenize.TokenError:
+            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
+
+        class TokenIterator(object):
+            def __init__(self, tokens):
+                self.tokens = tokens
+                self.counter = 0
+
+            def __iter__(self):
+                return self
+
+            def __next__(self):
+                if self.counter >= len(self.tokens):
+                    raise StopIteration()
+                value = self.tokens[self.counter]
+                self.counter += 1
+                return value
+
+            next = __next__
+
+            def restore_last_token(self):
+                self.counter -= 1
+
+        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
+        return _build_selector_function(parsed_selector)
 
     def _calc_headers(self, info_dict):
         res = std_headers.copy()
```
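For intuition about what `build_format_selector` consumes: the spec string is run through Python's own tokenizer, so a format spec breaks into NAME/NUMBER/OP tokens before `_parse_format_selection` assembles them into `FormatSelector` tuples. A standalone illustration (Python 3 shown; the diff's `compat_tokenize_tokenize` papers over the 2/3 difference):

```python
import io
import tokenize

spec = '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio/best'
for tok_type, tok_string, _, _, _ in tokenize.generate_tokens(io.StringIO(spec).readline):
    print(tokenize.tok_name[tok_type], repr(tok_string))
# OP '(' / NAME 'bestvideo' / OP '[' / NAME 'ext' / OP '=' / NAME 'mp4' / OP ']' ...
```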
```diff
@@ -1111,56 +1308,8 @@ class YoutubeDL(object):
                 req_format_list.append('bestvideo+bestaudio')
             req_format_list.append('best')
             req_format = '/'.join(req_format_list)
-        formats_to_download = []
-        if req_format == 'all':
-            formats_to_download = formats
-        else:
-            for rfstr in req_format.split(','):
-                # We can accept formats requested in the format: 34/5/best, we pick
-                # the first that is available, starting from left
-                req_formats = rfstr.split('/')
-                for rf in req_formats:
-                    if re.match(r'.+?\+.+?', rf) is not None:
-                        # Two formats have been requested like '137+139'
-                        format_1, format_2 = rf.split('+')
-                        formats_info = (self.select_format(format_1, formats),
-                                        self.select_format(format_2, formats))
-                        if all(formats_info):
-                            # The first format must contain the video and the
-                            # second the audio
-                            if formats_info[0].get('vcodec') == 'none':
-                                self.report_error('The first format must '
-                                                  'contain the video, try using '
-                                                  '"-f %s+%s"' % (format_2, format_1))
-                                return
-                            output_ext = (
-                                formats_info[0]['ext']
-                                if self.params.get('merge_output_format') is None
-                                else self.params['merge_output_format'])
-                            selected_format = {
-                                'requested_formats': formats_info,
-                                'format': '%s+%s' % (formats_info[0].get('format'),
-                                                     formats_info[1].get('format')),
-                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
-                                                        formats_info[1].get('format_id')),
-                                'width': formats_info[0].get('width'),
-                                'height': formats_info[0].get('height'),
-                                'resolution': formats_info[0].get('resolution'),
-                                'fps': formats_info[0].get('fps'),
-                                'vcodec': formats_info[0].get('vcodec'),
-                                'vbr': formats_info[0].get('vbr'),
-                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
-                                'acodec': formats_info[1].get('acodec'),
-                                'abr': formats_info[1].get('abr'),
-                                'ext': output_ext,
-                            }
-                        else:
-                            selected_format = None
-                    else:
-                        selected_format = self.select_format(rf, formats)
-                    if selected_format is not None:
-                        formats_to_download.append(selected_format)
-                        break
+        format_selector = self.build_format_selector(req_format)
+        formats_to_download = list(format_selector(formats))
         if not formats_to_download:
             raise ExtractorError('requested format not available',
                                  expected=True)
```
```diff
@@ -1708,27 +1857,6 @@ class YoutubeDL(object):
 
     def urlopen(self, req):
         """ Start an HTTP download """
-
-        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
-        # always respected by websites, some tend to give out URLs with non percent-encoded
-        # non-ASCII characters (see telemb.py, ard.py [#3412])
-        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
-        # To work around aforementioned issue we will replace request's original URL with
-        # percent-encoded one
-        req_is_string = isinstance(req, compat_basestring)
-        url = req if req_is_string else req.get_full_url()
-        url_escaped = escape_url(url)
-
-        # Substitute URL if any change after escaping
-        if url != url_escaped:
-            if req_is_string:
-                req = url_escaped
-            else:
-                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
-                req = req_type(
-                    url_escaped, data=req.data, headers=req.headers,
-                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
-
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):
```
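The removed block percent-encoded non-ASCII request URLs before handing them to urllib. A rough Python 3 sketch of that behavior with the standard library (the real helper was `escape_url` in `youtube_dl/utils.py`; the safe-character set below is an approximation, and the URL is a made-up example):

```python
from urllib.parse import quote

# A URL with raw non-ASCII characters, as some sites emit (see #3412):
url = 'http://example.com/vidéo?titre=café'
escaped = quote(url.encode('utf-8'), safe="%/:=&?~#+!$,;'@()*[]")
print(escaped)  # http://example.com/vid%C3%A9o?titre=caf%C3%A9
```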
**youtube_dl/compat.py**

```diff
@@ -42,6 +42,11 @@ try:
 except ImportError:  # Python 2
     import cookielib as compat_cookiejar
 
+try:
+    import http.cookies as compat_cookies
+except ImportError:  # Python 2
+    import Cookie as compat_cookies
+
 try:
     import html.entities as compat_html_entities
 except ImportError:  # Python 2
@@ -431,11 +436,17 @@ except TypeError:  # Python 2.6
         yield n
         n += step
 
+if sys.version_info >= (3, 0):
+    from tokenize import tokenize as compat_tokenize_tokenize
+else:
+    from tokenize import generate_tokens as compat_tokenize_tokenize
+
 __all__ = [
     'compat_HTTPError',
     'compat_basestring',
     'compat_chr',
     'compat_cookiejar',
+    'compat_cookies',
     'compat_expanduser',
     'compat_get_terminal_size',
     'compat_getenv',
@@ -451,6 +462,7 @@ __all__ = [
     'compat_socket_create_connection',
     'compat_str',
     'compat_subprocess_get_DEVNULL',
+    'compat_tokenize_tokenize',
     'compat_urllib_error',
     'compat_urllib_parse',
     'compat_urllib_parse_unquote',
```
youtube_dl/downloader/external.py
@@ -45,11 +45,13 @@ class ExternalFD(FileDownloader):
     def supports(cls, info_dict):
         return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')

-    def _source_address(self, command_option):
-        source_address = self.params.get('source_address')
-        if source_address is None:
+    def _option(self, command_option, param):
+        param = self.params.get(param)
+        if param is None:
             return []
-        return [command_option, source_address]
+        if isinstance(param, bool):
+            return [command_option]
+        return [command_option, param]

     def _configuration_args(self, default=[]):
         ex_args = self.params.get('external_downloader_args')
@@ -77,7 +79,17 @@ class CurlFD(ExternalFD):
         cmd = [self.exe, '--location', '-o', tmpfilename]
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
-        cmd += self._source_address('--interface')
+        cmd += self._option('--interface', 'source_address')
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
+
+
+class AxelFD(ExternalFD):
+    def _make_cmd(self, tmpfilename, info_dict):
+        cmd = [self.exe, '-o', tmpfilename]
+        for key, val in info_dict['http_headers'].items():
+            cmd += ['-H', '%s: %s' % (key, val)]
+        cmd += self._configuration_args()
+        cmd += ['--', info_dict['url']]
+        return cmd
@@ -88,7 +100,9 @@ class WgetFD(ExternalFD):
         cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
-        cmd += self._source_address('--bind-address')
+        cmd += self._option('--bind-address', 'source_address')
+        cmd += self._option('--proxy', 'proxy')
+        cmd += self._option('--no-check-certificate', 'nocheckcertificate')
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
@@ -105,7 +119,8 @@ class Aria2cFD(ExternalFD):
         cmd += ['--out', os.path.basename(tmpfilename)]
         for key, val in info_dict['http_headers'].items():
             cmd += ['--header', '%s: %s' % (key, val)]
-        cmd += self._source_address('--interface')
+        cmd += self._option('--interface', 'source_address')
+        cmd += self._option('--all-proxy', 'proxy')
         cmd += ['--', info_dict['url']]
         return cmd
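Note: the `_option` helper generalizes the old `_source_address` so that any external-downloader flag can be driven by a params entry, with booleans emitting the bare flag. A minimal standalone sketch of the mapping; the params values below are hypothetical:

    # Sketch of ExternalFD._option's params-to-argv mapping (hypothetical values).
    params = {'source_address': '0.0.0.0', 'nocheckcertificate': True, 'proxy': None}

    def option(command_option, param):
        value = params.get(param)
        if value is None:
            return []                       # unset param: emit nothing
        if isinstance(value, bool):
            return [command_option]         # boolean param: bare flag only
        return [command_option, value]      # otherwise: flag plus its value

    assert option('--bind-address', 'source_address') == ['--bind-address', '0.0.0.0']
    assert option('--no-check-certificate', 'nocheckcertificate') == ['--no-check-certificate']
    assert option('--proxy', 'proxy') == []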
youtube_dl/downloader/f4m.py
@@ -7,8 +7,7 @@ import os
 import time
 import xml.etree.ElementTree as etree

-from .common import FileDownloader
-from .http import HttpFD
+from .fragment import FragmentFD
 from ..compat import (
     compat_urlparse,
     compat_urllib_error,
@@ -16,8 +15,6 @@ from ..compat import (
 from ..utils import (
     struct_pack,
     struct_unpack,
-    encodeFilename,
-    sanitize_open,
     xpath_text,
 )
@@ -226,16 +223,13 @@ def _add_ns(prop):
     return '{http://ns.adobe.com/f4m/1.0}%s' % prop


-class HttpQuietDownloader(HttpFD):
-    def to_screen(self, *args, **kargs):
-        pass
-
-
-class F4mFD(FileDownloader):
+class F4mFD(FragmentFD):
     """
     A downloader for f4m manifests or AdobeHDS.
     """

+    FD_NAME = 'f4m'
+
     def _get_unencrypted_media(self, doc):
         media = doc.findall(_add_ns('media'))
         if not media:
@@ -288,7 +282,7 @@ class F4mFD(FileDownloader):
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         requested_bitrate = info_dict.get('tbr')
-        self.to_screen('[download] Downloading f4m manifest')
+        self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
         manifest = self.ydl.urlopen(man_url).read()

         doc = etree.fromstring(manifest)
@@ -320,67 +314,20 @@ class F4mFD(FileDownloader):
         # For some akamai manifests we'll need to add a query to the fragment url
         akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))

-        self.report_destination(filename)
-        http_dl = HttpQuietDownloader(
-            self.ydl,
-            {
-                'continuedl': True,
-                'quiet': True,
-                'noprogress': True,
-                'ratelimit': self.params.get('ratelimit', None),
-                'test': self.params.get('test', False),
-            }
-        )
-        tmpfilename = self.temp_name(filename)
-        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
+        ctx = {
+            'filename': filename,
+            'total_frags': total_frags,
+        }
+
+        self._prepare_frag_download(ctx)
+
+        dest_stream = ctx['dest_stream']

         write_flv_header(dest_stream)
         if not live:
             write_metadata_tag(dest_stream, metadata)

-        # This dict stores the download progress, it's updated by the progress
-        # hook
-        state = {
-            'status': 'downloading',
-            'downloaded_bytes': 0,
-            'frag_index': 0,
-            'frag_count': total_frags,
-            'filename': filename,
-            'tmpfilename': tmpfilename,
-        }
-        start = time.time()
-
-        def frag_progress_hook(s):
-            if s['status'] not in ('downloading', 'finished'):
-                return
-
-            frag_total_bytes = s.get('total_bytes', 0)
-            if s['status'] == 'finished':
-                state['downloaded_bytes'] += frag_total_bytes
-                state['frag_index'] += 1
-
-            estimated_size = (
-                (state['downloaded_bytes'] + frag_total_bytes) /
-                (state['frag_index'] + 1) * total_frags)
-            time_now = time.time()
-            state['total_bytes_estimate'] = estimated_size
-            state['elapsed'] = time_now - start
-
-            if s['status'] == 'finished':
-                progress = self.calc_percent(state['frag_index'], total_frags)
-            else:
-                frag_downloaded_bytes = s['downloaded_bytes']
-                frag_progress = self.calc_percent(frag_downloaded_bytes,
-                                                  frag_total_bytes)
-                progress = self.calc_percent(state['frag_index'], total_frags)
-                progress += frag_progress / float(total_frags)
-
-                state['eta'] = self.calc_eta(
-                    start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
-            state['speed'] = s.get('speed')
-            self._hook_progress(state)
-
-        http_dl.add_progress_hook(frag_progress_hook)
+        self._start_frag_download(ctx)

         frags_filenames = []
         while fragments_list:
@@ -391,9 +338,9 @@ class F4mFD(FileDownloader):
                 url += '?' + akamai_pv.strip(';')
             if info_dict.get('extra_param_to_segment_url'):
                 url += info_dict.get('extra_param_to_segment_url')
-            frag_filename = '%s-%s' % (tmpfilename, name)
+            frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
             try:
-                success = http_dl.download(frag_filename, {'url': url})
+                success = ctx['dl'].download(frag_filename, {'url': url})
                 if not success:
                     return False
                 with open(frag_filename, 'rb') as down:
@@ -425,20 +372,9 @@ class F4mFD(FileDownloader):
                     msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
                     self.report_warning(msg)

-        dest_stream.close()
+        self._finish_frag_download(ctx)

-        elapsed = time.time() - start
-        self.try_rename(tmpfilename, filename)
         for frag_file in frags_filenames:
             os.remove(frag_file)

-        fsize = os.path.getsize(encodeFilename(filename))
-        self._hook_progress({
-            'downloaded_bytes': fsize,
-            'total_bytes': fsize,
-            'filename': filename,
-            'status': 'finished',
-            'elapsed': elapsed,
-        })
-
         return True
youtube_dl/downloader/fragment.py (new file, 111 lines)

from __future__ import division, unicode_literals

import os
import time

from .common import FileDownloader
from .http import HttpFD
from ..utils import (
    encodeFilename,
    sanitize_open,
)


class HttpQuietDownloader(HttpFD):
    def to_screen(self, *args, **kargs):
        pass


class FragmentFD(FileDownloader):
    """
    A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
    """

    def _prepare_and_start_frag_download(self, ctx):
        self._prepare_frag_download(ctx)
        self._start_frag_download(ctx)

    def _prepare_frag_download(self, ctx):
        self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
        self.report_destination(ctx['filename'])
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit', None),
                'retries': self.params.get('retries', 0),
                'test': self.params.get('test', False),
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
        dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
        ctx.update({
            'dl': dl,
            'dest_stream': dest_stream,
            'tmpfilename': tmpfilename,
        })

    def _start_frag_download(self, ctx):
        total_frags = ctx['total_frags']
        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': 0,
            'frag_index': 0,
            'frag_count': total_frags,
            'filename': ctx['filename'],
            'tmpfilename': ctx['tmpfilename'],
        }
        start = time.time()
        ctx['started'] = start

        def frag_progress_hook(s):
            if s['status'] not in ('downloading', 'finished'):
                return

            frag_total_bytes = s.get('total_bytes', 0)
            if s['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_index'] += 1

            estimated_size = (
                (state['downloaded_bytes'] + frag_total_bytes) /
                (state['frag_index'] + 1) * total_frags)
            time_now = time.time()
            state['total_bytes_estimate'] = estimated_size
            state['elapsed'] = time_now - start

            if s['status'] == 'finished':
                progress = self.calc_percent(state['frag_index'], total_frags)
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                frag_progress = self.calc_percent(frag_downloaded_bytes,
                                                  frag_total_bytes)
                progress = self.calc_percent(state['frag_index'], total_frags)
                progress += frag_progress / float(total_frags)

                state['eta'] = self.calc_eta(
                    start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
            state['speed'] = s.get('speed')
            self._hook_progress(state)

        ctx['dl'].add_progress_hook(frag_progress_hook)

        return start

    def _finish_frag_download(self, ctx):
        ctx['dest_stream'].close()
        elapsed = time.time() - ctx['started']
        self.try_rename(ctx['tmpfilename'], ctx['filename'])
        fsize = os.path.getsize(encodeFilename(ctx['filename']))

        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': ctx['filename'],
            'status': 'finished',
            'elapsed': elapsed,
        })
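Note: a rough sketch of the contract FragmentFD expects from a subclass, assuming the module above is importable; the fragment URLs are hypothetical placeholders:

    from youtube_dl.downloader.fragment import FragmentFD

    class ExampleFD(FragmentFD):
        FD_NAME = 'example'

        def real_download(self, filename, info_dict):
            # Hypothetical fragment list; a real subclass parses a manifest here.
            fragment_urls = ['http://example.com/frag0.ts', 'http://example.com/frag1.ts']
            ctx = {'filename': filename, 'total_frags': len(fragment_urls)}
            # Sets ctx['dl'], ctx['dest_stream'], ctx['tmpfilename'] and starts progress reporting.
            self._prepare_and_start_frag_download(ctx)
            for i, frag_url in enumerate(fragment_urls):
                frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
                if not ctx['dl'].download(frag_filename, {'url': frag_url}):
                    return False
                with open(frag_filename, 'rb') as down:
                    ctx['dest_stream'].write(down.read())
            self._finish_frag_download(ctx)  # closes the stream, renames, fires the final hook
            return True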
youtube_dl/downloader/hls.py
@@ -4,12 +4,11 @@ import os
 import re
 import subprocess

-from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from .common import FileDownloader
-from ..compat import (
-    compat_urlparse,
-    compat_urllib_request,
-)
+from .fragment import FragmentFD
+
+from ..compat import compat_urlparse
+from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from ..utils import (
     encodeArgument,
     encodeFilename,
@@ -33,6 +32,8 @@ class HlsFD(FileDownloader):
             for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
         args.append(encodeFilename(tmpfilename, True))

+        self._debug_cmd(args)
+
         retval = subprocess.call(args)
         if retval == 0:
             fsize = os.path.getsize(encodeFilename(tmpfilename))
@@ -51,54 +52,50 @@ class HlsFD(FileDownloader):
         return False


-class NativeHlsFD(FileDownloader):
+class NativeHlsFD(FragmentFD):
     """ A more limited implementation that does not require ffmpeg """

+    FD_NAME = 'hlsnative'
+
     def real_download(self, filename, info_dict):
-        url = info_dict['url']
-        self.report_destination(filename)
-        tmpfilename = self.temp_name(filename)
-
-        self.to_screen(
-            '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
-        data = self.ydl.urlopen(url).read()
-        s = data.decode('utf-8', 'ignore')
-        segment_urls = []
+        man_url = info_dict['url']
+        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
+        manifest = self.ydl.urlopen(man_url).read()
+
+        s = manifest.decode('utf-8', 'ignore')
+        fragment_urls = []
         for line in s.splitlines():
             line = line.strip()
             if line and not line.startswith('#'):
                 segment_url = (
                     line
                     if re.match(r'^https?://', line)
-                    else compat_urlparse.urljoin(url, line))
-                segment_urls.append(segment_url)
-
-        is_test = self.params.get('test', False)
-        remaining_bytes = self._TEST_FILE_SIZE if is_test else None
-        byte_counter = 0
-        with open(tmpfilename, 'wb') as outf:
-            for i, segurl in enumerate(segment_urls):
-                self.to_screen(
-                    '[hlsnative] %s: Downloading segment %d / %d' %
-                    (info_dict['id'], i + 1, len(segment_urls)))
-                seg_req = compat_urllib_request.Request(segurl)
-                if remaining_bytes is not None:
-                    seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
-
-                segment = self.ydl.urlopen(seg_req).read()
-                if remaining_bytes is not None:
-                    segment = segment[:remaining_bytes]
-                    remaining_bytes -= len(segment)
-                outf.write(segment)
-                byte_counter += len(segment)
-                if remaining_bytes is not None and remaining_bytes <= 0:
+                    else compat_urlparse.urljoin(man_url, line))
+                fragment_urls.append(segment_url)
+                # We only download the first fragment during the test
+                if self.params.get('test', False):
                     break

-        self._hook_progress({
-            'downloaded_bytes': byte_counter,
-            'total_bytes': byte_counter,
+        ctx = {
             'filename': filename,
-            'status': 'finished',
-        })
-        self.try_rename(tmpfilename, filename)
+            'total_frags': len(fragment_urls),
+        }
+
+        self._prepare_and_start_frag_download(ctx)
+
+        frags_filenames = []
+        for i, frag_url in enumerate(fragment_urls):
+            frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
+            success = ctx['dl'].download(frag_filename, {'url': frag_url})
+            if not success:
+                return False
+            with open(frag_filename, 'rb') as down:
+                ctx['dest_stream'].write(down.read())
+            frags_filenames.append(frag_filename)
+
+        self._finish_frag_download(ctx)
+
+        for frag_file in frags_filenames:
+            os.remove(frag_file)

         return True
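Note: the native HLS path boils down to treating every non-comment manifest line as a fragment URL and resolving relative entries against the manifest URL. A self-contained sketch with an inlined sample manifest (the URLs are hypothetical):

    import re
    try:
        from urllib.parse import urljoin  # Python 3
    except ImportError:
        from urlparse import urljoin      # Python 2

    man_url = 'http://example.com/video/index.m3u8'
    manifest = '#EXTM3U\n#EXTINF:10,\nseg0.ts\n#EXTINF:10,\nhttp://cdn.example.com/seg1.ts\n'

    fragment_urls = []
    for line in manifest.splitlines():
        line = line.strip()
        if line and not line.startswith('#'):   # skip tags and comments
            fragment_urls.append(
                line if re.match(r'^https?://', line) else urljoin(man_url, line))

    assert fragment_urls == ['http://example.com/video/seg0.ts',
                             'http://cdn.example.com/seg1.ts']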
youtube_dl/downloader/http.py
@@ -4,6 +4,7 @@ import errno
 import os
 import socket
 import time
+import re

 from .common import FileDownloader
 from ..compat import (
@@ -57,6 +58,24 @@ class HttpFD(FileDownloader):
             # Establish connection
             try:
                 data = self.ydl.urlopen(request)
+                # When trying to resume, Content-Range HTTP header of response has to be checked
+                # to match the value of requested Range HTTP header. This is due to a webservers
+                # that don't support resuming and serve a whole file with no Content-Range
+                # set in response despite of requested Range (see
+                # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
+                if resume_len > 0:
+                    content_range = data.headers.get('Content-Range')
+                    if content_range:
+                        content_range_m = re.search(r'bytes (\d+)-', content_range)
+                        # Content-Range is present and matches requested Range, resume is possible
+                        if content_range_m and resume_len == int(content_range_m.group(1)):
+                            break
+                    # Content-Range is either not present or invalid. Assuming remote webserver is
+                    # trying to send the whole file, resume is not possible, so wiping the local file
+                    # and performing entire redownload
+                    self.report_unable_to_resume()
+                    resume_len = 0
+                    open_mode = 'wb'
                 break
             except (compat_urllib_error.HTTPError, ) as err:
                 if (err.code < 500 or err.code >= 600) and err.code != 416:
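Note: the resume check added here reduces to comparing the first byte reported by Content-Range against the local file size. A standalone sketch of that comparison (the header strings are hypothetical):

    import re

    def resume_is_valid(resume_len, content_range):
        # content_range looks like 'bytes 1000-2047/2048'; None if the header is absent
        if not content_range:
            return False
        m = re.search(r'bytes (\d+)-', content_range)
        return bool(m) and resume_len == int(m.group(1))

    assert resume_is_valid(1000, 'bytes 1000-2047/2048')   # server honored the Range header
    assert not resume_is_valid(1000, 'bytes 0-2047/2048')  # whole file resent: restart
    assert not resume_is_valid(1000, None)                 # no header: restart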
youtube_dl/extractor/__init__.py
@@ -118,6 +118,7 @@ from .dailymotion import (
 )
 from .daum import DaumIE
 from .dbtv import DBTVIE
+from .dcn import DCNIE
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .dfb import DFBIE
@@ -157,6 +158,7 @@ from .eporner import EpornerIE
 from .eroprofile import EroProfileIE
 from .escapist import EscapistIE
 from .espn import ESPNIE
+from .esri import EsriVideoIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .expotv import ExpoTVIE
@@ -241,6 +243,10 @@ from .imdb import (
 )
 from .imgur import ImgurIE
 from .ina import InaIE
+from .indavideo import (
+    IndavideoIE,
+    IndavideoEmbedIE,
+)
 from .infoq import InfoQIE
 from .instagram import InstagramIE, InstagramUserIE
 from .internetvideoarchive import InternetVideoArchiveIE
@@ -339,6 +345,7 @@ from .muenchentv import MuenchenTVIE
 from .musicplayon import MusicPlayOnIE
 from .musicvault import MusicVaultIE
 from .muzu import MuzuTVIE
+from .mwave import MwaveIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvi import MyviIE
@@ -352,6 +359,7 @@ from .nbc import (
     NBCNewsIE,
     NBCSportsIE,
     NBCSportsVPlayerIE,
+    MSNBCIE,
 )
 from .ndr import (
     NDRIE,
@@ -431,6 +439,10 @@ from .orf import (
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE
+from .periscope import (
+    PeriscopeIE,
+    QuickscopeIE,
+)
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
@@ -439,8 +451,13 @@ from .planetaplay import PlanetaPlayIE
 from .pladform import PladformIE
 from .played import PlayedIE
 from .playfm import PlayFMIE
+from .playtvak import PlaytvakIE
 from .playvid import PlayvidIE
 from .playwire import PlaywireIE
+from .pluralsight import (
+    PluralsightIE,
+    PluralsightCourseIE,
+)
 from .podomatic import PodomaticIE
 from .porn91 import Porn91IE
 from .pornhd import PornHdIE
@@ -486,6 +503,7 @@ from .rtl2 import RTL2IE
 from .rtp import RTPIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
+from .rtvnh import RTVNHIE
 from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
@@ -512,6 +530,7 @@ from .senateisvp import SenateISVPIE
 from .servingsys import ServingSysIE
 from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
+from .shahid import ShahidIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
 from .sina import SinaIE
@@ -591,6 +610,7 @@ from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .telebruxelles import TeleBruxellesIE
 from .telecinco import TelecincoIE
+from .telegraaf import TelegraafIE
 from .telemb import TeleMBIE
 from .teletask import TeleTaskIE
 from .tenplay import TenPlayIE
@@ -598,7 +618,10 @@ from .testurl import TestURLIE
 from .testtube import TestTubeIE
 from .tf1 import TF1IE
 from .theonion import TheOnionIE
-from .theplatform import ThePlatformIE
+from .theplatform import (
+    ThePlatformIE,
+    ThePlatformFeedIE,
+)
 from .thesixtyone import TheSixtyOneIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
@@ -682,7 +705,6 @@ from .vgtv import (
 from .vh1 import VH1IE
 from .vice import ViceIE
 from .viddler import ViddlerIE
-from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
youtube_dl/extractor/bbc.py
@@ -20,7 +20,9 @@ class BBCCoUkIE(InfoExtractor):
     IE_DESC = 'BBC iPlayer'
     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'

-    _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s'
+    _MEDIASELECTOR_URLS = [
+        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
+    ]

     _TESTS = [
         {
@@ -162,6 +164,10 @@ class BBCCoUkIE(InfoExtractor):
         }
     ]

+    class MediaSelectionError(Exception):
+        def __init__(self, id):
+            self.id = id
+
     def _extract_asx_playlist(self, connection, programme_id):
         asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
         return [ref.get('href') for ref in asx.findall('./Entry/ref')]
@@ -212,8 +218,7 @@ class BBCCoUkIE(InfoExtractor):
     def _extract_medias(self, media_selection):
         error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
         if error is not None:
-            raise ExtractorError(
-                '%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
+            raise BBCCoUkIE.MediaSelectionError(error.get('id'))
         return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')

     def _extract_connections(self, media):
@@ -270,9 +275,23 @@ class BBCCoUkIE(InfoExtractor):
         ]
         return subtitles

+    def _raise_extractor_error(self, media_selection_error):
+        raise ExtractorError(
+            '%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
+            expected=True)
+
     def _download_media_selector(self, programme_id):
-        return self._download_media_selector_url(
-            self._MEDIASELECTOR_URL % programme_id, programme_id)
+        last_exception = None
+        for mediaselector_url in self._MEDIASELECTOR_URLS:
+            try:
+                return self._download_media_selector_url(
+                    mediaselector_url % programme_id, programme_id)
+            except BBCCoUkIE.MediaSelectionError as e:
+                if e.id == 'notukerror':
+                    last_exception = e
+                    continue
+                self._raise_extractor_error(e)
+        self._raise_extractor_error(last_exception)
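Note: the new selector logic is a try-each-endpoint loop that only swallows the geo error ('notukerror') and re-raises anything else. Sketched standalone, with fetch() and the URL templates as hypothetical stand-ins:

    class MediaSelectionError(Exception):
        def __init__(self, id):
            self.id = id

    def download_media_selector(programme_id, url_templates, fetch):
        last_exception = None
        for tmpl in url_templates:
            try:
                return fetch(tmpl % programme_id)
            except MediaSelectionError as e:
                if e.id == 'notukerror':    # geo-restricted: try the next endpoint
                    last_exception = e
                    continue
                raise                       # any other error is fatal immediately
        raise last_exception                # every endpoint was geo-blocked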
     def _download_media_selector_url(self, url, programme_id=None):
         try:
@@ -297,7 +316,6 @@ class BBCCoUkIE(InfoExtractor):
                 formats.extend(self._extract_video(media, programme_id))
             elif kind == 'captions':
                 subtitles = self.extract_subtitles(media, programme_id)
-
         return formats, subtitles

     def _download_playlist(self, playlist_id):
@@ -426,9 +444,14 @@ class BBCIE(BBCCoUkIE):
     IE_DESC = 'BBC'
     _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'

-    # fails with notukerror for some videos
-    # _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s'
-    _MEDIASELECTOR_URL = 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s'
+    _MEDIASELECTOR_URLS = [
+        # Provides more formats, namely direct mp4 links, but fails on some videos with
+        # notukerror for non UK (?) users (e.g.
+        # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
+        'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
+        # Provides fewer formats, but works everywhere for everybody (hopefully)
+        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
+    ]

     _TESTS = [{
         # article with multiple videos embedded with data-media-meta containing
@@ -463,7 +486,7 @@ class BBCIE(BBCCoUkIE):
         'url': 'http://www.bbc.com/news/world-europe-32041533',
         'info_dict': {
             'id': 'p02mprgb',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
             'duration': 47,
             'timestamp': 1427219242,
@@ -503,6 +526,18 @@ class BBCIE(BBCCoUkIE):
         'params': {
             'skip_download': True,
         }
+    }, {
+        # single video from video playlist embedded with vxp-playlist-data JSON
+        'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
+        'info_dict': {
+            'id': 'p02w6qjc',
+            'ext': 'mp4',
+            'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
+            'duration': 56,
+        },
+        'params': {
+            'skip_download': True,
+        }
     }, {
         # single video story with digitalData
         'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
@@ -523,7 +558,7 @@ class BBCIE(BBCCoUkIE):
         'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
         'info_dict': {
             'id': 'p018zqqg',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Hyundai Santa Fe Sport: Rock star',
             'description': 'md5:b042a26142c4154a6e472933cf20793d',
             'timestamp': 1368473503,
@@ -538,7 +573,7 @@ class BBCIE(BBCCoUkIE):
         'url': 'http://www.bbc.com/sport/0/football/33653409',
         'info_dict': {
             'id': 'p02xycnp',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
             'description': 'md5:398fca0e2e701c609d726e034fa1fc89',
             'duration': 140,
@@ -672,13 +707,36 @@ class BBCIE(BBCCoUkIE):

         if not medias:
             # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
-            media_asset_page = self._parse_json(
+            media_asset = self._search_regex(
+                r'mediaAssetPage\.init\(\s*({.+?}), "/',
+                webpage, 'media asset', default=None)
+            if media_asset:
+                media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
+                medias = []
+                for video in media_asset_page.get('videos', {}).values():
+                    medias.extend(video.values())
+
+        if not medias:
+            # Multiple video playlist with single `now playing` entry (e.g.
+            # http://www.bbc.com/news/video_and_audio/must_see/33767813)
+            vxp_playlist = self._parse_json(
                 self._search_regex(
-                    r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'),
+                    r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
+                    webpage, 'playlist data'),
                 playlist_id)
-            medias = []
-            for video in media_asset_page.get('videos', {}).values():
-                medias.extend(video.values())
+            playlist_medias = []
+            for item in vxp_playlist:
+                media = item.get('media')
+                if not media:
+                    continue
+                playlist_medias.append(media)
+                # Download single video if found media with asset id matching the video id from URL
+                if item.get('advert', {}).get('assetId') == playlist_id:
+                    medias = [media]
+                    break
+            # Fallback to the whole playlist
+            if not medias:
+                medias = playlist_medias

         entries = []
         for num, media_meta in enumerate(medias, start=1):
youtube_dl/extractor/break.py
@@ -18,6 +18,7 @@ class BreakIE(InfoExtractor):
             'id': '2468056',
             'ext': 'mp4',
             'title': 'When Girls Act Like D-Bags',
+            'age_limit': 13,
         }
     }, {
         'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
youtube_dl/extractor/clipfish.py
@@ -1,53 +1,68 @@
 from __future__ import unicode_literals

 import re
-import time
-import xml.etree.ElementTree

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
-    parse_duration,
+    determine_ext,
+    int_or_none,
+    js_to_json,
+    parse_iso8601,
+    remove_end,
 )


 class ClipfishIE(InfoExtractor):
-    IE_NAME = 'clipfish'
-
-    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
-        'md5': '2521cd644e862936cf2e698206e47385',
+        'md5': '79bc922f3e8a9097b3d68a93780fd475',
         'info_dict': {
             'id': '3966754',
             'ext': 'mp4',
             'title': 'FIFA 14 - E3 2013 Trailer',
+            'timestamp': 1370938118,
+            'upload_date': '20130611',
             'duration': 82,
-        },
-        'skip': 'Blocked in the US'
+        }
     }

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
-
-        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
-                    (video_id, int(time.time())))
-        doc = self._download_xml(
-            info_url, video_id, note='Downloading info page')
-        title = doc.find('title').text
-        video_url = doc.find('filename').text
-        if video_url is None:
-            xml_bytes = xml.etree.ElementTree.tostring(doc)
-            raise ExtractorError('Cannot find video URL in document %r' %
-                                 xml_bytes)
-        thumbnail = doc.find('imageurl').text
-        duration = parse_duration(doc.find('duration').text)
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_info = self._parse_json(
+            js_to_json(self._html_search_regex(
+                '(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')),
+            video_id)
+
+        formats = []
+        for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage):
+            ext = determine_ext(video_url)
+            if ext == 'm3u8':
+                formats.append({
+                    'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
+                    'ext': 'mp4',
+                    'format_id': 'hls',
+                })
+            else:
+                formats.append({
+                    'url': video_url,
+                    'format_id': ext,
+                })
+        self._sort_formats(formats)
+
+        title = remove_end(self._og_search_title(webpage), ' - Video')
+        thumbnail = self._og_search_thumbnail(webpage)
+        duration = int_or_none(video_info.get('length'))
+        timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date'))

         return {
             'id': video_id,
             'title': title,
-            'url': video_url,
+            'formats': formats,
             'thumbnail': thumbnail,
             'duration': duration,
+            'timestamp': timestamp,
         }
youtube_dl/extractor/common.py
@@ -14,10 +14,14 @@ import xml.etree.ElementTree

 from ..compat import (
     compat_cookiejar,
+    compat_cookies,
+    compat_getpass,
     compat_HTTPError,
     compat_http_client,
     compat_urllib_error,
+    compat_urllib_parse,
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urlparse,
     compat_str,
 )
@@ -35,6 +39,9 @@ from ..utils import (
     RegexNotFoundError,
     sanitize_filename,
     unescapeHTML,
+    url_basename,
+    xpath_text,
+    xpath_with_ns,
 )
@@ -181,6 +188,7 @@ class InfoExtractor(object):
                     by YoutubeDL if it's missing)
     categories:     A list of categories that the video falls in, for example
                     ["Sports", "Berlin"]
+    tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]
     is_live:        True, False, or None (=unknown). Whether this video is a
                     live stream that goes on instead of a fixed-length video.
     start_time:     Time in seconds where the reproduction should start, as
@@ -197,8 +205,8 @@ class InfoExtractor(object):
     There must be a key "entries", which is a list, an iterable, or a PagedList
     object, each element of which is a valid dictionary by this specification.

-    Additionally, playlists can have "title" and "id" attributes with the same
-    semantics as videos (see above).
+    Additionally, playlists can have "title", "description" and "id" attributes
+    with the same semantics as videos (see above).


     _type "multi_video" indicates that there are multiple videos that
@@ -502,6 +510,12 @@ class InfoExtractor(object):
         """Report attempt to log in."""
         self.to_screen('Logging in')

+    @staticmethod
+    def raise_login_required(msg='This video is only available for registered users'):
+        raise ExtractorError(
+            '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
+            expected=True)
+
     # Methods for following #608
     @staticmethod
     def url_result(url, ie=None, video_id=None, video_title=None):
@@ -603,7 +617,7 @@ class InfoExtractor(object):

         return (username, password)

-    def _get_tfa_info(self):
+    def _get_tfa_info(self, note='two-factor verification code'):
         """
         Get the two-factor authentication info
         TODO - asking the user will be required for sms/phone verify
@@ -617,7 +631,7 @@ class InfoExtractor(object):
         if downloader_params.get('twofactor', None) is not None:
             return downloader_params['twofactor']

-        return None
+        return compat_getpass('Type %s and press [Return]: ' % note)

     # Helper functions for extracting OpenGraph info
     @staticmethod
@@ -630,6 +644,12 @@ class InfoExtractor(object):
             template % (content_re, property_re),
         ]

+    @staticmethod
+    def _meta_regex(prop):
+        return r'''(?isx)<meta
+                    (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
+
     def _og_search_property(self, prop, html, name=None, **kargs):
         if name is None:
             name = 'OpenGraph %s' % prop
@@ -660,9 +680,7 @@ class InfoExtractor(object):
         if display_name is None:
             display_name = name
         return self._html_search_regex(
-            r'''(?isx)<meta
-                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
+            self._meta_regex(name),
             html, display_name, fatal=fatal, group='content', **kwargs)

     def _dc_search_uploader(self, html):
@@ -713,16 +731,18 @@ class InfoExtractor(object):

     @staticmethod
     def _hidden_inputs(html):
-        return dict([
-            (input.group('name'), input.group('value')) for input in re.finditer(
-                r'''(?x)
-                    <input\s+
-                        type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
-                        name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
-                        (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
-                        value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
-                ''', html)
-        ])
+        hidden_inputs = {}
+        for input in re.findall(r'<input([^>]+)>', html):
+            if not re.search(r'type=(["\'])hidden\1', input):
+                continue
+            name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
+            if not name:
+                continue
+            value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
+            if not value:
+                continue
+            hidden_inputs[name.group('value')] = value.group('value')
+        return hidden_inputs

     def _form_hidden_inputs(self, form_id, html):
         form = self._search_regex(
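Note: on a sample form the reworked scanner behaves as below; unlike the old single regex it no longer cares about attribute order. The HTML snippet is hypothetical:

    import re

    def hidden_inputs(html):
        result = {}
        for inp in re.findall(r'<input([^>]+)>', html):
            if not re.search(r'type=(["\'])hidden\1', inp):
                continue
            name = re.search(r'name=(["\'])(?P<value>.+?)\1', inp)
            value = re.search(r'value=(["\'])(?P<value>.*?)\1', inp)
            if name and value:
                result[name.group('value')] = value.group('value')
        return result

    html = ('<input type="hidden" value="tok123" name="csrf">'
            '<input type="text" name="user" value="x">')
    assert hidden_inputs(html) == {'csrf': 'tok123'}   # value-before-name now works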
@@ -971,69 +991,221 @@ class InfoExtractor(object):
         self._sort_formats(formats)
         return formats

-    # TODO: improve extraction
-    def _extract_smil_formats(self, smil_url, video_id, fatal=True):
-        smil = self._download_xml(
-            smil_url, video_id, 'Downloading SMIL file',
-            'Unable to download SMIL file', fatal=fatal)
+    @staticmethod
+    def _xpath_ns(path, namespace=None):
+        if not namespace:
+            return path
+        out = []
+        for c in path.split('/'):
+            if not c or c == '.':
+                out.append(c)
+            else:
+                out.append('{%s}%s' % (namespace, c))
+        return '/'.join(out)
+
+    def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
+        smil = self._download_smil(smil_url, video_id, fatal=fatal)
+
         if smil is False:
             assert not fatal
             return []

-        base = smil.find('./head/meta').get('base')
+        namespace = self._parse_smil_namespace(smil)
+
+        return self._parse_smil_formats(
+            smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+
+    def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
+        smil = self._download_smil(smil_url, video_id, fatal=fatal)
+        if smil is False:
+            return {}
+        return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
+
+    def _download_smil(self, smil_url, video_id, fatal=True):
+        return self._download_xml(
+            smil_url, video_id, 'Downloading SMIL file',
+            'Unable to download SMIL file', fatal=fatal)
+
+    def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
+        namespace = self._parse_smil_namespace(smil)
+
+        formats = self._parse_smil_formats(
+            smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+        subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
+
+        video_id = os.path.splitext(url_basename(smil_url))[0]
+        title = None
+        description = None
+        for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+            name = meta.attrib.get('name')
+            content = meta.attrib.get('content')
+            if not name or not content:
+                continue
+            if not title and name == 'title':
+                title = content
+            elif not description and name in ('description', 'abstract'):
+                description = content
+
+        return {
+            'id': video_id,
+            'title': title or video_id,
+            'description': description,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+    def _parse_smil_namespace(self, smil):
+        return self._search_regex(
+            r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
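Note: _xpath_ns simply rewrites each path component into Clark notation; a standalone copy with a worked example (the namespace URI is hypothetical):

    def xpath_ns(path, namespace=None):
        if not namespace:
            return path
        out = []
        for c in path.split('/'):
            if not c or c == '.':
                out.append(c)                          # keep '.' and empty segments
            else:
                out.append('{%s}%s' % (namespace, c))  # Clark notation for real tags
        return '/'.join(out)

    assert xpath_ns('./head/meta', 'http://example.com/smil') == \
        './{http://example.com/smil}head/{http://example.com/smil}meta'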
+    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+        base = smil_url
+        for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+            b = meta.get('base') or meta.get('httpBase')
+            if b:
+                base = b
+                break

         formats = []
         rtmp_count = 0
-        if smil.findall('./body/seq/video'):
-            video = smil.findall('./body/seq/video')[0]
-            fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
-            formats.extend(fmts)
-        else:
-            for video in smil.findall('./body/switch/video'):
-                fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
-                formats.extend(fmts)
+        http_count = 0
+
+        videos = smil.findall(self._xpath_ns('.//video', namespace))
+        for video in videos:
+            src = video.get('src')
+            if not src:
+                continue
+
+            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+            filesize = int_or_none(video.get('size') or video.get('fileSize'))
+            width = int_or_none(video.get('width'))
+            height = int_or_none(video.get('height'))
+            proto = video.get('proto')
+            ext = video.get('ext')
+            src_ext = determine_ext(src)
+            streamer = video.get('streamer') or base
+
+            if proto == 'rtmp' or streamer.startswith('rtmp'):
+                rtmp_count += 1
+                formats.append({
+                    'url': streamer,
+                    'play_path': src,
+                    'ext': 'flv',
+                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+                    'tbr': bitrate,
+                    'filesize': filesize,
+                    'width': width,
+                    'height': height,
+                })
+                if transform_rtmp_url:
+                    streamer, src = transform_rtmp_url(streamer, src)
+                    formats[-1].update({
+                        'url': streamer,
+                        'play_path': src,
+                    })
+                continue
+
+            src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
+
+            if proto == 'm3u8' or src_ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    src_url, video_id, ext or 'mp4', m3u8_id='hls'))
+                continue
+
+            if src_ext == 'f4m':
+                f4m_url = src_url
+                if not f4m_params:
+                    f4m_params = {
+                        'hdcore': '3.2.0',
+                        'plugin': 'flowplayer-3.2.0.1',
+                    }
+                f4m_url += '&' if '?' in f4m_url else '?'
+                f4m_url += compat_urllib_parse.urlencode(f4m_params)
+                formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
+                continue
+
+            if src_url.startswith('http'):
+                http_count += 1
+                formats.append({
+                    'url': src_url,
+                    'ext': ext or src_ext or 'flv',
+                    'format_id': 'http-%d' % (bitrate or http_count),
+                    'tbr': bitrate,
+                    'filesize': filesize,
+                    'width': width,
+                    'height': height,
+                })
+                continue

         self._sort_formats(formats)

         return formats
-    def _parse_smil_video(self, video, video_id, base, rtmp_count):
-        src = video.get('src')
-        if not src:
-            return [], rtmp_count
-        bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
-        width = int_or_none(video.get('width'))
-        height = int_or_none(video.get('height'))
-        proto = video.get('proto')
-        if not proto:
-            if base:
-                if base.startswith('rtmp'):
-                    proto = 'rtmp'
-                elif base.startswith('http'):
-                    proto = 'http'
-        ext = video.get('ext')
-        if proto == 'm3u8':
-            return self._extract_m3u8_formats(src, video_id, ext), rtmp_count
-        elif proto == 'rtmp':
-            rtmp_count += 1
-            streamer = video.get('streamer') or base
-            return ([{
-                'url': streamer,
-                'play_path': src,
-                'ext': 'flv',
-                'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
-                'tbr': bitrate,
-                'width': width,
-                'height': height,
-            }], rtmp_count)
-        elif proto.startswith('http'):
-            return ([{
-                'url': base + src,
-                'ext': ext or 'flv',
-                'tbr': bitrate,
-                'width': width,
-                'height': height,
-            }], rtmp_count)
+    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+        subtitles = {}
+        for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
+            src = textstream.get('src')
+            if not src:
+                continue
+            ext = textstream.get('ext') or determine_ext(src)
+            if not ext:
+                type_ = textstream.get('type')
+                SUBTITLES_TYPES = {
+                    'text/vtt': 'vtt',
+                    'text/srt': 'srt',
+                    'application/smptett+xml': 'tt',
+                }
+                if type_ in SUBTITLES_TYPES:
+                    ext = SUBTITLES_TYPES[type_]
+            lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
+            subtitles.setdefault(lang, []).append({
+                'url': src,
+                'ext': ext,
+            })
+        return subtitles
+    def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
+        xspf = self._download_xml(
+            playlist_url, playlist_id, 'Downloading xpsf playlist',
+            'Unable to download xspf manifest', fatal=fatal)
+        if xspf is False:
+            return []
+        return self._parse_xspf(xspf, playlist_id)
+
+    def _parse_xspf(self, playlist, playlist_id):
+        NS_MAP = {
+            'xspf': 'http://xspf.org/ns/0/',
+            's1': 'http://static.streamone.nl/player/ns/0',
+        }
+
+        entries = []
+        for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+            title = xpath_text(
+                track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
+            description = xpath_text(
+                track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+            thumbnail = xpath_text(
+                track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+            duration = float_or_none(
+                xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
+
+            formats = [{
+                'url': location.text,
+                'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+                'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+                'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+            } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
+            self._sort_formats(formats)
+
+            entries.append({
+                'id': playlist_id,
+                'title': title,
+                'description': description,
+                'thumbnail': thumbnail,
+                'duration': duration,
+                'formats': formats,
+            })
+        return entries
+
     def _live_title(self, name):
         """ Generate the title for a live video """
@@ -1069,6 +1241,12 @@ class InfoExtractor(object):
                              None, '/', True, False, expire_time, '', None, None, None)
         self._downloader.cookiejar.set_cookie(cookie)

+    def _get_cookies(self, url):
+        """ Return a compat_cookies.SimpleCookie with the cookies for the url """
+        req = compat_urllib_request.Request(url)
+        self._downloader.cookiejar.add_cookie_header(req)
+        return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+
     def get_testcases(self, include_onlymatching=False):
         t = getattr(self, '_TEST', None)
         if t:
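Note: a rough usage sketch of the new helper from inside an extractor; the URL and cookie name are hypothetical:

    def _real_extract(self, url):
        cookies = self._get_cookies('http://example.com/video/123')
        morsel = cookies.get('session_id')          # a compat_cookies.Morsel, or None
        session = morsel.value if morsel else None  # raw cookie value, if present
        return self.url_result(url)                 # placeholder; a real extractor does more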
@@ -1107,6 +1285,23 @@ class InfoExtractor(object):
     def _get_subtitles(self, *args, **kwargs):
         raise NotImplementedError("This method must be implemented by subclasses")

+    @staticmethod
+    def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+        """ Merge subtitle items for one language. Items with duplicated URLs
+        will be dropped. """
+        list1_urls = set([item['url'] for item in subtitle_list1])
+        ret = list(subtitle_list1)
+        ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
+        return ret
+
+    @classmethod
+    def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
+        """ Merge two subtitle dictionaries, language by language. """
+        ret = dict(subtitle_dict1)
+        for lang in subtitle_dict2:
+            ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+        return ret
+
     def extract_automatic_captions(self, *args, **kwargs):
         if (self._downloader.params.get('writeautomaticsub', False) or
                 self._downloader.params.get('listsubtitles')):
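Note: a quick illustration of the merge semantics, assuming a youtube_dl checkout on the path; the URLs are hypothetical:

    from youtube_dl.extractor.common import InfoExtractor

    d1 = {'en': [{'url': 'http://example.com/en.vtt', 'ext': 'vtt'}]}
    d2 = {'en': [{'url': 'http://example.com/en.vtt', 'ext': 'vtt'},   # duplicate URL: dropped
                 {'url': 'http://example.com/en.srt', 'ext': 'srt'}],
          'de': [{'url': 'http://example.com/de.vtt', 'ext': 'vtt'}]}

    merged = InfoExtractor._merge_subtitles(d1, d2)
    assert sorted(merged) == ['de', 'en']
    assert len(merged['en']) == 2   # en.vtt kept once, en.srt appended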
youtube_dl/extractor/crunchyroll.py
@@ -14,11 +14,13 @@ from ..compat import (
     compat_urllib_parse,
     compat_urllib_parse_unquote,
     compat_urllib_request,
+    compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
     bytes_to_intlist,
     intlist_to_bytes,
+    remove_end,
     unified_strdate,
     urlencode_postdata,
 )
@@ -235,7 +237,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             webpage_url = 'http://www.' + mobj.group('url')

         webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
-        note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
+        note_m = self._html_search_regex(
+            r'<div class="showmedia-trailer-notice">(.+?)</div>',
+            webpage, 'trailer-notice', default='')
         if note_m:
             raise ExtractorError(note_m)

@@ -245,6 +249,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             if msg.get('type') == 'error':
                 raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)

+        if 'To view this, please log in to verify you are 18 or older.' in webpage:
+            self.raise_login_required()
+
         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
         video_title = re.sub(r' {2,}', ' ', video_title)
         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
@@ -279,6 +286,20 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             stream_info = streamdata.find('./{default}preload/stream_info')
             video_url = stream_info.find('./host').text
             video_play_path = stream_info.find('./file').text
+
+            if '.fplive.net/' in video_url:
+                video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
+                parsed_video_url = compat_urlparse.urlparse(video_url)
+                direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
+                    netloc='v.lvlt.crcdn.net',
+                    path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
+                if self._is_valid_url(direct_video_url, video_id, video_format):
+                    formats.append({
+                        'url': direct_video_url,
+                        'format_id': video_format,
+                    })
+                    continue
+
             formats.append({
                 'url': video_url,
                 'play_path': video_play_path,
youtube_dl/extractor/dailymotion.py
@@ -15,7 +15,6 @@ from ..utils import (
     ExtractorError,
     determine_ext,
     int_or_none,
-    orderedSet,
     parse_iso8601,
     str_to_int,
     unescapeHTML,
@@ -278,7 +277,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
     }]

     def _extract_entries(self, id):
-        video_ids = []
+        video_ids = set()
         processed_urls = set()
         for pagenum in itertools.count(1):
             page_url = self._PAGE_TEMPLATE % (id, pagenum)
@@ -291,12 +290,13 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):

             processed_urls.add(urlh.geturl())

-            video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
+            for video_id in re.findall(r'data-xid="(.+?)"', webpage):
+                if video_id not in video_ids:
+                    yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
+                    video_ids.add(video_id)

             if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
                 break
-        return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
-                for video_id in orderedSet(video_ids)]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
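Note: the refactor turns _extract_entries into a generator that yields each video once, keyed by a seen-set, instead of collecting everything and deduplicating at the end. The shape of the change, with hypothetical ids:

    def entries(pages):
        seen = set()
        for page in pages:               # one iteration per paginated webpage
            for video_id in page:
                if video_id not in seen:
                    yield video_id       # emit as soon as it is discovered
                    seen.add(video_id)

    pages = [['a', 'b'], ['b', 'c']]
    assert list(entries(pages)) == ['a', 'b', 'c']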
youtube_dl/extractor/dcn.py (new file, 84 lines)

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
)
from ..utils import (
    int_or_none,
    parse_iso8601,
)


class DCNIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887',
        'info_dict':
        {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        request = compat_urllib_request.Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
            headers={'Origin': 'http://www.dcndigital.ae'})

        video = self._download_json(request, video_id)
        title = video.get('title_en') or video['title_ar']

        webpage = self._download_webpage(
            'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
            + compat_urllib_parse.urlencode({
                'id': video['id'],
                'user_id': video['user_id'],
                'signature': video['signature'],
                'countries': 'Q0M=',
                'filter': 'DENY',
            }), video_id)

        m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')

        rtsp_url = self._search_regex(
            r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
        if rtsp_url:
            formats.append({
                'url': rtsp_url,
                'format_id': 'rtsp',
            })

        self._sort_formats(formats)

        img = video.get('img')
        thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
        duration = int_or_none(video.get('duration'))
        description = video.get('description_en') or video.get('description_ar')
        timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
youtube_dl/extractor/dhm.py
@@ -1,10 +1,7 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import (
-    xpath_text,
-    parse_duration,
-)
+from ..utils import parse_duration


 class DHMIE(InfoExtractor):
@@ -34,24 +31,14 @@ class DHMIE(InfoExtractor):
     }]

     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        playlist_id = self._match_id(url)

-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, playlist_id)

         playlist_url = self._search_regex(
             r"file\s*:\s*'([^']+)'", webpage, 'playlist url')

-        playlist = self._download_xml(playlist_url, video_id)
-
-        track = playlist.find(
-            './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
-
-        video_url = xpath_text(
-            track, './{http://xspf.org/ns/0/}location',
-            'video url', fatal=True)
-        thumbnail = xpath_text(
-            track, './{http://xspf.org/ns/0/}image',
-            'thumbnail')
+        entries = self._extract_xspf_playlist(playlist_url, playlist_id)

         title = self._search_regex(
             [r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
@@ -63,11 +50,10 @@ class DHMIE(InfoExtractor):
             r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
             webpage, 'duration', default=None))

-        return {
-            'id': video_id,
-            'url': video_url,
+        entries[0].update({
             'title': title,
             'description': description,
             'duration': duration,
-            'thumbnail': thumbnail,
-        }
+        })
+
+        return self.playlist_result(entries, playlist_id)
youtube_dl/extractor/eroprofile.py
@@ -71,8 +71,7 @@ class EroProfileIE(InfoExtractor):

         m = re.search(r'You must be logged in to view this video\.', webpage)
         if m:
-            raise ExtractorError(
-                'This video requires login. Please specify a username and password and try again.', expected=True)
+            self.raise_login_required('This video requires login')

         video_id = self._search_regex(
             [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
youtube_dl/extractor/esri.py (new file, 74 lines)

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    int_or_none,
    parse_filesize,
    unified_strdate,
)


class EsriVideoIE(InfoExtractor):
    _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications',
        'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc',
        'info_dict': {
            'id': '1124',
            'ext': 'mp4',
            'title': 'ArcGIS Online - Developing Applications',
            'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 185,
            'upload_date': '20120419',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        formats = []
        for width, height, content in re.findall(
                r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage):
            for video_url, ext, filesize in re.findall(
                    r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content):
                formats.append({
                    'url': compat_urlparse.urljoin(url, video_url),
                    'ext': ext.lower(),
                    'format_id': '%s-%s' % (ext.lower(), height),
                    'width': int(width),
                    'height': int(height),
                    'filesize_approx': parse_filesize(filesize),
                })
        self._sort_formats(formats)

        title = self._html_search_meta('title', webpage, 'title')
        description = self._html_search_meta(
            'description', webpage, 'description', fatal=False)

        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail)

        duration = int_or_none(self._search_regex(
            [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"],
            webpage, 'duration', fatal=False))

        upload_date = unified_strdate(self._html_search_meta(
            'last-modified', webpage, 'upload date', fatal=None))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'formats': formats
        }
@@ -17,6 +17,8 @@ from ..utils import (
int_or_none,
limit_length,
urlencode_postdata,
get_element_by_id,
clean_html,
)


@@ -42,6 +44,7 @@ class FacebookIE(InfoExtractor):
'id': '637842556329505',
'ext': 'mp4',
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
'uploader': 'Tennis on Facebook',
}
}, {
'note': 'Video without discernible title',
@@ -50,6 +53,7 @@ class FacebookIE(InfoExtractor):
'id': '274175099429670',
'ext': 'mp4',
'title': 'Facebook video #274175099429670',
'uploader': 'Asif Nawab Butt',
},
'expected_warnings': [
'title'
@@ -161,6 +165,7 @@ class FacebookIE(InfoExtractor):
video_title = limit_length(video_title, 80)
if not video_title:
video_title = 'Facebook video #%s' % video_id
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))

return {
'id': video_id,
@@ -168,4 +173,5 @@ class FacebookIE(InfoExtractor):
'formats': formats,
'duration': int_or_none(video_data.get('video_duration')),
'thumbnail': video_data.get('thumbnail_src'),
'uploader': uploader,
}

@@ -86,7 +86,7 @@ class FC2IE(InfoExtractor):

info_url = (
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))

info_webpage = self._download_webpage(
info_url, video_id, note='Downloading info page')

@@ -30,6 +30,10 @@ class FolketingetIE(InfoExtractor):
'upload_date': '20141120',
'duration': 3960,
},
'params': {
# rtmp download
'skip_download': True,
},
}

def _real_extract(self, url):

@@ -32,6 +32,7 @@ class FourTubeIE(InfoExtractor):
'view_count': int,
'like_count': int,
'categories': list,
'age_limit': 18,
}
}


@@ -1,5 +1,7 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
parse_iso8601,
@@ -8,7 +10,8 @@ from ..utils import (


class FoxNewsIE(InfoExtractor):
_VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
{
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
@@ -42,13 +45,19 @@ class FoxNewsIE(InfoExtractor):
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
'only_matching': True,
},
{
'url': 'http://video.foxbusiness.com/v/4442309889001',
'only_matching': True,
},
]

def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')

video = self._download_json(
'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)

item = video['channel']['item']
title = item['title']

@@ -53,7 +53,7 @@ class FunnyOrDieIE(InfoExtractor):
for bitrate in bitrates:
for link in links:
formats.append({
'url': '%s%d.%s' % (link[0], bitrate, link[1]),
'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])),
'format_id': '%s-%d' % (link[1], bitrate),
'vbr': bitrate,
})

@@ -7,7 +7,10 @@ from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import remove_end
from ..utils import (
remove_end,
HEADRequest,
)


class GDCVaultIE(InfoExtractor):
@@ -73,10 +76,20 @@ class GDCVaultIE(InfoExtractor):
return video_formats

def _parse_flv(self, xml_description):
video_formats = []
formats = []
akamai_url = xml_description.find('./metadata/akamaiHost').text
audios = xml_description.find('./metadata/audios')
if audios is not None:
for audio in audios:
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(audio.get('url'), '.flv'),
'ext': 'flv',
'vcodec': 'none',
'format_id': audio.get('code'),
})
slide_video_path = xml_description.find('./metadata/slideVideo').text
video_formats.append({
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(slide_video_path, '.flv'),
'ext': 'flv',
@@ -86,7 +99,7 @@ class GDCVaultIE(InfoExtractor):
'format_id': 'slides',
})
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
video_formats.append({
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(speaker_video_path, '.flv'),
'ext': 'flv',
@@ -95,7 +108,7 @@ class GDCVaultIE(InfoExtractor):
'preference': -1,
'format_id': 'speaker',
})
return video_formats
return formats

def _login(self, webpage_url, display_id):
(username, password) = self._get_login_info()
@@ -133,16 +146,18 @@ class GDCVaultIE(InfoExtractor):
r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
start_page, 'url', default=None)
if direct_url:
video_url = 'http://www.gdcvault.com/' + direct_url
title = self._html_search_regex(
r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
start_page, 'title')
video_url = 'http://www.gdcvault.com' + direct_url
# resolve the url so that we can detect the correct extension
head = self._request_webpage(HEADRequest(video_url), video_id)
video_url = head.geturl()

return {
'id': video_id,
'display_id': display_id,
'url': video_url,
'ext': 'flv',
'title': title,
}

@@ -168,8 +183,8 @@ class GDCVaultIE(InfoExtractor):
# Fallback to the older format
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')

xml_decription_url = xml_root + 'xml/' + xml_name
xml_description = self._download_xml(xml_decription_url, display_id)
xml_description_url = xml_root + 'xml/' + xml_name
xml_description = self._download_xml(xml_description_url, display_id)

video_title = xml_description.find('./metadata/title').text
video_formats = self._parse_mp4(xml_description)

@@ -130,6 +130,89 @@ class GenericIE(InfoExtractor):
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
},
# SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
{
'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
'info_dict': {
'id': 'smil',
'ext': 'mp4',
'title': 'Automatics, robotics and biocybernetics',
'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
'formats': 'mincount:16',
'subtitles': 'mincount:1',
},
'params': {
'force_generic_extractor': True,
'skip_download': True,
},
},
# SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
{
'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
'info_dict': {
'id': 'hds',
'ext': 'flv',
'title': 'hds',
'formats': 'mincount:1',
},
'params': {
'skip_download': True,
},
},
# SMIL from https://www.restudy.dk/video/play/id/1637
{
'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
'info_dict': {
'id': 'video_1637',
'ext': 'flv',
'title': 'video_1637',
'formats': 'mincount:3',
},
'params': {
'skip_download': True,
},
},
# SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
{
'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
'info_dict': {
'id': 'smil-service',
'ext': 'flv',
'title': 'smil-service',
'formats': 'mincount:1',
},
'params': {
'skip_download': True,
},
},
# SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
{
'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
'info_dict': {
'id': '4719370',
'ext': 'mp4',
'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
'formats': 'mincount:3',
},
'params': {
'skip_download': True,
},
},
# XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
{
'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
'info_dict': {
'id': 'mZlp2ctYIUEB',
'ext': 'mp4',
'title': 'Tikibad ontruimd wegens brand',
'description': 'md5:05ca046ff47b931f9b04855015e163a4',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 33,
},
'params': {
'skip_download': True,
},
},
# google redirect
{
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -236,6 +319,19 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Ooyala'],
},
{
# ooyala video embedded with http://player.ooyala.com/iframe.js
'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
'info_dict': {
'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
'ext': 'mp4',
'title': '"Steve Jobs: Man in the Machine" trailer',
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
},
'params': {
'skip_download': True,
},
},
# multiple ooyala embeds on SBN network websites
{
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
@@ -1110,11 +1206,15 @@ class GenericIE(InfoExtractor):

self.report_extraction(video_id)

# Is it an RSS feed?
# Is it an RSS feed, a SMIL file or a XSPF playlist?
try:
doc = parse_xml(webpage)
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
return self._parse_smil(doc, url, video_id)
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
except compat_xml_parse_error:
pass

@@ -1320,7 +1420,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))

# Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
@@ -1655,7 +1755,7 @@ class GenericIE(InfoExtractor):
if not found:
# Broaden the findall a little bit: JWPlayer JS loader
found = filter_video(re.findall(
r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
if not found:
# Flow player
found = filter_video(re.findall(r'''(?xs)
@@ -1716,7 +1816,8 @@ class GenericIE(InfoExtractor):
# here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]

if determine_ext(video_url) == 'smil':
ext = determine_ext(video_url)
if ext == 'smil':
entries.append({
'id': video_id,
'formats': self._extract_smil_formats(video_url, video_id),
@@ -1724,6 +1825,8 @@ class GenericIE(InfoExtractor):
'title': video_title,
'age_limit': age_limit,
})
elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
else:
entries.append({
'id': video_id,

142
youtube_dl/extractor/indavideo.py
Normal file
@@ -0,0 +1,142 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_age_limit,
parse_iso8601,
)


class IndavideoEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
_TESTS = [{
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
'md5': 'f79b009c66194acacd40712a6778acfa',
'info_dict': {
'id': '1837039',
'ext': 'mp4',
'title': 'Cicatánc',
'description': '',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'cukiajanlo',
'uploader_id': '83729',
'timestamp': 1439193826,
'upload_date': '20150810',
'duration': 72,
'age_limit': 0,
'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
},
}, {
'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
'only_matching': True,
}, {
'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
'only_matching': True,
}]

def _real_extract(self, url):
video_id = self._match_id(url)

video = self._download_json(
'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
video_id)['data']

title = video['title']

video_urls = video.get('video_files', [])
video_file = video.get('video_file')
if video_file:
video_urls.append(video_file)
video_urls = list(set(video_urls))

video_prefix = video_urls[0].rsplit('/', 1)[0]

for flv_file in video.get('flv_files', []):
flv_url = '%s/%s' % (video_prefix, flv_file)
if flv_url not in video_urls:
video_urls.append(flv_url)

formats = [{
'url': video_url,
'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None),
} for video_url in video_urls]
self._sort_formats(formats)

timestamp = video.get('date')
if timestamp:
# upload date is in CEST
timestamp = parse_iso8601(timestamp + ' +0200', ' ')

thumbnails = [{
'url': self._proto_relative_url(thumbnail)
} for thumbnail in video.get('thumbnails', [])]

tags = [tag['title'] for tag in video.get('tags', [])]

return {
'id': video.get('id') or video_id,
'title': title,
'description': video.get('description'),
'thumbnails': thumbnails,
'uploader': video.get('user_name'),
'uploader_id': video.get('user_id'),
'timestamp': timestamp,
'duration': int_or_none(video.get('length')),
'age_limit': parse_age_limit(video.get('age_limit')),
'tags': tags,
'formats': formats,
}


class IndavideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://indavideo.hu/video/Vicces_cica_1',
'md5': '8c82244ba85d2a2310275b318eb51eac',
'info_dict': {
'id': '1335611',
'display_id': 'Vicces_cica_1',
'ext': 'mp4',
'title': 'Vicces cica',
'description': 'Játszik a tablettel. :D',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'Jet_Pack',
'uploader_id': '491217',
'timestamp': 1390821212,
'upload_date': '20140127',
'duration': 7,
'age_limit': 0,
'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'],
},
}, {
'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz',
'only_matching': True,
}, {
'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko',
'only_matching': True,
}, {
'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci',
'only_matching': True,
}, {
'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt',
'only_matching': True,
}, {
'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes',
'only_matching': True,
}]

def _real_extract(self, url):
display_id = self._match_id(url)

webpage = self._download_webpage(url, display_id)
embed_url = self._search_regex(
r'<link[^>]+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url')

return {
'_type': 'url_transparent',
'ie_key': 'IndavideoEmbed',
'url': embed_url,
'display_id': display_id,
}
@@ -201,7 +201,7 @@ class IqiyiIE(InfoExtractor):
return raw_data

def get_enc_key(self, swf_url, video_id):
enc_key = '8e29ab5666d041c3a1ea76e06dabdffb'
enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie
return enc_key

def _real_extract(self, url):

@@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re

from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import (
int_or_none,
parse_duration,
)


class KontrTubeIE(InfoExtractor):
@@ -34,33 +37,28 @@ class KontrTubeIE(InfoExtractor):
webpage = self._download_webpage(
url, display_id, 'Downloading page')

video_url = self._html_search_regex(
video_url = self._search_regex(
r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
thumbnail = self._html_search_regex(
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
thumbnail = self._search_regex(
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
title = self._html_search_regex(
r'<title>(.+?)</title>', webpage, 'video title')
r'(?s)<h2>(.+?)</h2>', webpage, 'title')
description = self._html_search_meta(
'description', webpage, 'video description')
'description', webpage, 'description')

mobj = re.search(
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
webpage)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
duration = self._search_regex(
r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
if duration:
duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))

view_count = self._html_search_regex(
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
view_count = self._search_regex(
r'Просмотров: <em>([^<]+)</em>',
webpage, 'view count', fatal=False)
if view_count:
view_count = int_or_none(view_count.replace(' ', ''))

comment_count = None
comment_str = self._html_search_regex(
r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
if comment_str.startswith('комментариев нет'):
comment_count = 0
else:
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
if mobj:
comment_count = mobj.group('total')
comment_count = int_or_none(self._search_regex(
r'Комментарии \((\d+)\)<', webpage, 'comment count', fatal=False))

return {
'id': video_id,

@@ -25,6 +25,9 @@ class KrasViewIE(InfoExtractor):
'duration': 27,
'thumbnail': 're:^https?://.*\.jpg',
},
'params': {
'skip_download': 'Not accessible from Travis CI server',
},
}

def _real_extract(self, url):

@@ -8,9 +8,9 @@ from ..utils import unified_strdate


class LibsynIE(InfoExtractor):
_VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'

_TEST = {
_TESTS = [{
'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
'md5': '443360ee1b58007bc3dcf09b41d093bb',
'info_dict': {
@@ -19,12 +19,24 @@ class LibsynIE(InfoExtractor):
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
'description': 'md5:601cb790edd05908957dae8aaa866465',
'upload_date': '20150220',
'thumbnail': 're:^https?://.*',
},
}
}, {
'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
'md5': '6c5cb21acd622d754d3b1a92b582ce42',
'info_dict': {
'id': '3727166',
'ext': 'mp3',
'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
'upload_date': '20150818',
'thumbnail': 're:^https?://.*',
}
}]

def _real_extract(self, url):
video_id = self._match_id(url)

m = re.match(self._VALID_URL, url)
video_id = m.group('id')
url = m.group('mainurl')
webpage = self._download_webpage(url, video_id)

formats = [{
@@ -32,20 +44,18 @@ class LibsynIE(InfoExtractor):
} for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]

podcast_title = self._search_regex(
r'<h2>([^<]+)</h2>', webpage, 'title')
r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
episode_title = self._search_regex(
r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')

title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title

description = self._html_search_regex(
r'<div id="info_text_body">(.+?)</div>', webpage,
'description', fatal=False)

'description', default=None)
thumbnail = self._search_regex(
r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
webpage, 'thumbnail', fatal=False)

release_date = unified_strdate(self._search_regex(
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))


@@ -11,13 +11,13 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
)


class LyndaBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
_NETRC_MACHINE = 'lynda'

@@ -41,7 +41,7 @@ class LyndaBaseIE(InfoExtractor):
request, None, 'Logging in as %s' % username)

# Not (yet) logged in
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page)
if m is not None:
response = m.group('json')
response_json = json.loads(response)
@@ -70,7 +70,16 @@ class LyndaBaseIE(InfoExtractor):
request, None,
'Confirming log in and log out from another device')

if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
if 'login error' in login_page:
mobj = re.search(
r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>',
login_page)
if mobj:
raise ExtractorError(
'lynda returned error: %s - %s'
% (mobj.group('title'), clean_html(mobj.group('description'))),
expected=True)
raise ExtractorError('Unable to log in')


@@ -109,9 +118,7 @@ class LyndaIE(LyndaBaseIE):
'lynda returned error: %s' % video_json['Message'], expected=True)

if video_json['HasAccess'] is False:
raise ExtractorError(
'Video %s is only available for members. '
% video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
self.raise_login_required('Video %s is only available for members' % video_id)

video_id = compat_str(video_json['ID'])
duration = video_json['DurationInSeconds']

@@ -25,6 +25,7 @@ class MailRuIE(InfoExtractor):
'uploader_id': 'sonypicturesrus@mail.ru',
'duration': 184,
},
'skip': 'Not accessible from Travis CI server',
},
{
'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
@@ -39,6 +40,7 @@ class MailRuIE(InfoExtractor):
'uploader_id': 'hitech@corp.mail.ru',
'duration': 245,
},
'skip': 'Not accessible from Travis CI server',
},
]


@@ -29,7 +29,7 @@ class MDRIE(InfoExtractor):
doc = self._download_xml(domain + xmlurl, video_id)
formats = []
for a in doc.findall('./assets/asset'):
url_el = a.find('.//progressiveDownloadUrl')
url_el = a.find('./progressiveDownloadUrl')
if url_el is None:
continue
abr = int(a.find('bitrateAudio').text) // 1000

@@ -9,7 +9,10 @@ from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import ExtractorError
from ..utils import (
ExtractorError,
remove_start,
)


class MonikerIE(InfoExtractor):
@@ -24,6 +27,14 @@ class MonikerIE(InfoExtractor):
'ext': 'mp4',
'title': 'youtube-dl test video',
},
}, {
'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
'md5': '710883dee1bfc370ecf9fa6a89307c88',
'info_dict': {
'id': 'jih3nce3x6wn',
'ext': 'mp4',
'title': 'youtube-dl test video',
},
}, {
'url': 'http://vidspot.net/l2ngsmhs8ci5',
'md5': '710883dee1bfc370ecf9fa6a89307c88',
@@ -38,7 +49,10 @@ class MonikerIE(InfoExtractor):
}]

def _real_extract(self, url):
video_id = self._match_id(url)
orig_video_id = self._match_id(url)
video_id = remove_start(orig_video_id, 'embed-')
url = url.replace(orig_video_id, video_id)
assert re.match(self._VALID_URL, url) is not None
orig_webpage = self._download_webpage(url, video_id)

if '>File Not Found<' in orig_webpage:

58
youtube_dl/extractor/mwave.py
Normal file
@@ -0,0 +1,58 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_duration,
)


class MwaveIE(InfoExtractor):
_VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
'md5': 'c930e27b7720aaa3c9d0018dfc8ff6cc',
'info_dict': {
'id': '168859',
'ext': 'flv',
'title': '[M COUNTDOWN] SISTAR - SHAKE IT',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'M COUNTDOWN',
'duration': 206,
'view_count': int,
}
}

def _real_extract(self, url):
video_id = self._match_id(url)

vod_info = self._download_json(
'http://mwave.interest.me/onair/vod_info.m?vodtype=CL&sectorid=&endinfo=Y&id=%s' % video_id,
video_id, 'Download vod JSON')

formats = []
for num, cdn_info in enumerate(vod_info['cdn']):
stream_url = cdn_info.get('url')
if not stream_url:
continue
stream_name = cdn_info.get('name') or compat_str(num)
f4m_stream = self._download_json(
stream_url, video_id,
'Download %s stream JSON' % stream_name)
f4m_url = f4m_stream.get('fileurl')
if not f4m_url:
continue
formats.extend(
self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
self._sort_formats(formats)

return {
'id': video_id,
'title': vod_info['title'],
'thumbnail': vod_info.get('cover'),
'uploader': vod_info.get('program_title'),
'duration': parse_duration(vod_info.get('time')),
'view_count': int_or_none(vod_info.get('hit')),
'formats': formats,
}
@@ -124,7 +124,7 @@ class NBCSportsIE(InfoExtractor):
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|
(?:feature|nightly-news)/[^/]+/(?P<title>.+))
(?:watch|feature|nightly-news)/[^/]+/(?P<title>.+))
'''

_TESTS = [
@@ -169,6 +169,10 @@ class NBCNewsIE(InfoExtractor):
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
},
},
{
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
'only_matching': True,
},
]

def _real_extract(self, url):
@@ -232,3 +236,28 @@ class NBCNewsIE(InfoExtractor):
'url': info['videoAssets'][-1]['publicUrl'],
'ie_key': 'ThePlatform',
}


class MSNBCIE(InfoExtractor):
# https URLs redirect to corresponding http ones
_VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
_TEST = {
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
'id': 'n_hayes_Aimm_140801_272214',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1406937606,
'upload_date': '20140802',
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
},
}

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._html_search_meta('embedURL', webpage)
return self.url_result(embed_url)

@@ -1,12 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
parse_iso8601,
parse_duration,
@@ -15,7 +14,7 @@ from ..utils import (


class NowTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?P<station>rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player'
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'

_TESTS = [{
# rtl
@@ -23,7 +22,7 @@ class NowTVIE(InfoExtractor):
'info_dict': {
'id': '203519',
'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
'ext': 'mp4',
'ext': 'flv',
'title': 'Die neuen Bauern und eine Hochzeit',
'description': 'md5:e234e1ed6d63cf06be5c070442612e7e',
'thumbnail': 're:^https?://.*\.jpg$',
@@ -32,7 +31,7 @@ class NowTVIE(InfoExtractor):
'duration': 2786,
},
'params': {
# m3u8 download
# rtmp download
'skip_download': True,
},
}, {
@@ -41,7 +40,7 @@ class NowTVIE(InfoExtractor):
'info_dict': {
'id': '203481',
'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934',
'ext': 'mp4',
'ext': 'flv',
'title': 'Berlin - Tag & Nacht (Folge 934)',
'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0',
'thumbnail': 're:^https?://.*\.jpg$',
@@ -50,7 +49,7 @@ class NowTVIE(InfoExtractor):
'duration': 2641,
},
'params': {
# m3u8 download
# rtmp download
'skip_download': True,
},
}, {
@@ -59,7 +58,7 @@ class NowTVIE(InfoExtractor):
'info_dict': {
'id': '165780',
'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00',
'ext': 'mp4',
'ext': 'flv',
'title': 'Hals- und Beinbruch',
'description': 'md5:b50d248efffe244e6f56737f0911ca57',
'thumbnail': 're:^https?://.*\.jpg$',
@@ -68,7 +67,7 @@ class NowTVIE(InfoExtractor):
'duration': 2742,
},
'params': {
# m3u8 download
# rtmp download
'skip_download': True,
},
}, {
@@ -77,7 +76,7 @@ class NowTVIE(InfoExtractor):
'info_dict': {
'id': '99205',
'display_id': 'medicopter-117/angst',
'ext': 'mp4',
'ext': 'flv',
'title': 'Angst!',
'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e',
'thumbnail': 're:^https?://.*\.jpg$',
@@ -86,7 +85,7 @@ class NowTVIE(InfoExtractor):
'duration': 3025,
},
'params': {
# m3u8 download
# rtmp download
'skip_download': True,
},
}, {
@@ -95,7 +94,7 @@ class NowTVIE(InfoExtractor):
'info_dict': {
'id': '203521',
'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch',
'ext': 'mp4',
'ext': 'flv',
'title': 'Thema u.a.: Der erste Blick: Die Apple Watch',
'description': 'md5:4312b6c9d839ffe7d8caf03865a531af',
'thumbnail': 're:^https?://.*\.jpg$',
@@ -104,7 +103,7 @@ class NowTVIE(InfoExtractor):
'duration': 1083,
},
'params': {
# m3u8 download
# rtmp download
'skip_download': True,
},
}, {
@@ -113,7 +112,7 @@ class NowTVIE(InfoExtractor):
'info_dict': {
'id': '128953',
'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel',
'ext': 'mp4',
'ext': 'flv',
'title': "Büro-Fall / Chihuahua 'Joel'",
'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d',
'thumbnail': 're:^https?://.*\.jpg$',
@@ -122,15 +121,19 @@ class NowTVIE(InfoExtractor):
'duration': 3092,
},
'params': {
# m3u8 download
# rtmp download
'skip_download': True,
},
}, {
'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview',
'only_matching': True,
}, {
'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
'only_matching': True,
}]

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
station = mobj.group('station')
display_id = self._match_id(url)

info = self._download_json(
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
@@ -148,29 +151,19 @@ class NowTVIE(InfoExtractor):
raise ExtractorError(
'Video %s is not available for free' % video_id, expected=True)

f = info.get('format', {})
station = f.get('station') or station

STATIONS = {
'rtl': 'rtlnow',
'rtl2': 'rtl2now',
'vox': 'voxnow',
'nitro': 'rtlnitronow',
'ntv': 'n-tvnow',
'superrtl': 'superrtlnow'
}

formats = []
for item in files['items']:
item_path = remove_start(item['path'], '/')
tbr = int_or_none(item['bitrate'])
m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (STATIONS[station], item_path)
m3u8_url = m3u8_url.replace('now/', 'now/videos/')
if determine_ext(item['path']) != 'f4v':
continue
app, play_path = remove_start(item['path'], '/').split('/', 1)
formats.append({
'url': m3u8_url,
'format_id': '%s-%sk' % (item['id'], tbr),
'ext': 'mp4',
'tbr': tbr,
'url': 'rtmpe://fms.rtl.de',
'app': app,
'play_path': 'mp4:%s' % play_path,
'ext': 'flv',
'page_url': url,
'player_url': 'http://rtl-now.rtl.de/includes/nc_player.swf',
'tbr': int_or_none(item.get('bitrate')),
})
self._sort_formats(formats)

@@ -178,6 +171,8 @@ class NowTVIE(InfoExtractor):
description = info.get('articleLong') or info.get('articleShort')
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
duration = parse_duration(info.get('duration'))

f = info.get('format', {})
thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')

return {

@@ -407,6 +407,7 @@ class NPORadioFragmentIE(InfoExtractor):


class VPROIE(NPOIE):
IE_NAME = 'vpro'
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'

_TESTS = [

@@ -16,15 +16,17 @@ class OdnoklassnikiIE(InfoExtractor):
_TESTS = [{
# metadata in JSON
'url': 'http://ok.ru/video/20079905452',
'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
'title': 'Культура меняет нас (прекрасный ролик!))',
'duration': 100,
'upload_date': '20141207',
'uploader_id': '330537914540',
'uploader': 'Виталий Добровольский',
'like_count': int,
'age_limit': 0,
},
}, {
# metadataUrl
@@ -35,9 +37,11 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'mp4',
'title': 'Девушка без комплексов ...',
'duration': 191,
'upload_date': '20150518',
'uploader_id': '534380003155',
'uploader': 'Андрей Мещанинов',
'uploader': '☭ Андрей Мещанинов ☭',
'like_count': int,
'age_limit': 0,
},
}, {
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',

@@ -92,6 +92,7 @@ class PBSIE(InfoExtractor):
'duration': 3172,
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20140122',
'age_limit': 10,
},
'params': {
'skip_download': True, # requires ffmpeg

99
youtube_dl/extractor/periscope.py
Normal file
@@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import parse_iso8601


class PeriscopeIE(InfoExtractor):
IE_DESC = 'Periscope'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
_TEST = {
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
'md5': '65b57957972e503fcbbaeed8f4fa04ca',
'info_dict': {
'id': '56102209',
'ext': 'mp4',
'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
'timestamp': 1438978559,
'upload_date': '20150807',
'uploader': 'Bec Boop',
'uploader_id': '1465763',
},
'skip': 'Expires in 24 hours',
}

def _call_api(self, method, token):
return self._download_json(
'https://api.periscope.tv/api/v2/%s?token=%s' % (method, token), token)

def _real_extract(self, url):
token = self._match_id(url)

broadcast_data = self._call_api('getBroadcastPublic', token)
broadcast = broadcast_data['broadcast']
status = broadcast['status']

uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name')
uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id')

title = '%s - %s' % (uploader, status) if uploader else status
state = broadcast.get('state').lower()
if state == 'running':
title = self._live_title(title)
timestamp = parse_iso8601(broadcast.get('created_at'))

thumbnails = [{
'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]

stream = self._call_api('getAccessPublic', token)

formats = []
for format_id in ('replay', 'rtmp', 'hls', 'https_hls'):
video_url = stream.get(format_id + '_url')
if not video_url:
continue
f = {
'url': video_url,
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
}
if format_id != 'rtmp':
f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8'
formats.append(f)
self._sort_formats(formats)

return {
'id': broadcast.get('id') or token,
'title': title,
'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnails': thumbnails,
'formats': formats,
}


class QuickscopeIE(InfoExtractor):
IE_DESC = 'Quick Scope'
_VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)'
_TEST = {
'url': 'https://watchonperiscope.com/broadcast/56180087',
'only_matching': True,
}

def _real_extract(self, url):
broadcast_id = self._match_id(url)
request = compat_urllib_request.Request(
'https://watchonperiscope.com/api/accessChannel', compat_urllib_parse.urlencode({
'broadcast_id': broadcast_id,
'entry_ticket': '',
'from_push': 'false',
'uses_sessions': 'true',
}).encode('utf-8'))
return self.url_result(
self._download_json(request, broadcast_id)['share_url'], 'Periscope')
181
youtube_dl/extractor/playtvak.py
Normal file
@@ -0,0 +1,181 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
qualities,
)


class PlaytvakIE(InfoExtractor):
IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz'
_VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)'
_TESTS = [{
'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko',
'md5': '4525ae312c324b4be2f4603cc78ceb4a',
'info_dict': {
'id': 'A150730_150323_hodinovy-manzel_kuko',
'ext': 'mp4',
'title': 'Vyžeňte vosy a sršně ze zahrady',
'description': 'md5:f93d398691044d303bc4a3de62f3e976',
'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
'duration': 279,
'timestamp': 1438732860,
'upload_date': '20150805',
'is_live': False,
}
}, { # live video test
'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat',
'info_dict': {
'id': 'A150624_164934_planespotting_cat',
'ext': 'flv',
'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
'is_live': True,
},
'params': {
'skip_download': True, # requires rtmpdump
},
}, { # idnes.cz
'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku',
'md5': '819832ba33cd7016e58a6658577fe289',
'info_dict': {
'id': 'A150809_104116_domaci_pku',
'ext': 'mp4',
'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se',
'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2',
'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
'duration': 39,
'timestamp': 1438969140,
'upload_date': '20150807',
'is_live': False,
}
}, { # lidovky.cz
'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE',
'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8',
'info_dict': {
'id': 'A150808_214044_ln-video_ELE',
'ext': 'mp4',
'title': 'Táhni! Demonstrace proti imigrantům budila emoce',
'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c',
'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
'timestamp': 1439052180,
'upload_date': '20150808',
'is_live': False,
}
}, { # metro.cz
'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row',
'md5': '84fc1deedcac37b7d4a6ccae7c716668',
'info_dict': {
'id': 'A141111_173251_metro-extra_row',
'ext': 'mp4',
'title': 'Recesisté udělali z billboardu kolotoč',
'description': 'md5:7369926049588c3989a66c9c1a043c4c',
'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
'timestamp': 1415725500,
'upload_date': '20141111',
'is_live': False,
}
}, {
'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko',
'only_matching': True,
}]

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)

info_url = self._html_search_regex(
r'Misc\.videoFLV\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')

parsed_url = compat_urlparse.urlparse(info_url)

qs = compat_urlparse.parse_qs(parsed_url.query)
qs.update({
'reklama': ['0'],
'type': ['js'],
})

info_url = compat_urlparse.urlunparse(
parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True)))

json_info = self._download_json(
info_url, video_id,
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])

item = None
for i in json_info['items']:
if i.get('type') == 'video' or i.get('type') == 'stream':
item = i
break
if not item:
raise ExtractorError('No suitable stream found')

quality = qualities(('low', 'middle', 'high'))

formats = []
for fmt in item['video']:
video_url = fmt.get('file')
if not video_url:
continue

format_ = fmt['format']
format_id = '%s_%s' % (format_, fmt['quality'])
preference = None

if format_ in ('mp4', 'webm'):
ext = format_
elif format_ == 'rtmp':
ext = 'flv'
elif format_ == 'apple':
ext = 'mp4'
# Some streams have mp3 audio which does not play
# well with ffmpeg filter aac_adtstoasc
preference = -1
elif format_ == 'adobe': # f4m manifest fails with 404 in 80% of requests
continue
else: # Other formats not supported yet
continue

formats.append({
'url': video_url,
'ext': ext,
'format_id': format_id,
'quality': quality(fmt.get('quality')),
'preference': preference,
})
self._sort_formats(formats)

title = item['title']
is_live = item['type'] == 'stream'
if is_live:
title = self._live_title(title)
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
'description', webpage, 'description')
timestamp = None
duration = None
if not is_live:
duration = int_or_none(item.get('length'))
timestamp = item.get('published')
if timestamp:
timestamp = parse_iso8601(timestamp[:-5])

return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': item.get('image'),
'duration': duration,
'timestamp': timestamp,
'is_live': is_live,
'formats': formats,
}
207
youtube_dl/extractor/pluralsight.py
Normal file
@@ -0,0 +1,207 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class PluralsightIE(InfoExtractor):
|
||||
IE_NAME = 'pluralsight'
|
||||
_VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
|
||||
_LOGIN_URL = 'https://www.pluralsight.com/id/'
|
||||
_NETRC_MACHINE = 'pluralsight'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
|
||||
'md5': '4d458cf5cf4c593788672419a8dd4cf8',
|
||||
'info_dict': {
|
||||
'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
|
||||
'ext': 'mp4',
|
||||
'title': 'Management of SQL Server - Demo Monitoring',
|
||||
'duration': 338,
|
||||
},
|
||||
'skip': 'Requires pluralsight account credentials',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
self.raise_login_required('Pluralsight account is required')
|
||||
|
||||
login_page = self._download_webpage(
|
||||
            self._LOGIN_URL, None, 'Downloading login page')

        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'Username': username.encode('utf-8'),
            'Password': password.encode('utf-8'),
        })

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post url', default=self._LOGIN_URL, group='url')

        if not post_url.startswith('http'):
            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

        request = compat_urllib_request.Request(
            post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        error = self._search_regex(
            r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
            response, 'error message', default=None)
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        author = mobj.group('author')
        name = mobj.group('name')
        clip_id = mobj.group('clip')
        course = mobj.group('course')

        display_id = '%s-%s' % (name, clip_id)

        webpage = self._download_webpage(url, display_id)

        collection = self._parse_json(
            self._search_regex(
                r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
                webpage, 'modules'),
            display_id)

        module, clip = None, None

        for module_ in collection:
            if module_.get('moduleName') == name:
                module = module_
                for clip_ in module_.get('clips', []):
                    clip_index = clip_.get('clipIndex')
                    if clip_index is None:
                        continue
                    if compat_str(clip_index) == clip_id:
                        clip = clip_
                        break

        if not clip:
            raise ExtractorError('Unable to resolve clip')

        QUALITIES = {
            'low': {'width': 640, 'height': 480},
            'medium': {'width': 848, 'height': 640},
            'high': {'width': 1024, 'height': 768},
        }

        ALLOWED_QUALITIES = (
            ('webm', ('high',)),
            ('mp4', ('low', 'medium', 'high',)),
        )

        formats = []
        for ext, qualities in ALLOWED_QUALITIES:
            for quality in qualities:
                f = QUALITIES[quality].copy()
                clip_post = {
                    'a': author,
                    'cap': 'false',
                    'cn': clip_id,
                    'course': course,
                    'lc': 'en',
                    'm': name,
                    'mt': ext,
                    'q': '%dx%d' % (f['width'], f['height']),
                }
                request = compat_urllib_request.Request(
                    'http://www.pluralsight.com/training/Player/ViewClip',
                    json.dumps(clip_post).encode('utf-8'))
                request.add_header('Content-Type', 'application/json;charset=utf-8')
                format_id = '%s-%s' % (ext, quality)
                clip_url = self._download_webpage(
                    request, display_id, 'Downloading %s URL' % format_id, fatal=False)
                if not clip_url:
                    continue
                f.update({
                    'url': clip_url,
                    'ext': ext,
                    'format_id': format_id,
                })
                formats.append(f)
        self._sort_formats(formats)

        # TODO: captions
        # http://www.pluralsight.com/training/Player/ViewClip + cap = true
        # or
        # http://www.pluralsight.com/training/Player/Captions
        # { a = author, cn = clip_id, lc = end, m = name }

        return {
            'id': clip['clipName'],
            'title': '%s - %s' % (module['title'], clip['title']),
            'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')),
            'creator': author,
            'formats': formats
        }


class PluralsightCourseIE(InfoExtractor):
    IE_NAME = 'pluralsight:course'
    _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
    _TEST = {
        # Free course from Pluralsight Starter Subscription for Microsoft TechNet
        # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
        'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
        'info_dict': {
            'id': 'hosting-sql-server-windows-azure-iaas',
            'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
            'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
        },
        'playlist_count': 31,
    }

    def _real_extract(self, url):
        course_id = self._match_id(url)

        # TODO: PSM cookie

        course = self._download_json(
            'http://www.pluralsight.com/data/course/%s' % course_id,
            course_id, 'Downloading course JSON')

        title = course['title']
        description = course.get('description') or course.get('shortDescription')

        course_data = self._download_json(
            'http://www.pluralsight.com/data/course/content/%s' % course_id,
            course_id, 'Downloading course data JSON')

        entries = []
        for module in course_data:
            for clip in module.get('clips', []):
                player_parameters = clip.get('playerParameters')
                if not player_parameters:
                    continue
                entries.append(self.url_result(
                    'http://www.pluralsight.com/training/player?%s' % player_parameters,
                    'Pluralsight'))

        return self.playlist_result(entries, course_id, title, description)
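The format loop above works by POSTing a JSON description of the desired quality to the ViewClip endpoint; the response body is a bare stream URL. A minimal standalone sketch of that exchange, using only the endpoint and field names visible in the diff (session/login handling omitted, so this is illustrative only):

# Hypothetical sketch of the ViewClip exchange shown above; not part of
# youtube-dl, and it only works with a valid authenticated session.
import json

try:
    from urllib.request import Request, urlopen  # Python 3
except ImportError:
    from urllib2 import Request, urlopen  # Python 2


def fetch_clip_url(author, course, module_name, clip_id, ext='mp4', width=848, height=640):
    clip_post = {
        'a': author,
        'cap': 'false',
        'cn': clip_id,
        'course': course,
        'lc': 'en',
        'm': module_name,
        'mt': ext,
        'q': '%dx%d' % (width, height),
    }
    request = Request(
        'http://www.pluralsight.com/training/Player/ViewClip',
        json.dumps(clip_post).encode('utf-8'),
        {'Content-Type': 'application/json;charset=utf-8'})
    # The response body is the stream URL itself, not JSON
    return urlopen(request).read().decode('utf-8')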
@@ -22,6 +22,7 @@ class Porn91IE(InfoExtractor):
            'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
            'ext': 'mp4',
            'duration': 431,
            'age_limit': 18,
        }
    }

@@ -68,4 +69,5 @@ class Porn91IE(InfoExtractor):
            'url': video_url,
            'duration': duration,
            'comment_count': comment_count,
            'age_limit': self._rta_search(webpage),
        }
@@ -81,7 +81,7 @@ class PornHubIE(InfoExtractor):
        comment_count = self._extract_count(
            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

        video_urls = list(map(compat_urllib_parse_unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
        video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"player_quality_[0-9]{3}p\s*=\s*'([^']+)'", webpage)))
        if webpage.find('"encrypted":true') != -1:
            password = compat_urllib_parse_unquote_plus(
                self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))

@@ -94,7 +94,7 @@ class PornHubIE(InfoExtractor):
            format = path.split('/')[5].split('_')[:2]
            format = "-".join(format)

            m = re.match(r'^(?P<height>[0-9]+)P-(?P<tbr>[0-9]+)K$', format)
            m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format)
            if m is None:
                height = None
                tbr = None
@@ -1,6 +1,7 @@
# encoding: utf-8
from __future__ import unicode_literals

import re
from .common import InfoExtractor


@@ -8,22 +9,28 @@ class RTL2IE(InfoExtractor):
    _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
    _TESTS = [{
        'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
        'md5': 'bfcc179030535b08dc2b36b469b5adc7',
        'info_dict': {
            'id': 'folge-203-0',
            'ext': 'f4v',
            'title': 'GRIP sucht den Sommerkönig',
            'description': 'Matthias, Det und Helge treten gegeneinander an.'
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
        'md5': 'ffcd517d2805b57ce11a58a2980c2b02',
        'info_dict': {
            'id': '21040-anna-erwischt-alex',
            'ext': 'mp4',
            'title': 'Anna erwischt Alex!',
            'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):

@@ -34,12 +41,18 @@ class RTL2IE(InfoExtractor):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vico_id = self._html_search_regex(
            r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
        vivi_id = self._html_search_regex(
            r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
        mobj = re.search(
            r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
            webpage)
        if mobj:
            vico_id = mobj.group('vico_id')
            vivi_id = mobj.group('vivi_id')
        else:
            vico_id = self._html_search_regex(
                r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
            vivi_id = self._html_search_regex(
                r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
        info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
        webpage = self._download_webpage(info_url, '')

        info = self._download_json(info_url, video_id)
        video_info = info['video']
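The change above is a common hardening pattern: try the newer data-attribute markup first and fall back to the legacy inline-JS variables. The same two-stage lookup in isolation (the sample markup is invented for illustration):

# Standalone sketch of the two-stage id lookup above; the sample HTML
# is invented, not taken from rtl2.de.
import re

def extract_ids(webpage):
    mobj = re.search(
        r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
        webpage)
    if mobj:
        return mobj.group('vico_id'), mobj.group('vivi_id')
    # Fall back to the legacy inline-JS variables
    vico = re.search(r'vico_id\s*:\s*([0-9]+)', webpage)
    vivi = re.search(r'vivi_id\s*:\s*([0-9]+)', webpage)
    return vico.group(1), vivi.group(1)

print(extract_ids('<div data-collection="123" data-video="456">'))  # ('123', '456')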
@@ -18,6 +18,10 @@ class RTPIE(InfoExtractor):
            'description': 'As paixões musicais de António Cartaxo e António Macedo',
            'thumbnail': 're:^https?://.*\.jpg',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
        'only_matching': True,
47 youtube_dl/extractor/rtvnh.py Normal file
@@ -0,0 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import ExtractorError


class RTVNHIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.rtvnh.nl/video/131946',
        'md5': '6e1d0ab079e2a00b6161442d3ceacfc1',
        'info_dict': {
            'id': '131946',
            'ext': 'mp4',
            'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
            'thumbnail': 're:^https?:.*\.jpg$'
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        meta = self._parse_json(self._download_webpage(
            'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)

        status = meta.get('status')
        if status != 200:
            raise ExtractorError(
                '%s returned error code %d' % (self.IE_NAME, status), expected=True)

        formats = self._extract_smil_formats(
            'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id, fatal=False)

        for item in meta['source']['fb']:
            if item.get('type') == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
            elif item.get('type') == '':
                formats.append({'url': item['file']})

        return {
            'id': video_id,
            'title': meta['title'].strip(),
            'thumbnail': meta.get('image'),
            'formats': formats
        }
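The JSON endpoint above carries both a status code and the source list, so the extractor gates on `status` and then splits HLS entries from plain progressive files. A sketch of that handling with an invented sample payload:

# Sketch of the source-list handling above; the sample payload is invented.
def split_sources(meta):
    if meta.get('status') != 200:
        raise ValueError('RTVNH returned error code %s' % meta.get('status'))
    hls, progressive = [], []
    for item in meta['source']['fb']:
        if item.get('type') == 'hls':
            hls.append(item['file'])
        elif item.get('type') == '':
            progressive.append(item['file'])
    return hls, progressive

sample = {'status': 200, 'source': {'fb': [
    {'type': 'hls', 'file': 'http://example.com/playlist.m3u8'},
    {'type': '', 'file': 'http://example.com/video.mp4'},
]}}
print(split_sources(sample))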
@@ -30,6 +30,7 @@ class RutubeIE(InfoExtractor):
            'uploader': 'NTDRussian',
            'uploader_id': '29790',
            'upload_date': '20131016',
            'age_limit': 0,
        },
        'params': {
            # It requires ffmpeg (m3u8 download)
@@ -20,7 +20,6 @@ from ..utils import (
class SafariBaseIE(InfoExtractor):
    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
    _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
    _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
    _NETRC_MACHINE = 'safari'

    _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'

@@ -37,9 +36,7 @@ class SafariBaseIE(InfoExtractor):
    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(
                self._ACCOUNT_CREDENTIALS_HINT,
                expected=True)
            self.raise_login_required('safaribooksonline.com account is required')

        headers = std_headers
        if 'Referer' not in headers:
@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
    int_or_none,
    unified_strdate,
    js_to_json,
)


@@ -22,59 +23,70 @@ class ScreenwaveMediaIE(InfoExtractor):
        video_id = self._match_id(url)

        playerdata = self._download_webpage(
            'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id,
            'http://player.screenwavemedia.com/player.php?id=%s' % video_id,
            video_id, 'Downloading player webpage')

        vidtitle = self._search_regex(
            r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
        vidurl = self._search_regex(
            r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')

        videolist_url = None
        playerconfig = self._download_webpage(
            'http://player.screenwavemedia.com/player.js',
            video_id, 'Downloading playerconfig webpage')

        mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata)
        if mobj:
            videoserver = mobj.group('videoserver')
            mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
            vidid = mobj.group('vidid') if mobj else video_id
            videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
        else:
            mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
            if mobj:
                videolist_url = mobj.group('smil')
        videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver')

        if videolist_url:
            videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
            formats = []
            baseurl = vidurl[:vidurl.rfind('/') + 1]
            for video in videolist.findall('.//video'):
                src = video.get('src')
                if not src:
        sources = self._parse_json(
            js_to_json(
                re.sub(
                    r'(?s)/\*.*?\*/', '',
                    self._search_regex(
                        r"sources\s*:\s*(\[[^\]]+?\])", playerconfig,
                        'sources',
                    ).replace(
                        "' + thisObj.options.videoserver + '",
                        videoserver
                    ).replace(
                        "' + playerVidId + '",
                        video_id
                    )
                )
            ),
            video_id, fatal=False
        )

        # Fallback to hardcoded sources if JS changes again
        if not sources:
            sources = [{
                'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
                'type': 'mp4',
                'label': format_label,
            } for format_id, format_label in (
                ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))]
            sources.append({
                'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id),
                'type': 'hls',
            })

        formats = []
        for source in sources:
            if source['type'] == 'hls':
                formats.extend(self._extract_m3u8_formats(source['file'], video_id))
            else:
                file_ = source.get('file')
                if not file_:
                    continue
                file_ = src.partition(':')[-1]
                width = int_or_none(video.get('width'))
                height = int_or_none(video.get('height'))
                bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
                format = {
                    'url': baseurl + file_,
                    'format_id': src.rpartition('.')[0].rpartition('_')[-1],
                }
                if width or height:
                    format.update({
                        'tbr': bitrate,
                        'width': width,
                        'height': height,
                    })
                else:
                    format.update({
                        'abr': bitrate,
                        'vcodec': 'none',
                    })
                formats.append(format)
        else:
            formats = [{
                'url': vidurl,
            }]
                format_label = source.get('label')
                format_id = self._search_regex(
                    r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]', format_label, 'height', default=None))
                formats.append({
                    'url': source['file'],
                    'format_id': format_id,
                    'format': format_label,
                    'ext': source.get('type'),
                    'height': height,
                })
        self._sort_formats(formats)

        return {
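The new code pulls the `sources` array out of player.js, strips /* ... */ comments with a regex, substitutes the templated server/id fragments, and only then hands the result to js_to_json. The comment-stripping and substitution steps in isolation (the sample player.js snippet and values are invented):

# Sketch of the pre-processing steps above; the sample snippet is invented.
import re

playerconfig = """
sources : [
    /* default quality */
    { file: 'http://' + thisObj.options.videoserver + '/vod/' + playerVidId + '_high.mp4', label: '360p High' }
]
"""

raw = re.search(r'sources\s*:\s*(\[[^\]]+?\])', playerconfig).group(1)
raw = re.sub(r'(?s)/\*.*?\*/', '', raw)  # drop /* ... */ comments
raw = raw.replace("' + thisObj.options.videoserver + '", '198.51.100.7')
raw = raw.replace("' + playerVidId + '", 'some-video-id')
print(raw)  # now close to JSON; youtube-dl finishes the job with js_to_json()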
@@ -29,6 +29,7 @@ class SexyKarmaIE(InfoExtractor):
            'view_count': int,
            'comment_count': int,
            'categories': list,
            'age_limit': 18,
        }
    }, {
        'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
107 youtube_dl/extractor/shahid.py Normal file
@@ -0,0 +1,107 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
)


class ShahidIE(InfoExtractor):
    _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?'
    _TESTS = [{
        'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
        'info_dict': {
            'id': '90574',
            'ext': 'm3u8',
            'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
            'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
            'duration': 2972,
            'timestamp': 1422057420,
            'upload_date': '20150123',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # shahid plus subscriber only
        'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
        'only_matching': True
    }]

    def _handle_error(self, response):
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
                expected=True)

    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
        response = super(ShahidIE, self)._download_json(url, video_id, note)['data']
        self._handle_error(response)
        return response

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        api_vars = {
            'id': video_id,
            'type': 'player',
            'url': 'http://api.shahid.net/api/v1_1',
            'playerType': 'episode',
        }

        flashvars = self._search_regex(
            r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None)
        if flashvars:
            for key in api_vars.keys():
                value = self._search_regex(
                    r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key,
                    flashvars, 'type', default=None, group='value')
                if value:
                    api_vars[key] = value

        player = self._download_json(
            'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html'
            % (video_id, api_vars['type']), video_id, 'Downloading player JSON')

        formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')

        video = self._download_json(
            '%s/%s/%s?%s' % (
                api_vars['url'], api_vars['playerType'], api_vars['id'],
                compat_urllib_parse.urlencode({
                    'apiKey': 'sh@hid0nlin3',
                    'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
                }).encode('utf-8')),
            video_id, 'Downloading video JSON')

        video = video[api_vars['playerType']]

        title = video['title']
        description = video.get('description')
        thumbnail = video.get('thumbnailUrl')
        duration = int_or_none(video.get('duration'))
        timestamp = parse_iso8601(video.get('referenceDate'))
        categories = [
            category['name']
            for category in video.get('genres', []) if 'name' in category]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'categories': categories,
            'formats': formats,
        }
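The flow above is two-step: getPlayerContent returns the m3u8 playlist URL, then the REST API is queried for metadata with the apiKey and hash sent verbatim as query parameters. A sketch of how that metadata URL is assembled (all values copied from the extractor above; no request is made):

# Sketch of the metadata URL built above; parameter values are copied
# verbatim from the new extractor.
try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode  # Python 2

api_vars = {
    'id': '90574',
    'url': 'http://api.shahid.net/api/v1_1',
    'playerType': 'episode',
}
query = urlencode({
    'apiKey': 'sh@hid0nlin3',
    'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
})
print('%s/%s/%s?%s' % (api_vars['url'], api_vars['playerType'], api_vars['id'], query))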
@@ -14,17 +14,28 @@ from ..utils import (


class SharedIE(InfoExtractor):
    _VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
    IE_DESC = 'shared.sx and vivo.sx'
    _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'

    _TEST = {
    _TESTS = [{
        'url': 'http://shared.sx/0060718775',
        'md5': '106fefed92a8a2adb8c98e6a0652f49b',
        'info_dict': {
            'id': '0060718775',
            'ext': 'mp4',
            'title': 'Bmp4',
            'filesize': 1720110,
        },
    }
    }, {
        'url': 'http://vivo.sx/d7ddda0e78',
        'md5': '15b3af41be0b4fe01f4df075c2678b2c',
        'info_dict': {
            'id': 'd7ddda0e78',
            'ext': 'mp4',
            'title': 'Chicken',
            'filesize': 528031,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
@@ -330,10 +330,7 @@ class SmotriBroadcastIE(InfoExtractor):

        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(
                'Erotic broadcasts allowed only for registered users, '
                'use --username and --password options to provide account credentials.',
                expected=True)
            self.raise_login_required('Erotic broadcasts allowed only for registered users')

        login_form = {
            'login-hint53': '1',
@@ -29,7 +29,7 @@ class SoundcloudIE(InfoExtractor):
    _VALID_URL = r'''(?x)^(?:https?://)?
                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
                            (?P<uploader>[\w\d-]+)/
                            (?!sets/|(?:likes|tracks)/?(?:$|[?#]))
                            (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
                            (?P<title>[\w\d-]+)/?
                            (?P<token>[^?]+?)?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)

@@ -293,60 +293,139 @@ class SoundcloudSetIE(SoundcloudIE):


class SoundcloudUserIE(SoundcloudIE):
    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:(?:www|m)\.)?soundcloud\.com/
                        (?P<user>[^/]+)
                        (?:/
                            (?P<rsrc>tracks|sets|reposts|likes|spotlight)
                        )?
                        /?(?:[?#].*)?$
                '''
    IE_NAME = 'soundcloud:user'
    _TESTS = [{
        'url': 'https://soundcloud.com/the-concept-band',
        'url': 'https://soundcloud.com/the-akashic-chronicler',
        'info_dict': {
            'id': '9615865',
            'title': 'The Royal Concept',
            'id': '114582580',
            'title': 'The Akashic Chronicler (All)',
        },
        'playlist_mincount': 12
    }, {
        'url': 'https://soundcloud.com/the-concept-band/likes',
        'info_dict': {
            'id': '9615865',
            'title': 'The Royal Concept',
        },
        'playlist_mincount': 1,
        'playlist_mincount': 111,
    }, {
        'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
        'only_matching': True,
        'info_dict': {
            'id': '114582580',
            'title': 'The Akashic Chronicler (Tracks)',
        },
        'playlist_mincount': 50,
    }, {
        'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
        'info_dict': {
            'id': '114582580',
            'title': 'The Akashic Chronicler (Playlists)',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
        'info_dict': {
            'id': '114582580',
            'title': 'The Akashic Chronicler (Reposts)',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
        'info_dict': {
            'id': '114582580',
            'title': 'The Akashic Chronicler (Likes)',
        },
        'playlist_mincount': 321,
    }, {
        'url': 'https://soundcloud.com/grynpyret/spotlight',
        'info_dict': {
            'id': '7098329',
            'title': 'Grynpyret (Spotlight)',
        },
        'playlist_mincount': 1,
    }]

    _API_BASE = 'https://api.soundcloud.com'
    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    _BASE_URL_MAP = {
        'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
        'tracks': '%s/users/%%s/tracks' % _API_BASE,
        'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
        'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
        'likes': '%s/users/%%s/likes' % _API_V2_BASE,
        'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
    }

    _TITLE_MAP = {
        'all': 'All',
        'tracks': 'Tracks',
        'sets': 'Playlists',
        'reposts': 'Reposts',
        'likes': 'Likes',
        'spotlight': 'Spotlight',
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        uploader = mobj.group('user')
        resource = mobj.group('rsrc')
        if resource is None:
            resource = 'tracks'
        elif resource == 'likes':
            resource = 'favorites'

        url = 'http://soundcloud.com/%s/' % uploader
        resolv_url = self._resolv_url(url)
        user = self._download_json(
            resolv_url, uploader, 'Downloading user info')
        base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource)

        resource = mobj.group('rsrc') or 'all'
        base_url = self._BASE_URL_MAP[resource] % user['id']

        next_href = None

        entries = []
        for i in itertools.count():
            data = compat_urllib_parse.urlencode({
                'offset': i * 50,
                'limit': 50,
                'client_id': self._CLIENT_ID,
            })
            new_entries = self._download_json(
                base_url + data, uploader, 'Downloading track page %s' % (i + 1))
            if len(new_entries) == 0:
            if not next_href:
                data = compat_urllib_parse.urlencode({
                    'offset': i * 50,
                    'limit': 50,
                    'client_id': self._CLIENT_ID,
                    'linked_partitioning': '1',
                    'representation': 'speedy',
                })
                next_href = base_url + '?' + data

            response = self._download_json(
                next_href, uploader, 'Downloading track page %s' % (i + 1))

            collection = response['collection']

            if not collection:
                self.to_screen('%s: End page received' % uploader)
                break
            entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries)

            def resolve_permalink_url(candidates):
                for cand in candidates:
                    if isinstance(cand, dict):
                        permalink_url = cand.get('permalink_url')
                        if permalink_url and permalink_url.startswith('http'):
                            return permalink_url

            for e in collection:
                permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
                if permalink_url:
                    entries.append(self.url_result(permalink_url))

            if 'next_href' in response:
                next_href = response['next_href']
                if not next_href:
                    break
            else:
                next_href = None

        return {
            '_type': 'playlist',
            'id': compat_str(user['id']),
            'title': user['username'],
            'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
            'entries': entries,
        }
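The rewritten loop above replaces manual offset arithmetic with SoundCloud's linked_partitioning cursor: each response carries a next_href pointing at the following page, and iteration stops when the collection is empty or next_href disappears. The skeleton of that pattern, independent of the extractor plumbing (fetch_json is a stand-in for the JSON download helper):

# Skeleton of linked_partitioning paging as used above; fetch_json is a
# hypothetical stand-in for the actual HTTP/JSON helper.
def iterate_collection(base_url, fetch_json):
    next_href = base_url + '?limit=50&linked_partitioning=1'
    while True:
        response = fetch_json(next_href)
        collection = response.get('collection')
        if not collection:
            break  # end page received
        for entry in collection:
            yield entry
        next_href = response.get('next_href')
        if not next_href:
            break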
@@ -45,6 +45,14 @@ class SouthParkDeIE(SouthParkIE):
            'title': 'The Government Won\'t Respect My Privacy',
            'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
        },
    }, {
        # non-ASCII characters in initial URL
        'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen',
        'playlist_count': 4,
    }, {
        # non-ASCII characters in redirect URL
        'url': 'http://www.southpark.de/alle-episoden/s18e09',
        'playlist_count': 4,
    }]
@@ -16,8 +16,9 @@ from ..aes import aes_decrypt_text


class SpankwireIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
    _TEST = {
    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)'
    _TESTS = [{
        # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
        'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
        'md5': '8bbfde12b101204b39e4b9fe7eb67095',
        'info_dict': {

@@ -30,14 +31,27 @@ class SpankwireIE(InfoExtractor):
            'upload_date': '20070507',
            'age_limit': 18,
        }
    }
    }, {
        # download URL pattern: */mp4_<format_id>_<video_id>.mp4
        'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
        'md5': '09b3c20833308b736ae8902db2f8d7e6',
        'info_dict': {
            'id': '1921551',
            'ext': 'mp4',
            'title': 'Titcums Compiloation I',
            'description': 'cum on tits',
            'uploader': 'dannyh78999',
            'uploader_id': '3056053',
            'upload_date': '20150822',
            'age_limit': 18,
        },
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        url = 'http://www.' + mobj.group('url')
        video_id = mobj.group('id')

        req = compat_urllib_request.Request(url)
        req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

@@ -54,7 +68,7 @@ class SpankwireIE(InfoExtractor):
            r'by:\s*<a [^>]*>(.+?)</a>',
            webpage, 'uploader', fatal=False)
        uploader_id = self._html_search_regex(
            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"',
            r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
            webpage, 'uploader id', fatal=False)
        upload_date = unified_strdate(self._html_search_regex(
            r'</a> on (.+?) at \d+:\d+',

@@ -67,9 +81,10 @@ class SpankwireIE(InfoExtractor):
            r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
            webpage, 'comment count', fatal=False))

        video_urls = list(map(
            compat_urllib_parse_unquote,
            re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
        videos = re.findall(
            r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
        heights = [int(video[0]) for video in videos]
        video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
        if webpage.find('flashvars\.encrypted = "true"') != -1:
            password = self._search_regex(
                r'flashvars\.video_title = "([^"]+)',

@@ -79,21 +94,22 @@ class SpankwireIE(InfoExtractor):
                video_urls))

        formats = []
        for video_url in video_urls:
        for height, video_url in zip(heights, video_urls):
            path = compat_urllib_parse_urlparse(video_url).path
            format = path.split('/')[4].split('_')[:2]
            resolution, bitrate_str = format
            format = "-".join(format)
            height = int(resolution.rstrip('Pp'))
            tbr = int(bitrate_str.rstrip('Kk'))
            formats.append({
            _, quality = path.split('/')[4].split('_')[:2]
            f = {
                'url': video_url,
                'resolution': resolution,
                'format': format,
                'tbr': tbr,
                'height': height,
                'format_id': format,
            })
            }
            tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
            if tbr:
                f.update({
                    'tbr': int(tbr),
                    'format_id': '%dp' % height,
                })
            else:
                f['format_id'] = quality
            formats.append(f)
        self._sort_formats(formats)

        age_limit = self._rta_search(webpage)
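The reworked loop above pairs each CDN path with the height announced in the page and only derives tbr when the second path component matches `<n>K`, so both the older `<height>P_<tbr>K_<id>.mp4` URLs and the newer `mp4_<label>_<id>.mp4` URLs are handled. A standalone sketch of that classification (the sample paths are invented):

# Sketch of the two URL layouts handled above; sample paths are invented.
import re

def classify(path, height):
    _, quality = path.split('/')[4].split('_')[:2]
    tbr = re.match(r'^(\d+)[Kk]$', quality)
    if tbr:
        return {'height': height, 'tbr': int(tbr.group(1)), 'format_id': '%dp' % height}
    return {'height': height, 'format_id': quality}

print(classify('/201007/19/103545/480P_2000K_103545.mp4', 480))      # tbr style
print(classify('/201508/22/1921551/mp4_normal_1921551.mp4', 360))    # label style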
@@ -38,10 +38,12 @@ class SportDeutschlandIE(InfoExtractor):
            'upload_date': '20140825',
            'description': 'md5:60a20536b57cee7d9a4ec005e8687504',
            'timestamp': 1408976060,
            'duration': 2732,
            'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
            'thumbnail': 're:^https?://.*\.jpg$',
            'view_count': int,
            'categories': ['Li-Ning Badminton WM 2014'],
        }
    }]

@@ -50,7 +52,7 @@ class SportDeutschlandIE(InfoExtractor):
        video_id = mobj.group('id')
        sport_id = mobj.group('sport')

        api_url = 'http://splink.tv/api/permalinks/%s/%s' % (
        api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
            sport_id, video_id)
        req = compat_urllib_request.Request(api_url, headers={
            'Accept': 'application/vnd.vidibus.v2.html+json',

@@ -58,12 +60,11 @@ class SportDeutschlandIE(InfoExtractor):
        })
        data = self._download_json(req, video_id)

        categories = list(data.get('section', {}).get('tags', {}).values())
        asset = data['asset']
        assets_info = self._download_json(asset['url'], video_id)
        categories = [data['section']['title']]

        formats = []
        smil_url = assets_info['video']
        smil_url = asset['video']
        if '.smil' in smil_url:
            m3u8_url = smil_url.replace('.smil', '.m3u8')
            formats.extend(

@@ -91,6 +92,7 @@ class SportDeutschlandIE(InfoExtractor):
            'title': asset['title'],
            'thumbnail': asset.get('image'),
            'description': asset.get('teaser'),
            'duration': asset.get('duration'),
            'categories': categories,
            'view_count': asset.get('views'),
            'rtmp_live': asset.get('live'),
@@ -6,7 +6,7 @@ from .mitele import MiTeleIE

class TelecincoIE(MiTeleIE):
    IE_NAME = 'telecinco.es'
    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
    _VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html'

    _TESTS = [{
        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',

@@ -23,4 +23,7 @@ class TelecincoIE(MiTeleIE):
    }, {
        'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
        'only_matching': True,
    }, {
        'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
        'only_matching': True,
    }]
35 youtube_dl/extractor/telegraaf.py Normal file
@@ -0,0 +1,35 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import remove_end


class TelegraafIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
    _TEST = {
        'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
        'md5': '83245a9779bcc4a24454bfd53c65b6dc',
        'info_dict': {
            'id': '24353229',
            'ext': 'mp4',
            'title': 'Tikibad ontruimd wegens brand',
            'description': 'md5:05ca046ff47b931f9b04855015e163a4',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 33,
        },
    }

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        playlist_url = self._search_regex(
            r"iframe\.loadPlayer\('([^']+)'", webpage, 'player')

        entries = self._extract_xspf_playlist(playlist_url, playlist_id)
        title = remove_end(self._og_search_title(webpage), ' - VIDEO')
        description = self._og_search_description(webpage)

        return self.playlist_result(entries, playlist_id, title, description)
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import re
import json
import time
import hmac
import binascii

@@ -10,7 +10,8 @@ import hashlib

from .common import InfoExtractor
from ..compat import (
    compat_str,
    compat_parse_qs,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    determine_ext,

@@ -18,12 +19,69 @@ from ..utils import (
    xpath_with_ns,
    unsmuggle_url,
    int_or_none,
    url_basename,
    float_or_none,
)

_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})


class ThePlatformIE(InfoExtractor):
class ThePlatformBaseIE(InfoExtractor):
    def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
        meta = self._download_xml(smil_url, video_id, note=note)
        try:
            error_msg = next(
                n.attrib['abstract']
                for n in meta.findall(_x('.//smil:ref'))
                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
        except StopIteration:
            pass
        else:
            raise ExtractorError(error_msg, expected=True)

        formats = self._parse_smil_formats(
            meta, smil_url, video_id, namespace=default_ns,
            # the parameters are from syfy.com, other sites may use others,
            # they also work for nbc.com
            f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
            transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))

        for _format in formats:
            ext = determine_ext(_format['url'])
            if ext == 'once':
                _format['ext'] = 'mp4'

        self._sort_formats(formats)

        subtitles = self._parse_smil_subtitles(meta, default_ns)

        return formats, subtitles

    def get_metadata(self, path, video_id):
        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
        info = self._download_json(info_url, video_id)

        subtitles = {}
        captions = info.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
                subtitles[lang] = [{
                    'ext': 'srt' if mime == 'text/srt' else 'ttml',
                    'url': src,
                }]

        return {
            'title': info['title'],
            'subtitles': subtitles,
            'description': info['description'],
            'thumbnail': info['defaultThumbnailUrl'],
            'duration': int_or_none(info.get('duration'), 1000),
        }


class ThePlatformIE(ThePlatformBaseIE):
    _VALID_URL = r'''(?x)
        (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
           (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?

@@ -67,6 +125,20 @@ class ThePlatformIE(InfoExtractor):
    }, {
        'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
        'only_matching': True,
    }, {
        'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
        'md5': '734f3790fb5fc4903da391beeebc4836',
        'info_dict': {
            'id': 'tdy_or_siri_150701',
            'ext': 'mp4',
            'title': 'iPhone Siri’s sassy response to a math question has people talking',
            'description': 'md5:a565d1deadd5086f3331d57298ec6333',
            'duration': 83.0,
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1435752600,
            'upload_date': '20150701',
            'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
        },
    }]

    @staticmethod

@@ -101,6 +173,24 @@ class ThePlatformIE(InfoExtractor):
            path += '/media'
        path += '/' + video_id

        qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        if 'guid' in qs_dict:
            webpage = self._download_webpage(url, video_id)
            scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
            feed_id = None
            # feed id usually locates in the last script.
            # Seems there's no pattern for the interested script filename, so
            # I try one by one
            for script in reversed(scripts):
                feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
                feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
                if feed_id is not None:
                    break
            if feed_id is None:
                raise ExtractorError('Unable to find feed id')
            return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
                provider_id, feed_id, qs_dict['guid'][0]))

        if smuggled_data.get('force_smil_url', False):
            smil_url = url
        elif mobj.group('config'):

@@ -108,7 +198,11 @@ class ThePlatformIE(InfoExtractor):
            config_url = config_url.replace('swf/', 'config/')
            config_url = config_url.replace('onsite/', 'onsite/config/')
            config = self._download_json(config_url, video_id, 'Downloading config')
            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
            if 'releaseUrl' in config:
                release_url = config['releaseUrl']
            else:
                release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
            smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m'
        else:
            smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path

@@ -116,95 +210,85 @@ class ThePlatformIE(InfoExtractor):
        if sig:
            smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])

        meta = self._download_xml(smil_url, video_id)
        try:
            error_msg = next(
                n.attrib['abstract']
                for n in meta.findall(_x('.//smil:ref'))
                if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
        except StopIteration:
            pass
        else:
            raise ExtractorError(error_msg, expected=True)
        formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)

        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
        info_json = self._download_webpage(info_url, video_id)
        info = json.loads(info_json)

        subtitles = {}
        captions = info.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
                subtitles[lang] = [{
                    'ext': 'srt' if mime == 'text/srt' else 'ttml',
                    'url': src,
                }]

        head = meta.find(_x('smil:head'))
        body = meta.find(_x('smil:body'))

        f4m_node = body.find(_x('smil:seq//smil:video'))
        if f4m_node is None:
            f4m_node = body.find(_x('smil:seq/smil:video'))
        if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
            f4m_url = f4m_node.attrib['src']
            if 'manifest.f4m?' not in f4m_url:
                f4m_url += '?'
            # the parameters are from syfy.com, other sites may use others,
            # they also work for nbc.com
            f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
            formats = self._extract_f4m_formats(f4m_url, video_id)
        else:
            formats = []
            switch = body.find(_x('smil:switch'))
            if switch is None:
                switch = body.find(_x('smil:par//smil:switch'))
            if switch is None:
                switch = body.find(_x('smil:par/smil:switch'))
            if switch is None:
                switch = body.find(_x('smil:par'))
            if switch is not None:
                base_url = head.find(_x('smil:meta')).attrib['base']
                for f in switch.findall(_x('smil:video')):
                    attr = f.attrib
                    width = int_or_none(attr.get('width'))
                    height = int_or_none(attr.get('height'))
                    vbr = int_or_none(attr.get('system-bitrate'), 1000)
                    format_id = '%dx%d_%dk' % (width, height, vbr)
                    formats.append({
                        'format_id': format_id,
                        'url': base_url,
                        'play_path': 'mp4:' + attr['src'],
                        'ext': 'flv',
                        'width': width,
                        'height': height,
                        'vbr': vbr,
                    })
            else:
                switch = body.find(_x('smil:seq//smil:switch'))
                if switch is None:
                    switch = body.find(_x('smil:seq/smil:switch'))
                for f in switch.findall(_x('smil:video')):
                    attr = f.attrib
                    vbr = int_or_none(attr.get('system-bitrate'), 1000)
                    ext = determine_ext(attr['src'])
                    if ext == 'once':
                        ext = 'mp4'
                    formats.append({
                        'format_id': compat_str(vbr),
                        'url': attr['src'],
                        'vbr': vbr,
                        'ext': ext,
                    })
            self._sort_formats(formats)

        return {
        ret = self.get_metadata(path, video_id)
        combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
        ret.update({
            'id': video_id,
            'title': info['title'],
            'subtitles': subtitles,
            'formats': formats,
            'description': info['description'],
            'thumbnail': info['defaultThumbnailUrl'],
            'duration': int_or_none(info.get('duration'), 1000),
        }
            'subtitles': combined_subtitles,
        })

        return ret


class ThePlatformFeedIE(ThePlatformBaseIE):
    _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s'
    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)'
    _TEST = {
        # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
        'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
        'md5': '22d2b84f058d3586efcd99e57d59d314',
        'info_dict': {
            'id': 'n_hardball_5biden_140207',
            'ext': 'mp4',
            'title': 'The Biden factor: will Joe run in 2016?',
            'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'upload_date': '20140208',
            'timestamp': 1391824260,
            'duration': 467.0,
            'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        provider_id = mobj.group('provider_id')
        feed_id = mobj.group('feed_id')

        real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id)
        feed = self._download_json(real_url, video_id)
        entry = feed['entries'][0]

        formats = []
        subtitles = {}
        first_video_id = None
        duration = None
        for item in entry['media$content']:
            smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
            cur_video_id = url_basename(smil_url)
            if first_video_id is None:
                first_video_id = cur_video_id
                duration = float_or_none(item.get('plfile$duration'))
            cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
            formats.extend(cur_formats)
            subtitles = self._merge_subtitles(subtitles, cur_subtitles)

        self._sort_formats(formats)

        thumbnails = [{
            'url': thumbnail['plfile$url'],
            'width': int_or_none(thumbnail.get('plfile$width')),
            'height': int_or_none(thumbnail.get('plfile$height')),
        } for thumbnail in entry.get('media$thumbnails', [])]

        timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
        categories = [item['media$name'] for item in entry.get('media$categories', [])]

        ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
        subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
        ret.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'duration': duration,
            'timestamp': timestamp,
            'categories': categories,
        })

        return ret
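Both extractors above now accumulate subtitles per SMIL file and fold them into one language-to-track-list dict via _merge_subtitles. A plain-dict sketch of those merge semantics (this is not youtube-dl's actual helper, just the behavior the refactoring relies on):

# Plain-dict sketch of the subtitle-merge semantics relied on above;
# not youtube-dl's actual _merge_subtitles implementation.
def merge_subtitles(*dicts):
    merged = {}
    for d in dicts:
        for lang, tracks in d.items():
            merged.setdefault(lang, [])
            merged[lang].extend(tracks)
    return merged

a = {'en': [{'ext': 'srt', 'url': 'http://example.com/en.srt'}]}
b = {'en': [{'ext': 'ttml', 'url': 'http://example.com/en.ttml'}],
     'de': [{'ext': 'srt', 'url': 'http://example.com/de.srt'}]}
print(merge_subtitles(a, b))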
@@ -60,9 +60,7 @@ class TubiTvIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)
        if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
            raise ExtractorError(
                'This video requires login, use --username and --password '
                'options to provide account credentials.', expected=True)
            self.raise_login_required('This video requires login')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
@@ -29,6 +29,8 @@ class TudouIE(InfoExtractor):
        }
    }]

    _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'

    def _url_for_id(self, id, quality=None):
        info_url = "http://v2.tudou.com/f?id=" + str(id)
        if quality:

@@ -54,6 +56,10 @@ class TudouIE(InfoExtractor):
        thumbnail_url = self._search_regex(
            r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False)

        player_url = self._search_regex(
            r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']",
            webpage, 'player URL', default=self._PLAYER_URL)

        segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments')
        segments = json.loads(segs_json)
        # It looks like the keys are the arguments that have to be passed as

@@ -76,6 +82,9 @@ class TudouIE(InfoExtractor):
                'ext': ext,
                'title': title,
                'thumbnail': thumbnail_url,
                'http_headers': {
                    'Referer': player_url,
                },
            }
            result.append(part_info)
@@ -104,6 +104,7 @@ class TVPlayIE(InfoExtractor):
            'duration': 1492,
            'timestamp': 1330522854,
            'upload_date': '20120229',
            'age_limit': 18,
        },
        'params': {
            # rtmp download
@@ -1,19 +1,13 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    xpath_text,
    xpath_with_ns,
    int_or_none,
    float_or_none,
)


class TweakersIE(InfoExtractor):
    _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
        'md5': '1b5afa817403bb5baa08359dca31e6df',
        'md5': '3147e4ddad366f97476a93863e4557c8',
        'info_dict': {
            'id': '9926',
            'ext': 'mp4',

@@ -25,41 +19,7 @@ class TweakersIE(InfoExtractor):
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        playlist = self._download_xml(
            'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id,
            video_id)

        NS_MAP = {
            'xspf': 'http://xspf.org/ns/0/',
            's1': 'http://static.streamone.nl/player/ns/0',
        }

        track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP))

        title = xpath_text(
            track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
        description = xpath_text(
            track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
        thumbnail = xpath_text(
            track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
        duration = float_or_none(
            xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'),
            1000)

        formats = [{
            'url': location.text,
            'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
            'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
            'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
        } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
        playlist_id = self._match_id(url)
        entries = self._extract_xspf_playlist(
            'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % playlist_id, playlist_id)
        return self.playlist_result(entries, playlist_id)
@@ -7,12 +7,17 @@ import random

from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_str,
    compat_urllib_parse,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_duration,
    parse_iso8601,
)

@@ -23,7 +28,7 @@ class TwitchBaseIE(InfoExtractor):
    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'http://usher.twitch.tv'
    _LOGIN_URL = 'https://secure.twitch.tv/login'
    _LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'
    _LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new'
    _NETRC_MACHINE = 'twitch'

    def _handle_error(self, response):

@@ -66,8 +71,15 @@ class TwitchBaseIE(InfoExtractor):
            'password': password.encode('utf-8'),
        })

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post url', default=self._LOGIN_POST_URL, group='url')

        if not post_url.startswith('http'):
            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

        request = compat_urllib_request.Request(
            self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
            post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
        request.add_header('Referer', self._LOGIN_URL)
        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

@@ -129,14 +141,14 @@ class TwitchItemBaseIE(TwitchBaseIE):
    def _extract_info(self, info):
        return {
            'id': info['_id'],
            'title': info['title'],
            'description': info['description'],
            'duration': info['length'],
            'thumbnail': info['preview'],
            'uploader': info['channel']['display_name'],
            'uploader_id': info['channel']['name'],
            'timestamp': parse_iso8601(info['recorded_at']),
            'view_count': info['views'],
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('length')),
            'thumbnail': info.get('preview'),
            'uploader': info.get('channel', {}).get('display_name'),
            'uploader_id': info.get('channel', {}).get('name'),
            'timestamp': parse_iso8601(info.get('recorded_at')),
            'view_count': int_or_none(info.get('views')),
        }

    def _real_extract(self, url):

@@ -184,8 +196,8 @@ class TwitchVodIE(TwitchItemBaseIE):
    _ITEM_TYPE = 'vod'
    _ITEM_SHORTCUT = 'v'

    _TEST = {
        'url': 'http://www.twitch.tv/riotgames/v/6528877',
    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
        'info_dict': {
            'id': 'v6528877',
            'ext': 'mp4',

@@ -197,12 +209,32 @@ class TwitchVodIE(TwitchItemBaseIE):
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'view_count': int,
            'start_time': 310,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    }, {
        # Untitled broadcast (title is None)
        'url': 'http://www.twitch.tv/belkao_o/v/11230755',
        'info_dict': {
            'id': 'v11230755',
            'ext': 'mp4',
            'title': 'Untitled Broadcast',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 1638,
            'timestamp': 1439746708,
            'upload_date': '20150816',
            'uploader': 'BelkAO_o',
            'uploader_id': 'belkao_o',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        item_id = self._match_id(url)

@@ -216,6 +248,12 @@ class TwitchVodIE(TwitchItemBaseIE):
            item_id, 'mp4')
        self._prefer_source(formats)
        info['formats'] = formats

        parsed_url = compat_urllib_parse_urlparse(url)
        query = compat_parse_qs(parsed_url.query)
        if 't' in query:
            info['start_time'] = parse_duration(query['t'][0])

        return info
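The VOD extractor now honours the ?t=5m10s deep-link by parsing the query string and converting the duration token to seconds. The same conversion without youtube-dl's parse_duration helper (a simplified sketch that only handles the h/m/s form; the real helper accepts more formats):

# Simplified sketch of the ?t= handling above; youtube-dl's parse_duration
# accepts more formats than this h/m/s-only version.
import re

try:
    from urllib.parse import urlparse, parse_qs  # Python 3
except ImportError:
    from urlparse import urlparse, parse_qs  # Python 2


def start_time_from_url(url):
    query = parse_qs(urlparse(url).query)
    if 't' not in query:
        return None
    mobj = re.match(r'(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s)?$', query['t'][0])
    hours, minutes, seconds = (int(g or 0) for g in mobj.groups())
    return hours * 3600 + minutes * 60 + seconds

print(start_time_from_url('http://www.twitch.tv/riotgames/v/6528877?t=5m10s'))  # 310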
@@ -70,9 +70,7 @@ class UdemyIE(InfoExtractor):
    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            raise ExtractorError(
                'Udemy account is required, use --username and --password options to provide account credentials.',
                expected=True)
            self.raise_login_required('Udemy account is required')

        login_popup = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login popup')
@@ -1,81 +0,0 @@
from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor
from ..utils import int_or_none


class VideoBamIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'

    _TESTS = [
        {
            'url': 'http://videobam.com/OiJQM',
            'md5': 'db471f27763a531f10416a0c58b5a1e0',
            'info_dict': {
                'id': 'OiJQM',
                'ext': 'mp4',
                'title': 'Is Alcohol Worse Than Ecstasy?',
                'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
                'uploader': 'frihetsvinge',
            },
        },
        {
            'url': 'http://videobam.com/pqLvq',
            'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
            'note': 'HD video',
            'info_dict': {
                'id': 'pqLvq',
                'ext': 'mp4',
                'title': '_',
            }
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')

        formats = []

        for preference, format_id in enumerate(['low', 'high']):
            mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
            if not mobj:
                continue
            formats.append({
                'url': mobj.group('url'),
                'ext': 'mp4',
                'format_id': format_id,
                'preference': preference,
            })

        if not formats:
            player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
            formats = [{
                'url': item['url'],
                'ext': 'mp4',
            } for item in player_config['playlist'] if 'autoPlay' in item]

        self._sort_formats(formats)

        title = self._og_search_title(page, default='_', fatal=False)
        description = self._og_search_description(page, default=None)
        thumbnail = self._og_search_thumbnail(page)
        uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
        view_count = int_or_none(
            self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'view_count': view_count,
            'formats': formats,
            'age_limit': 18,
        }
@@ -12,7 +12,7 @@ from ..utils import (


class VideoLecturesNetIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
_VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'
IE_NAME = 'videolectures.net'

_TEST = {

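An illustrative check (the sub-page URL is hypothetical, not from the source) of what tightening the VideoLecturesNet pattern changes: the old regex also matched URLs with extra path segments, while the new one anchors at the end of the path:

```python
import re

OLD = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
NEW = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'

# A deeper page that should NOT be claimed by the lecture extractor:
url = 'http://videolectures.net/deeplearning2015_montreal/video/1/'
print(bool(re.match(OLD, url)), bool(re.match(NEW, url)))  # True False
```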
@@ -22,6 +22,27 @@ class VidmeIE(InfoExtractor):
'timestamp': 1406313244,
'upload_date': '20140725',
'thumbnail': 're:^https?://.*\.jpg',
'view_count': int,
'like_count': int,
},
}, {
# tests uploader field
'url': 'https://vid.me/4Iib',
'info_dict': {
'id': '4Iib',
'ext': 'mp4',
'title': 'The Carver',
'description': 'md5:e9c24870018ae8113be936645b93ba3c',
'duration': 97.859999999999999,
'timestamp': 1433203629,
'upload_date': '20150602',
'uploader': 'Thomas',
'thumbnail': 're:^https?://.*\.jpg',
'view_count': int,
'like_count': int,
},
'params': {
'skip_download': True,
},
}, {
# From http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching

@@ -40,16 +61,23 @@ class VidmeIE(InfoExtractor):
title = self._og_search_title(webpage)
description = self._og_search_description(webpage, default='')
thumbnail = self._og_search_thumbnail(webpage)
timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False))
width = int_or_none(self._og_search_property('video:width', webpage, fatal=False))
height = int_or_none(self._og_search_property('video:height', webpage, fatal=False))
timestamp = int_or_none(self._og_search_property(
'updated_time', webpage, fatal=False))
width = int_or_none(self._og_search_property(
'video:width', webpage, fatal=False))
height = int_or_none(self._og_search_property(
'video:height', webpage, fatal=False))
duration = float_or_none(self._html_search_regex(
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
view_count = str_to_int(self._html_search_regex(
r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?',
webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex(
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
webpage, 'like count', fatal=False))
uploader = self._html_search_regex(
'class="video_author_username"[^>]*>([^<]+)',
webpage, 'uploader', default=None)

return {
'id': video_id,
@@ -63,4 +91,5 @@ class VidmeIE(InfoExtractor):
'duration': duration,
'view_count': view_count,
'like_count': like_count,
'uploader': uploader,
}

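A tiny illustration of the `str_to_int`-style normalisation the view/like counters above rely on (this simplified version strips separators before `int()`; the real helper may differ in detail):

```python
import re

def str_to_int(s):
    # '1,234' and '12.345' are display strings with thousands separators.
    return int(re.sub(r'[,\.\s]', '', s)) if s else None

print(str_to_int('1,234'))   # 1234
print(str_to_int('12.345'))  # 12345
```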
@@ -5,11 +5,13 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
compat_urllib_parse_unquote,
)
from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
HEADRequest,
)


@@ -62,7 +64,6 @@ class ViewsterIE(InfoExtractor):
}]

_ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
_AUTH_TOKEN = '/YqhSYsx8EaU9Bsta3ojlA=='

def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True):
request = compat_urllib_request.Request(url)
@@ -72,6 +73,10 @@ class ViewsterIE(InfoExtractor):

def _real_extract(self, url):
video_id = self._match_id(url)
# Get 'api_token' cookie
self._request_webpage(HEADRequest(url), video_id)
cookies = self._get_cookies(url)
self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)

info = self._download_json(
'https://public-api.viewster.com/search/%s' % video_id,

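A rough standard-library illustration of the cookie dance the Viewster hunk adds: issue a HEAD request so the server sets its cookies, then read one back, percent-decoded. The URL and cookie name follow the diff; error handling is omitted and this is not the extractor's actual code path:

```python
import urllib.request
from http.cookiejar import CookieJar
from urllib.parse import unquote

jar = CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))

# HEAD request: no body download, but Set-Cookie headers are processed.
req = urllib.request.Request('http://www.viewster.com/', method='HEAD')
opener.open(req)

# Read the api_token cookie back, percent-decoded, if the server set it.
token = next((unquote(c.value) for c in jar if c.name == 'api_token'), None)
```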
@@ -29,6 +29,7 @@ from ..utils import (
class VimeoBaseInfoExtractor(InfoExtractor):
_NETRC_MACHINE = 'vimeo'
_LOGIN_REQUIRED = False
_LOGIN_URL = 'https://vimeo.com/log_in'

def _login(self):
(username, password) = self._get_login_info()
@@ -37,21 +38,25 @@ class VimeoBaseInfoExtractor(InfoExtractor):
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return
self.report_login()
login_url = 'https://vimeo.com/log_in'
webpage = self._download_webpage(login_url, None, False)
token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
webpage = self._download_webpage(self._LOGIN_URL, None, False)
token = self._extract_xsrft(webpage)
data = urlencode_postdata({
'action': 'login',
'email': username,
'password': password,
'action': 'login',
'service': 'vimeo',
'token': token,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_request.add_header('Cookie', 'xsrft=%s' % token)
login_request.add_header('Referer', self._LOGIN_URL)
self._download_webpage(login_request, None, False, 'Wrong login info')

def _extract_xsrft(self, webpage):
return self._search_regex(
r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
webpage, 'login token', group='xsrft')


class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
@@ -193,7 +198,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
token = self._extract_xsrft(webpage)
data = urlencode_postdata({
'password': password,
'token': token,
@@ -203,7 +208,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = url.replace('http://', 'https://')
password_request = compat_urllib_request.Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token)
password_request.add_header('Referer', url)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
@@ -422,10 +427,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
}


class VimeoChannelIE(InfoExtractor):
class VimeoChannelIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:channel'
_VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
_TITLE = None
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
_TESTS = [{
'url': 'https://vimeo.com/channels/tributes',
@@ -440,7 +446,7 @@ class VimeoChannelIE(InfoExtractor):
return '%s/videos/page:%d/' % (base_url, pagenum)

def _extract_list_title(self, webpage):
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title')

def _login_list_password(self, page_url, list_id, webpage):
login_form = self._search_regex(
@@ -453,7 +459,7 @@ class VimeoChannelIE(InfoExtractor):
if password is None:
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
fields = self._hidden_inputs(login_form)
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
token = self._extract_xsrft(webpage)
fields['token'] = token
fields['password'] = password
post = urlencode_postdata(fields)
@@ -499,7 +505,7 @@ class VimeoChannelIE(InfoExtractor):

class VimeoUserIE(VimeoChannelIE):
IE_NAME = 'vimeo:user'
_VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
_VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
_TESTS = [{
'url': 'https://vimeo.com/nkistudio/videos',
@@ -603,14 +609,14 @@ class VimeoReviewIE(InfoExtractor):
return self.url_result(player_url, 'Vimeo', video_id)


class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
class VimeoWatchLaterIE(VimeoChannelIE):
IE_NAME = 'vimeo:watchlater'
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
_VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
_VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
_TITLE = 'Watch Later'
_LOGIN_REQUIRED = True
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
_TESTS = [{
'url': 'https://vimeo.com/home/watchlater',
'url': 'https://vimeo.com/watchlater',
'only_matching': True,
}]

@@ -626,7 +632,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
return request

def _real_extract(self, url):
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')


class VimeoLikesIE(InfoExtractor):

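A standalone demo of the new `_extract_xsrft` regex from the Vimeo hunk (the sample markup is an assumption, chosen only to exercise both quote styles): the `(?P<q>...)` group plus the `(?P=q)` backreference lets one pattern handle single- and double-quoted tokens alike:

```python
import re

XSRFT_RE = r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)'

for page in ('window.xsrft = "abc123";', "{xsrft:'tok-456'}"):
    m = re.search(XSRFT_RE, page)
    # The opening quote is captured, then (?P=q) requires the same
    # character to close the value.
    print(m.group('xsrft'))  # abc123, then tok-456
```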
@@ -4,7 +4,6 @@ import re

from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
str_to_int,
int_or_none,
@@ -22,7 +21,7 @@ class XHamsterIE(InfoExtractor):
'ext': 'mp4',
'title': 'FemaleAgent Shy beauty takes the bait',
'upload_date': '20121014',
'uploader_id': 'Ruseful2011',
'uploader': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
}
@@ -34,7 +33,7 @@ class XHamsterIE(InfoExtractor):
'ext': 'mp4',
'title': 'Britney Spears Sexy Booty',
'upload_date': '20130914',
'uploader_id': 'jojo747400',
'uploader': 'jojo747400',
'duration': 200,
'age_limit': 18,
}
@@ -46,12 +45,12 @@ class XHamsterIE(InfoExtractor):
]

def _real_extract(self, url):
def extract_video_url(webpage):
mp4 = re.search(r'<video\s+.*?file="([^"]+)".*?>', webpage)
if mp4 is None:
raise ExtractorError('Unable to extract media URL')
else:
return mp4.group(1)
def extract_video_url(webpage, name):
return self._search_regex(
[r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
webpage, name, group='mp4')

def is_hd(webpage):
return '<div class=\'icon iconHD\'' in webpage
@@ -75,10 +74,14 @@ class XHamsterIE(InfoExtractor):
if upload_date:
upload_date = unified_strdate(upload_date)

uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
webpage, 'uploader id', default='anonymous')
uploader = self._html_search_regex(
r"<a href='[^']+xhamster\.com/user/[^>]+>(?P<uploader>[^<]+)",
webpage, 'uploader', default='anonymous')

thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
thumbnail = self._search_regex(
[r'''thumb\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
r'''<video[^>]+poster=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
webpage, 'thumbnail', fatal=False, group='thumbnail')

duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
webpage, 'duration', fatal=False))
@@ -97,7 +100,9 @@ class XHamsterIE(InfoExtractor):

hd = is_hd(webpage)

video_url = extract_video_url(webpage)
format_id = 'hd' if hd else 'sd'

video_url = extract_video_url(webpage, format_id)
formats = [{
'url': video_url,
'format_id': 'hd' if hd else 'sd',
@@ -108,7 +113,7 @@ class XHamsterIE(InfoExtractor):
mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url')
webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage')
if is_hd(webpage):
video_url = extract_video_url(webpage)
video_url = extract_video_url(webpage, 'hd')
formats.append({
'url': video_url,
'format_id': 'hd',
@@ -122,7 +127,7 @@ class XHamsterIE(InfoExtractor):
'title': title,
'description': description,
'upload_date': upload_date,
'uploader_id': uploader_id,
'uploader': uploader,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,

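A sketch (plain `re`, not youtube-dl's actual `_search_regex`) of the pattern-list idea the new `extract_video_url` uses: try several regexes in order and take the named group from the first one that matches. The sample page snippet is hypothetical:

```python
import re

PATTERNS = [
    r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
    r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>''',
]

def search_mp4(webpage):
    # First pattern that hits wins; later ones are fallbacks for
    # alternative page layouts.
    for pattern in PATTERNS:
        m = re.search(pattern, webpage)
        if m:
            return m.group('mp4')
    return None

print(search_mp4("file: 'http://example.com/v.mp4'"))  # http://example.com/v.mp4
```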
@@ -1,18 +1,38 @@
# coding=utf-8
# coding: utf-8
from __future__ import unicode_literals

import re
import hashlib

from .common import InfoExtractor
from ..compat import compat_str
from ..compat import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
int_or_none,
float_or_none,
)


class YandexMusicBaseIE(InfoExtractor):
class YandexMusicTrackIE(InfoExtractor):
IE_NAME = 'yandexmusic:track'
IE_DESC = 'Яндекс.Музыка - Трек'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'

_TEST = {
'url': 'http://music.yandex.ru/album/540508/track/4878838',
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
'info_dict': {
'id': '4878838',
'ext': 'mp3',
'title': 'Carlo Ambrosio - Gypsy Eyes 1',
'filesize': 4628061,
'duration': 193.04,
}
}

def _get_track_url(self, storage_dir, track_id):
data = self._download_json(
'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
@@ -35,24 +55,6 @@ class YandexMusicBaseIE(InfoExtractor):
'duration': float_or_none(track.get('durationMs'), 1000),
}


class YandexMusicTrackIE(YandexMusicBaseIE):
IE_NAME = 'yandexmusic:track'
IE_DESC = 'Яндекс.Музыка - Трек'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'

_TEST = {
'url': 'http://music.yandex.ru/album/540508/track/4878838',
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
'info_dict': {
'id': '4878838',
'ext': 'mp3',
'title': 'Carlo Ambrosio - Gypsy Eyes 1',
'filesize': 4628061,
'duration': 193.04,
}
}

def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
album_id, track_id = mobj.group('album_id'), mobj.group('id')
@@ -64,7 +66,15 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
return self._get_track_info(track)


class YandexMusicAlbumIE(YandexMusicBaseIE):
class YandexMusicPlaylistBaseIE(InfoExtractor):
def _build_playlist(self, tracks):
return [
self.url_result(
'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id']))
for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)]


class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
IE_NAME = 'yandexmusic:album'
IE_DESC = 'Яндекс.Музыка - Альбом'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
@@ -85,7 +95,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
album_id, 'Downloading album JSON')

entries = [self._get_track_info(track) for track in album['volumes'][0]]
entries = self._build_playlist(album['volumes'][0])

title = '%s - %s' % (album['artists'][0]['name'], album['title'])
year = album.get('year')
@@ -95,12 +105,12 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
return self.playlist_result(entries, compat_str(album['id']), title)


class YandexMusicPlaylistIE(YandexMusicBaseIE):
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
IE_NAME = 'yandexmusic:playlist'
IE_DESC = 'Яндекс.Музыка - Плейлист'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'

_TEST = {
_TESTS = [{
'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
'info_dict': {
'id': '1245',
@@ -108,20 +118,54 @@ class YandexMusicPlaylistIE(YandexMusicBaseIE):
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
},
'playlist_count': 6,
}
}, {
# playlist exceeding the limit of 150 tracks shipped with webpage (see
# https://github.com/rg3/youtube-dl/issues/6666)
'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
'info_dict': {
'id': '1036',
'title': 'Музыка 90-х',
},
'playlist_count': 310,
}]

def _real_extract(self, url):
playlist_id = self._match_id(url)

webpage = self._download_webpage(url, playlist_id)

playlist = self._parse_json(
mu = self._parse_json(
self._search_regex(
r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
playlist_id)['pageData']['playlist']
playlist_id)

entries = [self._get_track_info(track) for track in playlist['tracks']]
playlist = mu['pageData']['playlist']
tracks, track_ids = playlist['tracks'], playlist['trackIds']

# tracks dictionary shipped with webpage is limited to 150 tracks,
# missing tracks should be retrieved manually.
if len(tracks) < len(track_ids):
present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')])
missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids)
request = compat_urllib_request.Request(
'https://music.yandex.ru/handlers/track-entries.jsx',
compat_urllib_parse.urlencode({
'entries': ','.join(missing_track_ids),
'lang': mu.get('settings', {}).get('lang', 'en'),
'external-domain': 'music.yandex.ru',
'overembed': 'false',
'sign': mu.get('authData', {}).get('user', {}).get('sign'),
'strict': 'true',
}).encode('utf-8'))
request.add_header('Referer', url)
request.add_header('X-Requested-With', 'XMLHttpRequest')

missing_tracks = self._download_json(
request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
if missing_tracks:
tracks.extend(missing_tracks)

return self.playlist_result(
entries, compat_str(playlist_id),
self._build_playlist(tracks),
compat_str(playlist_id),
playlist['title'], playlist.get('description'))

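A toy, no-network version of the set arithmetic behind the new missing-tracks logic: the page ships at most 150 track objects, so any IDs in `trackIds` without a matching object must be fetched separately via `track-entries.jsx`. The sample data here is made up:

```python
tracks = [{'id': 1}, {'id': 2}]   # track objects embedded in the page
track_ids = [1, 2, 3, 4]          # the full playlist listing

present = {str(t['id']) for t in tracks if t.get('id')}
missing = set(map(str, track_ids)) - present
print(sorted(missing))  # ['3', '4'] -> request these in a second call
```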
@@ -33,9 +33,12 @@ from ..utils import (
int_or_none,
orderedSet,
parse_duration,
remove_start,
smuggle_url,
str_to_int,
unescapeHTML,
unified_strdate,
unsmuggle_url,
uppercase_escape,
ISO3166Utils,
)
@@ -44,7 +47,7 @@ from ..utils import (
class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
@@ -126,40 +129,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# Two-Factor
# TODO add SMS and phone call support - these require making a request and then prompting the user

if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
tfa_code = self._get_tfa_info()
if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
tfa_code = self._get_tfa_info('2-step verification code')

if tfa_code is None:
self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
if not tfa_code:
self._downloader.report_warning(
'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
return False

# Unlike the first login form, secTok and timeStmp are both required for the TFA form
tfa_code = remove_start(tfa_code, 'G-')

match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
if match is None:
self._downloader.report_warning('Failed to get secTok - did the page structure change?')
secTok = match.group(1)
match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
if match is None:
self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
timeStmp = match.group(1)
tfa_form_strs = self._form_hidden_inputs('challenge', login_results)

tfa_form_strs = {
'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
'smsToken': '',
'smsUserPin': tfa_code,
'smsVerifyPin': 'Verify',
tfa_form_strs.update({
'Pin': tfa_code,
'TrustDevice': 'on',
})

'PersistentCookie': 'yes',
'checkConnection': '',
'checkedDomains': 'youtube',
'pstMsg': '1',
'secTok': secTok,
'timeStmp': timeStmp,
'service': 'youtube',
'hl': 'en_US',
}
tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')

@@ -171,8 +158,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if tfa_results is False:
return False

if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
return False
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
self._downloader.report_warning('unable to log in - did the page structure change?')
@@ -211,11 +198,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|(?: # or the v= param in all its forms
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
(?:\?|\#!?) # the params delimiter ? or # or #!
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
(?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
v=
)
))
|youtu\.be/ # just youtu.be/xxxx
|(?:
youtu\.be| # just youtu.be/xxxx
vid\.plus # or vid.plus/xxxx
)/
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
)
)? # all until now is optional -> you can pass the naked ID
@@ -329,6 +319,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20121002',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'like_count': int,
'dislike_count': int,
'start_time': 1,
@@ -343,7 +334,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
'description': 'md5:782e8651347686cba06e58f71ab51773',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'],
'uploader': 'Icona Pop',
'uploader_id': 'IconaPop',
}
@@ -359,6 +353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'description': 'md5:64249768eec3bc4276236606ea996373',
'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO',
'age_limit': 18,
}
},
{
@@ -374,6 +369,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': 'setindia'
}
},
{
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
'note': 'Use the first video ID in the URL',
'info_dict': {
'id': 'BaW_jenozKc',
'ext': 'mp4',
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'upload_date': '20121002',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
},
},
{
'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
'note': '256k DASH audio (format 141) via DASH manifest',
@@ -415,7 +430,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'nfWlot6h_JM',
'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off',
'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818',
@@ -449,6 +464,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
'upload_date': '20140605',
'age_limit': 18,
},
},
# Age-gate video with encrypted signature
@@ -462,6 +478,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'LloydVEVO',
'uploader_id': 'LloydVEVO',
'upload_date': '20110629',
'age_limit': 18,
},
},
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
@@ -486,7 +503,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': {
'id': 'lqQg6PlCWgI',
'ext': 'mp4',
'upload_date': '20120731',
'upload_date': '20120724',
'uploader_id': 'olympic',
'description': 'HO09 - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympics',
@@ -515,7 +532,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'qEJwOuvDf7I',
'info_dict': {
'id': 'qEJwOuvDf7I',
'ext': 'mp4',
'ext': 'webm',
'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
'description': '',
'upload_date': '20150404',
@@ -558,6 +575,63 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format': '135', # bestvideo
}
},
{
# Multifeed videos (multiple cameras), URL is for Main Camera
'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
'info_dict': {
'id': 'jqWvoWXjCVs',
'title': 'teamPGP: Rocket League Noob Stream',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
},
'playlist': [{
'info_dict': {
'id': 'jqWvoWXjCVs',
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
},
}, {
'info_dict': {
'id': '6h8e8xoXJzg',
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
},
}, {
'info_dict': {
'id': 'PUOgX5z9xZw',
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
},
}, {
'info_dict': {
'id': 'teuwxikvS5k',
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (zim)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
},
}],
'params': {
'skip_download': True,
},
},
{
'url': 'http://vid.plus/FlRa-iH7PGw',
'only_matching': True,
}
]

def __init__(self, *args, **kwargs):
@@ -586,7 +660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match(
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$',
player_url)
if not id_m:
raise ExtractorError('Cannot identify player %r' % player_url)
@@ -889,6 +963,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return formats

def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})

proto = (
'http' if self._downloader.params.get('prefer_insecure', False)
else 'https')
@@ -1005,6 +1081,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'"token" parameter not in video info for unknown reason',
video_id=video_id)

# title
if 'title' in video_info:
video_title = video_info['title'][0]
else:
self._downloader.report_warning('Unable to extract video title')
video_title = '_'

# description
video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
title="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
class="yt-uix-redirect-link"\s*>
[^<]+
</a>
''', r'\1', video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
if fd_mobj:
video_description = unescapeHTML(fd_mobj.group(1))
else:
video_description = ''

if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
if not self._downloader.params.get('noplaylist'):
entries = []
feed_ids = []
multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
for feed in multifeed_metadata_list.split(','):
feed_data = compat_parse_qs(feed)
entries.append({
'_type': 'url_transparent',
'ie_key': 'Youtube',
'url': smuggle_url(
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
{'force_singlefeed': True}),
'title': '%s (%s)' % (video_title, feed_data['title'][0]),
})
feed_ids.append(feed_data['id'][0])
self.to_screen(
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
% (', '.join(feed_ids), video_id))
return self.playlist_result(entries, video_id, video_title, video_description)
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

if 'view_count' in video_info:
view_count = int(video_info['view_count'][0])
else:
@@ -1030,13 +1155,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
self._downloader.report_warning('unable to extract uploader nickname')

# title
if 'title' in video_info:
video_title = video_info['title'][0]
else:
self._downloader.report_warning('Unable to extract video title')
video_title = '_'

# thumbnail image
# We try first to get a high quality image:
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@@ -1072,25 +1190,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
video_categories = None

# description
video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
title="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
class="yt-uix-redirect-link"\s*>
[^<]+
</a>
''', r'\1', video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
if fd_mobj:
video_description = unescapeHTML(fd_mobj.group(1))
else:
video_description = ''
video_tags = [
unescapeHTML(m.group('content'))
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

def _extract_count(count_name):
return str_to_int(self._search_regex(
@@ -1187,7 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
r'html5player-([^/]+?)(?:/html5player)?\.js',
r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
@@ -1260,6 +1362,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'thumbnail': video_thumbnail,
'description': video_description,
'categories': video_categories,
'tags': video_tags,
'subtitles': video_subtitles,
'automatic_captions': automatic_captions,
'duration': video_duration,
@@ -1666,7 +1769,7 @@ class YoutubeSearchURLIE(InfoExtractor):
r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')

part_codes = re.findall(
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
entries = []
for part_code in part_codes:
part_title = self._html_search_regex(

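The multifeed change above relies on `smuggle_url`/`unsmuggle_url` to tag each camera feed with `force_singlefeed`, so re-extracting a feed URL does not recurse back into the playlist. A simplified round trip (the real helpers also pack the JSON payload into the URL fragment, though the exact encoding here is an assumption):

```python
import json
from urllib.parse import quote, unquote

def smuggle_url(url, data):
    # Stash extra extractor hints in the fragment; servers never see it.
    return url + '#__youtubedl_smuggle=' + quote(json.dumps(data))

def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle=' not in smug_url:
        return smug_url, default
    url, _, payload = smug_url.partition('#__youtubedl_smuggle=')
    return url, json.loads(unquote(payload))

u = smuggle_url('https://www.youtube.com/watch?v=jqWvoWXjCVs', {'force_singlefeed': True})
print(unsmuggle_url(u))
# ('https://www.youtube.com/watch?v=jqWvoWXjCVs', {'force_singlefeed': True})
```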
@@ -139,21 +139,24 @@ def write_json_file(obj, fn):


if sys.version_info >= (2, 7):
def find_xpath_attr(node, xpath, key, val):
def find_xpath_attr(node, xpath, key, val=None):
""" Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z-]+$', key)
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
expr = xpath + "[@%s='%s']" % (key, val)
if val:
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
return node.find(expr)
else:
def find_xpath_attr(node, xpath, key, val):
def find_xpath_attr(node, xpath, key, val=None):
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . !
if isinstance(xpath, compat_str):
xpath = xpath.encode('ascii')

for f in node.findall(xpath):
if f.attrib.get(key) == val:
if key not in f.attrib:
continue
if val is None or f.attrib.get(key) == val:
return f
return None

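A quick ElementTree demonstration of the new `val=None` behaviour: with no value given, the lookup now matches any element that merely *has* the attribute, while exact-value matching works as before. The XML snippet is made up:

```python
import xml.etree.ElementTree as ET

root = ET.fromstring('<r><a k="1"/><a/><a k="2"/></r>')

# [@k]   -> any <a> carrying a k attribute (the new val=None case)
print(root.find("a[@k]").get('k'))        # '1'
# [@k='2'] -> exact-value match, the pre-existing behaviour
print(root.find("a[@k='2']").get('k'))    # '2'
```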
@@ -576,11 +579,9 @@ class ContentTooShortError(Exception):
download is too small for what the server announced first, indicating
the connection was probably interrupted.
"""
# Both in bytes
downloaded = None
expected = None

def __init__(self, downloaded, expected):
# Both in bytes
self.downloaded = downloaded
self.expected = expected

@@ -650,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
return ret

def http_request(self, req):
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
# always respected by websites, some tend to give out URLs with non percent-encoded
# non-ASCII characters (see telemb.py, ard.py [#3412])
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
# To work around aforementioned issue we will replace request's original URL with
# percent-encoded one
# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
# the code of this workaround has been moved here from YoutubeDL.urlopen()
url = req.get_full_url()
url_escaped = escape_url(url)

# Substitute URL if any change after escaping
if url != url_escaped:
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
new_req = req_type(
url_escaped, data=req.data, headers=req.headers,
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
new_req.timeout = req.timeout
req = new_req

for h, v in std_headers.items():
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
# The dict keys are capitalized because of this bug by urllib
@@ -694,6 +715,17 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
gz = io.BytesIO(self.deflate(resp.read()))
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986
if 300 <= resp.code < 400:
location = resp.headers.get('Location')
if location:
# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
if sys.version_info >= (3, 0):
location = location.encode('iso-8859-1').decode('utf-8')
location_escaped = escape_url(location)
if location != location_escaped:
del resp.headers['Location']
resp.headers['Location'] = location_escaped
return resp

https_request = http_request

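A minimal stand-in for the `escape_url` idea used above: percent-encode the non-ASCII parts of a URL so urllib accepts it, while leaving reserved characters and existing `%xx` escapes alone. youtube-dl's real helper splits the URL into components first; this one-liner is only illustrative, and the sample URL is hypothetical:

```python
from urllib.parse import quote

def escape_url(url):
    # '%' in the safe set keeps already-encoded sequences untouched.
    return quote(url, safe="%/:=&?~#+!$,;'@()*[]")

print(escape_url('http://www.southpark.de/alle-episoden/s18e09-düster'))
# http://www.southpark.de/alle-episoden/s18e09-d%C3%BCster
```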
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '2015.07.28'
__version__ = '2015.08.28'