Compare commits
274 Commits
2016.02.01
...
2016.03.01
Author | SHA1 | Date | |
---|---|---|---|
|
e781ab63db | ||
|
3e76968220 | ||
|
2812c24c16 | ||
|
4b3cd7316c | ||
|
6dae56384a | ||
|
2b2dfae83e | ||
|
6c10dbeae9 | ||
|
9173202b84 | ||
|
8870bb4653 | ||
|
7a0e7779fe | ||
|
a048ffc9b0 | ||
|
4587915b2a | ||
|
da665ddc25 | ||
|
5add979d91 | ||
|
20afe8bd14 | ||
|
940b606a07 | ||
|
9505053704 | ||
|
2c9ca78281 | ||
|
63719a8ac3 | ||
|
8fab62482a | ||
|
d6e9c2706f | ||
|
f7f2e53a0a | ||
|
9cdffeeb3f | ||
|
fbb6edd298 | ||
|
5eb6bdced4 | ||
|
5633b4d39d | ||
|
4435c6e98e | ||
|
2ebd2eac88 | ||
|
b78b292f0c | ||
|
efbd6fb8bb | ||
|
680079be39 | ||
|
e4fc8d2ebe | ||
|
f52354a889 | ||
|
59f898b7a7 | ||
|
8f4a2124a9 | ||
|
481888294d | ||
|
d1e440a4a1 | ||
|
81bdc8fdf6 | ||
|
e048d87fc9 | ||
|
e26cde0927 | ||
|
20108c6b90 | ||
|
9195ef745a | ||
|
d0459c530d | ||
|
f160785c5c | ||
|
5c0a57185c | ||
|
43479d9e9d | ||
|
c0da50d2b2 | ||
|
c24883a1c0 | ||
|
1b77ee6248 | ||
|
bf4b3b6bd9 | ||
|
efbeddead3 | ||
|
3cfeb1624a | ||
|
b95dc034ca | ||
|
86a7dbe66e | ||
|
b43a7a92cd | ||
|
6563d31710 | ||
|
cf89ba9eff | ||
|
9b01272832 | ||
|
58525c94d5 | ||
|
621bd0cda9 | ||
|
1610f770d7 | ||
|
0fc871d2f0 | ||
|
1ad6143061 | ||
|
92da3cd848 | ||
|
6212bcb191 | ||
|
d69abbd3f0 | ||
|
1d00a8823e | ||
|
5d6e1011df | ||
|
f5bdb44443 | ||
|
7efc1c2b49 | ||
|
132e3b74bd | ||
|
bdbf4ba40e | ||
|
acb6e97e6a | ||
|
445d72b8b5 | ||
|
92c5e11b40 | ||
|
0dd046c16c | ||
|
305168ca3e | ||
|
b72f6163dc | ||
|
33d4fdabfa | ||
|
cafcf657a4 | ||
|
7360db05b4 | ||
|
765ac263db | ||
|
a4e4d7dfcd | ||
|
73f9c2867d | ||
|
9c86d50916 | ||
|
1d14c75f55 | ||
|
99709cc3f1 | ||
|
5bc880b988 | ||
|
958759f44b | ||
|
86bf29050e | ||
|
04cbc4980d | ||
|
8765151c8a | ||
|
8ec64ac683 | ||
|
ed8648a322 | ||
|
88641243ab | ||
|
40e146aa1e | ||
|
f3f9cd9234 | ||
|
ebf1b291d0 | ||
|
bc7a9cd8fb | ||
|
d48502b82a | ||
|
479ec54a8d | ||
|
49625662a9 | ||
|
8b809a079a | ||
|
778433cb90 | ||
|
411cb8f476 | ||
|
63bf4f0dc0 | ||
|
80e59a0d5d | ||
|
8bbd3d1476 | ||
|
e725e4bced | ||
|
08d65046f0 | ||
|
44b9745000 | ||
|
9654fc875b | ||
|
0f425e65ec | ||
|
e277f2a63b | ||
|
f4db09178a | ||
|
86be3cdc2a | ||
|
cb64ccc715 | ||
|
f66a3c7bc2 | ||
|
fe80df3080 | ||
|
1932476c13 | ||
|
d2c1f79f20 | ||
|
8eacae8cf9 | ||
|
c8a80fd818 | ||
|
b9e8d7140a | ||
|
6eff2605d6 | ||
|
fd7a3ea4a4 | ||
|
8d3eeb36d7 | ||
|
8e0548e180 | ||
|
a517bb4b1e | ||
|
9dcefb23a1 | ||
|
d9da74bc06 | ||
|
5e19323ed9 | ||
|
611c1dd96e | ||
|
d800609c62 | ||
|
c78c9cd10d | ||
|
e76394f36c | ||
|
080e09557d | ||
|
fca2e6d5a6 | ||
|
b45f2b1d6e | ||
|
fc2e70ee90 | ||
|
b4561e857f | ||
|
7023251239 | ||
|
e2bd68c901 | ||
|
35ced3985a | ||
|
3e18700d45 | ||
|
f9f49d87c2 | ||
|
6863631c26 | ||
|
9d939cec48 | ||
|
4c77d3f52a | ||
|
7be747b921 | ||
|
bb20526b64 | ||
|
bcbb1b08b2 | ||
|
3d98f97c64 | ||
|
c349456ef6 | ||
|
5a4905924d | ||
|
b826035dd5 | ||
|
a7cab4d039 | ||
|
fc3810f6d1 | ||
|
3dc71d82ce | ||
|
9c7b38981c | ||
|
8b85ac3fd9 | ||
|
81e1c4e2fc | ||
|
388ae76b52 | ||
|
b67d63149d | ||
|
28280e8ded | ||
|
6b3fbd3425 | ||
|
a7ab46375b | ||
|
b14d5e26f6 | ||
|
9a61dfba0c | ||
|
154c209e2d | ||
|
d1ea5e171f | ||
|
a1188d0ed0 | ||
|
47d205a646 | ||
|
80f772c28a | ||
|
f817d9bec1 | ||
|
e2effb08a4 | ||
|
7fcea295c5 | ||
|
cc799437ea | ||
|
89d23f37f2 | ||
|
b92071ef00 | ||
|
47246ae26c | ||
|
9c15869c28 | ||
|
51e9094f4a | ||
|
5e3a6fec33 | ||
|
c43fe0268c | ||
|
d413095f7e | ||
|
1bedf4de06 | ||
|
3967a761f4 | ||
|
b081350bd9 | ||
|
16f1430ba6 | ||
|
085ad71157 | ||
|
35972ba172 | ||
|
3834d3e35c | ||
|
8d0a2a2a4e | ||
|
11c0339bec | ||
|
915dd77783 | ||
|
b6bfa6fb79 | ||
|
f070197bd7 | ||
|
5a7699bb2e | ||
|
8628d26f38 | ||
|
8411229bd5 | ||
|
72b9ebc65d | ||
|
3b799ca14c | ||
|
0474512e30 | ||
|
f0905c6ec3 | ||
|
86296ad2cd | ||
|
52f5889f77 | ||
|
81e0b4f2d1 | ||
|
cbecc9b903 | ||
|
b8b465af3e | ||
|
59b35c6745 | ||
|
7032833011 | ||
|
f406c78785 | ||
|
f326b5837a | ||
|
5dd4b3468f | ||
|
d4f8e83404 | ||
|
7b8b007cd9 | ||
|
3547d26587 | ||
|
7e62c2eb6d | ||
|
56401e1e5f | ||
|
860db2d508 | ||
|
4b8874975c | ||
|
bd6b6f6622 | ||
|
4340727e6c | ||
|
3ceccade87 | ||
|
28ad7df65d | ||
|
79a3508579 | ||
|
1b840245bd | ||
|
6a3828fddd | ||
|
91cb6b5065 | ||
|
0826a0b555 | ||
|
bcbbb98bfe | ||
|
66159b38aa | ||
|
23d17e4beb | ||
|
d97b0e3241 | ||
|
eb2533ec4c | ||
|
b7b365067f | ||
|
86e284e028 | ||
|
d9e543b680 | ||
|
c773c232d8 | ||
|
58ae24336a | ||
|
7d3a035ee0 | ||
|
e06e75c7e7 | ||
|
593e0f43b4 | ||
|
008ab0f814 | ||
|
3f7e8750d4 | ||
|
f1ed3acae5 | ||
|
920d21b9d3 | ||
|
2fb35d1c28 | ||
|
09be85b8dd | ||
|
eadc3ccd50 | ||
|
255732f0d3 | ||
|
53c269c6fd | ||
|
675d001633 | ||
|
58be922079 | ||
|
c84d3a557d | ||
|
d577c79632 | ||
|
6ad2b01e14 | ||
|
fd3a1f3d60 | ||
|
87de7069b9 | ||
|
6fba62c87a | ||
|
f14be22816 | ||
|
1df4141196 | ||
|
fae45ede08 | ||
|
4e0cff2a50 | ||
|
9c74423510 | ||
|
5976e7ab57 | ||
|
a1a22572fb | ||
|
c11875b328 | ||
|
8ff648e4f9 | ||
|
1bac34556f | ||
|
0436157b95 | ||
|
cf57433bbd | ||
|
2b14cb566f |
5
AUTHORS
5
AUTHORS
@@ -156,3 +156,8 @@ Tom Gijselinck
|
||||
Founder Fang
|
||||
Andrew Alexeyew
|
||||
Saso Bezlaj
|
||||
Erwin de Haan
|
||||
Jens Wille
|
||||
Robin Houtevelts
|
||||
Patrick Griffis
|
||||
Aidan Rowe
|
||||
|
@@ -1,6 +1,6 @@
|
||||
**Please include the full output of youtube-dl when run with `-v`**, i.e. add `-v` flag to your command line, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
||||
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
||||
```
|
||||
$ youtube-dl -v http://www.youtube.com/watch?v=BaW_jenozKcj
|
||||
$ youtube-dl -v <your command line>
|
||||
[debug] System config: []
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
@@ -92,7 +92,9 @@ If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
|
||||
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
@@ -140,16 +142,17 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
|
2
Makefile
2
Makefile
@@ -44,7 +44,7 @@ test:
|
||||
ot: offlinetest
|
||||
|
||||
offlinetest: codetest
|
||||
nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
|
||||
nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
|
||||
|
||||
tar: youtube-dl.tar.gz
|
||||
|
||||
|
145
README.md
145
README.md
@@ -409,13 +409,18 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime and use a proxy:
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
--extract-audio
|
||||
-x
|
||||
--no-mtime
|
||||
--proxy 127.0.0.1:3128
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
```
|
||||
|
||||
Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
|
||||
|
||||
You can use `--ignore-config` if you want to disable the configuration file for a particular youtube-dl run.
|
||||
|
||||
### Authentication with `.netrc` file
|
||||
@@ -440,30 +445,108 @@ On Windows you may also need to setup the `%HOME%` environment variable manually
|
||||
|
||||
# OUTPUT TEMPLATE
|
||||
|
||||
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
||||
The `-o` option allows users to indicate a template for the output file names.
|
||||
|
||||
- `id`: The sequence will be replaced by the video identifier.
|
||||
- `url`: The sequence will be replaced by the video URL.
|
||||
- `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video.
|
||||
- `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format.
|
||||
- `title`: The sequence will be replaced by the video title.
|
||||
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
|
||||
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
|
||||
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
||||
- `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.
|
||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||
- `duration`: The sequence will be replaced by the length of the video in seconds.
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
||||
|
||||
- `id`: Video identifier
|
||||
- `title`: Video title
|
||||
- `url`: Video URL
|
||||
- `ext`: Video filename extension
|
||||
- `alt_title`: A secondary title of the video
|
||||
- `display_id`: An alternative identifier for the video
|
||||
- `uploader`: Full name of the video uploader
|
||||
- `creator`: The main artist who created the video
|
||||
- `release_date`: The date (YYYYMMDD) when the video was released
|
||||
- `timestamp`: UNIX timestamp of the moment the video became available
|
||||
- `upload_date`: Video upload date (YYYYMMDD)
|
||||
- `uploader_id`: Nickname or id of the video uploader
|
||||
- `location`: Physical location where the video was filmed
|
||||
- `duration`: Length of the video in seconds
|
||||
- `view_count`: How many users have watched the video on the platform
|
||||
- `like_count`: Number of positive ratings of the video
|
||||
- `dislike_count`: Number of negative ratings of the video
|
||||
- `repost_count`: Number of reposts of the video
|
||||
- `average_rating`: Average rating given by users, the scale used depends on the webpage
|
||||
- `comment_count`: Number of comments on the video
|
||||
- `age_limit`: Age restriction for the video (years)
|
||||
- `format`: A human-readable description of the format
|
||||
- `format_id`: Format code specified by `--format`
|
||||
- `format_note`: Additional info about the format
|
||||
- `width`: Width of the video
|
||||
- `height`: Height of the video
|
||||
- `resolution`: Textual description of width and height
|
||||
- `tbr`: Average bitrate of audio and video in KBit/s
|
||||
- `abr`: Average audio bitrate in KBit/s
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `vbr`: Average video bitrate in KBit/s
|
||||
- `fps`: Frame rate
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `filesize`: The number of bytes, if known in advance
|
||||
- `filesize_approx`: An estimate for the number of bytes
|
||||
- `protocol`: The protocol that will be used for the actual download
|
||||
- `extractor`: Name of the extractor
|
||||
- `extractor_key`: Key name of the extractor
|
||||
- `epoch`: Unix epoch when creating the file
|
||||
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
||||
- `playlist`: Name or id of the playlist that contains the video
|
||||
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
- `chapter`: Name or title of the chapter the video belongs to
|
||||
- `chapter_number`: Number of the chapter the video belongs to
|
||||
- `chapter_id`: Id of the chapter the video belongs to
|
||||
|
||||
Available for the video that is an episode of some series or programme:
|
||||
- `series`: Title of the series or programme the video episode belongs to
|
||||
- `season`: Title of the season the video episode belongs to
|
||||
- `season_number`: Number of the season the video episode belongs to
|
||||
- `season_id`: Id of the season the video episode belongs to
|
||||
- `episode`: Title of the video episode
|
||||
- `episode_number`: Number of the video episode within a season
|
||||
- `episode_id`: Id of the video episode
|
||||
|
||||
Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
||||
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
|
||||
Output template can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` that will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||
|
||||
To specify percent literal in output template use `%%`. To output to stdout use `-o -`.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||
|
||||
#### Output template examples
|
||||
|
||||
Note on Windows you may need to use double quotes instead of single.
|
||||
|
||||
```bash
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
||||
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
|
||||
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
||||
|
||||
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
|
||||
youtube-dl_test_video_.mp4 # A simple file name
|
||||
|
||||
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
||||
$ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||
|
||||
# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
|
||||
$ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/user/TheLinuxFoundation/playlists
|
||||
|
||||
# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
|
||||
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
|
||||
|
||||
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
|
||||
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
|
||||
|
||||
# Stream the video being downloaded to stdout
|
||||
$ youtube-dl -o - BaW_jenozKc
|
||||
```
|
||||
|
||||
# FORMAT SELECTION
|
||||
@@ -474,6 +557,8 @@ But sometimes you may want to download in a different format, for example when y
|
||||
|
||||
The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#format-selection-examples).
|
||||
|
||||
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
|
||||
|
||||
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download best quality format of particular file extension served as a single file, e.g. `-f webm` will download best quality format with `webm` extension served as a single file.
|
||||
@@ -519,11 +604,14 @@ You can merge the video and audio of two formats into a single file using `-f <v
|
||||
|
||||
Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
|
||||
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
|
||||
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
|
||||
|
||||
Examples (note on Windows you may need to use double quotes instead of single):
|
||||
#### Format selection examples
|
||||
|
||||
Note on Windows you may need to use double quotes instead of single.
|
||||
|
||||
```bash
|
||||
# Download best mp4 format available or any other best if no mp4 available
|
||||
$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
|
||||
@@ -664,7 +752,7 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
|
||||
|
||||
### What is this binary file? Where has the code gone?
|
||||
|
||||
Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
||||
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
||||
|
||||
### The exe throws a *Runtime error from Visual C++*
|
||||
|
||||
@@ -747,7 +835,9 @@ If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
|
||||
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
@@ -795,16 +885,17 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
@@ -866,9 +957,9 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
|
||||
|
||||
**Please include the full output of youtube-dl when run with `-v`**, i.e. add `-v` flag to your command line, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
||||
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
||||
```
|
||||
$ youtube-dl -v http://www.youtube.com/watch?v=BaW_jenozKcj
|
||||
$ youtube-dl -v <your command line>
|
||||
[debug] System config: []
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
|
@@ -30,6 +30,7 @@
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
- **Aparat**
|
||||
@@ -49,6 +50,7 @@
|
||||
- **arte.tv:ddc**
|
||||
- **arte.tv:embed**
|
||||
- **arte.tv:future**
|
||||
- **arte.tv:magazine**
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
- **AudiMedia**
|
||||
@@ -75,6 +77,7 @@
|
||||
- **BleacherReportCMS**
|
||||
- **blinkx**
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
- **BR**: Bayerischer Rundfunk Mediathek
|
||||
- **Break**
|
||||
@@ -89,8 +92,11 @@
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canvas**
|
||||
- **CBC**
|
||||
- **CBCPlayer**
|
||||
- **CBS**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
@@ -119,6 +125,7 @@
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
- **Crunchyroll**
|
||||
@@ -261,7 +268,7 @@
|
||||
- **Instagram**
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima** (Currently broken)
|
||||
- **IPrima**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ivi**: ivi.ru
|
||||
@@ -282,6 +289,7 @@
|
||||
- **KeezMovies**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KonserthusetPlay**
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
- **Ku6**
|
||||
@@ -355,7 +363,7 @@
|
||||
- **MySpace:album**
|
||||
- **MySpass**
|
||||
- **Myvi**
|
||||
- **myvideo**
|
||||
- **myvideo** (Currently broken)
|
||||
- **MyVidster**
|
||||
- **n-tv.de**
|
||||
- **NationalGeographic**
|
||||
@@ -405,6 +413,7 @@
|
||||
- **NowTV** (Currently broken)
|
||||
- **NowTVList**
|
||||
- **nowvideo**: NowVideo
|
||||
- **Noz**
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo.nl:live**
|
||||
- **npo.nl:radio**
|
||||
@@ -443,6 +452,7 @@
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
- **Playwire**
|
||||
@@ -454,6 +464,7 @@
|
||||
- **PornHd**
|
||||
- **PornHub**
|
||||
- **PornHubPlaylist**
|
||||
- **PornHubUserVideos**
|
||||
- **Pornotube**
|
||||
- **PornoVoisines**
|
||||
- **PornoXO**
|
||||
@@ -516,6 +527,7 @@
|
||||
- **screen.yahoo:search**: Yahoo screen search
|
||||
- **Screencast**
|
||||
- **ScreencastOMatic**
|
||||
- **ScreenJunkies**
|
||||
- **ScreenwaveMedia**
|
||||
- **SenateISVP**
|
||||
- **ServingSys**
|
||||
@@ -549,7 +561,6 @@
|
||||
- **southpark.de**
|
||||
- **southpark.nl**
|
||||
- **southparkstudios.dk**
|
||||
- **Space**
|
||||
- **SpankBang**
|
||||
- **Spankwire**
|
||||
- **Spiegel**
|
||||
@@ -609,6 +620,7 @@
|
||||
- **TMZ**
|
||||
- **TMZArticle**
|
||||
- **TNAFlix**
|
||||
- **TNAFlixNetworkEmbed**
|
||||
- **toggle**
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics user profile
|
||||
@@ -649,6 +661,7 @@
|
||||
- **twitch:video**
|
||||
- **twitch:vod**
|
||||
- **twitter**
|
||||
- **twitter:amplify**
|
||||
- **twitter:card**
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
@@ -658,6 +671,7 @@
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
- **Ustudio**
|
||||
- **Varzesh3**
|
||||
- **Vbox7**
|
||||
- **VeeHD**
|
||||
@@ -673,14 +687,16 @@
|
||||
- **video.mit.edu**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **VideoMega** (Currently broken)
|
||||
- **VideoMega**
|
||||
- **videomore**
|
||||
- **videomore:season**
|
||||
- **videomore:video**
|
||||
- **VideoPremium**
|
||||
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidme**
|
||||
- **vidme**
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
- **Vidzi**
|
||||
- **vier**
|
||||
- **vier:videos**
|
||||
|
@@ -234,7 +234,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
|
||||
# Apple HTTP Live Streaming
|
||||
'96', '95', '94', '93', '92', '132', '151',
|
||||
# 3D
|
||||
@@ -248,6 +248,17 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def format_info(f_id):
|
||||
info = YoutubeIE._formats[f_id].copy()
|
||||
|
||||
# XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
|
||||
# and 'vcodec', while in tests such information is incomplete since
|
||||
# commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
|
||||
# test_YoutubeDL.test_youtube_format_selection is broken without
|
||||
# this fix
|
||||
if 'acodec' in info and 'vcodec' not in info:
|
||||
info['vcodec'] = 'none'
|
||||
elif 'vcodec' in info and 'acodec' not in info:
|
||||
info['acodec'] = 'none'
|
||||
|
||||
info['format_id'] = f_id
|
||||
info['url'] = 'url:' + f_id
|
||||
return info
|
||||
|
47
test/test_iqiyi_sdk_interpreter.py
Normal file
47
test/test_iqiyi_sdk_interpreter.py
Normal file
@@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.extractor import IqiyiIE
|
||||
|
||||
|
||||
class IqiyiIEWithCredentials(IqiyiIE):
|
||||
def _get_login_info(self):
|
||||
return 'foo', 'bar'
|
||||
|
||||
|
||||
class WarningLogger(object):
|
||||
def __init__(self):
|
||||
self.messages = []
|
||||
|
||||
def warning(self, msg):
|
||||
self.messages.append(msg)
|
||||
|
||||
def debug(self, msg):
|
||||
pass
|
||||
|
||||
def error(self, msg):
|
||||
pass
|
||||
|
||||
|
||||
class TestIqiyiSDKInterpreter(unittest.TestCase):
|
||||
def test_iqiyi_sdk_interpreter(self):
|
||||
'''
|
||||
Test the functionality of IqiyiSDKInterpreter by trying to log in
|
||||
|
||||
If `sign` is incorrect, /validate call throws an HTTP 556 error
|
||||
'''
|
||||
logger = WarningLogger()
|
||||
ie = IqiyiIEWithCredentials(FakeYDL({'logger': logger}))
|
||||
ie._login()
|
||||
self.assertTrue('unable to log in:' in logger.messages[0])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -65,16 +65,16 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 13)
|
||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||
for lang in ['it', 'fr', 'de']:
|
||||
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||
self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
|
||||
for lang in ['fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
def test_youtube_subtitles_sbv_format(self):
|
||||
def test_youtube_subtitles_ttml_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitlesformat'] = 'sbv'
|
||||
self.DL.params['subtitlesformat'] = 'ttml'
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||
self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
|
||||
|
||||
def test_youtube_subtitles_vtt_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
|
@@ -18,10 +18,12 @@ import xml.etree.ElementTree
|
||||
from youtube_dl.utils import (
|
||||
age_restricted,
|
||||
args_to_str,
|
||||
encode_base_n,
|
||||
clean_html,
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
determine_ext,
|
||||
dict_get,
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
@@ -34,6 +36,7 @@ from youtube_dl.utils import (
|
||||
is_html,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
@@ -247,6 +250,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(
|
||||
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
|
||||
'20150202')
|
||||
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||
|
||||
@@ -450,6 +454,28 @@ class TestUtil(unittest.TestCase):
|
||||
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
||||
self.assertTrue(isinstance(data, bytes))
|
||||
|
||||
def test_dict_get(self):
|
||||
FALSE_VALUES = {
|
||||
'none': None,
|
||||
'false': False,
|
||||
'zero': 0,
|
||||
'empty_string': '',
|
||||
'empty_list': [],
|
||||
}
|
||||
d = FALSE_VALUES.copy()
|
||||
d['a'] = 42
|
||||
self.assertEqual(dict_get(d, 'a'), 42)
|
||||
self.assertEqual(dict_get(d, 'b'), None)
|
||||
self.assertEqual(dict_get(d, 'b', 42), 42)
|
||||
self.assertEqual(dict_get(d, ('a', )), 42)
|
||||
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', )), None)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
|
||||
for key, false_value in FALSE_VALUES.items():
|
||||
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
|
||||
|
||||
def test_encode_compat_str(self):
|
||||
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
|
||||
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
|
||||
@@ -471,6 +497,10 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'STATUS': 'OK'})
|
||||
|
||||
stripped = strip_jsonp('ps.embedHandler({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
@@ -765,6 +795,24 @@ The first line
|
||||
{'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
|
||||
['--check-certificate=true'])
|
||||
|
||||
def test_ohdave_rsa_encrypt(self):
|
||||
N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
|
||||
e = 65537
|
||||
|
||||
self.assertEqual(
|
||||
ohdave_rsa_encrypt(b'aa111222', e, N),
|
||||
'726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881')
|
||||
|
||||
def test_encode_base_n(self):
|
||||
self.assertEqual(encode_base_n(0, 30), '0')
|
||||
self.assertEqual(encode_base_n(80, 30), '2k')
|
||||
|
||||
custom_table = '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA'
|
||||
self.assertEqual(encode_base_n(0, 30, custom_table), '9')
|
||||
self.assertEqual(encode_base_n(80, 30, custom_table), '7P')
|
||||
|
||||
self.assertRaises(ValueError, encode_base_n, 0, 70)
|
||||
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -605,12 +605,12 @@ class YoutubeDL(object):
|
||||
if rejecttitle:
|
||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||
return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||
date = info_dict.get('upload_date', None)
|
||||
date = info_dict.get('upload_date')
|
||||
if date is not None:
|
||||
dateRange = self.params.get('daterange', DateRange())
|
||||
if date not in dateRange:
|
||||
return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||
view_count = info_dict.get('view_count', None)
|
||||
view_count = info_dict.get('view_count')
|
||||
if view_count is not None:
|
||||
min_views = self.params.get('min_views')
|
||||
if min_views is not None and view_count < min_views:
|
||||
@@ -747,18 +747,18 @@ class YoutubeDL(object):
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
elif result_type == 'playlist' or result_type == 'multi_video':
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
playlist_results = []
|
||||
|
||||
playliststart = self.params.get('playliststart', 1) - 1
|
||||
playlistend = self.params.get('playlistend', None)
|
||||
playlistend = self.params.get('playlistend')
|
||||
# For backwards compatibility, interpret -1 as whole list
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
playlistitems_str = self.params.get('playlist_items', None)
|
||||
playlistitems_str = self.params.get('playlist_items')
|
||||
playlistitems = None
|
||||
if playlistitems_str is not None:
|
||||
def iter_playlistitems(format):
|
||||
@@ -782,7 +782,7 @@ class YoutubeDL(object):
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
|
||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
if playlistitems:
|
||||
@@ -796,7 +796,7 @@ class YoutubeDL(object):
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Downloading %d videos" %
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
@@ -807,7 +807,7 @@ class YoutubeDL(object):
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Downloading %d videos" %
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
@@ -1288,6 +1288,9 @@ class YoutubeDL(object):
|
||||
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
else:
|
||||
# Sanitize format_id from characters used in format selector expression
|
||||
format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
|
||||
format_id = format['format_id']
|
||||
if format_id not in formats_dict:
|
||||
formats_dict[format_id] = []
|
||||
@@ -1338,7 +1341,6 @@ class YoutubeDL(object):
|
||||
if req_format is None:
|
||||
req_format_list = []
|
||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||
info_dict['extractor'] in ['youtube', 'ted'] and
|
||||
not info_dict.get('is_live')):
|
||||
merger = FFmpegMergerPP(self)
|
||||
if merger.available and merger.can_merge():
|
||||
@@ -1795,7 +1797,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
res = '%sp' % format['height']
|
||||
elif format.get('width') is not None:
|
||||
res = '?x%d' % format['width']
|
||||
res = '%dx?' % format['width']
|
||||
else:
|
||||
res = default
|
||||
return res
|
||||
|
@@ -7,7 +7,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import sys
|
||||
|
||||
if __package__ is None and not hasattr(sys, "frozen"):
|
||||
if __package__ is None and not hasattr(sys, 'frozen'):
|
||||
# direct call of __main__.py
|
||||
import os.path
|
||||
path = os.path.realpath(os.path.abspath(__file__))
|
||||
|
@@ -161,7 +161,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
nonce = data[:NONCE_LENGTH_BYTES]
|
||||
cipher = data[NONCE_LENGTH_BYTES:]
|
||||
|
||||
class Counter:
|
||||
class Counter(object):
|
||||
__value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
|
||||
|
||||
def next_value(self):
|
||||
|
@@ -181,20 +181,20 @@ except ImportError: # Python < 3.4
|
||||
# parameter := attribute "=" value
|
||||
url = req.get_full_url()
|
||||
|
||||
scheme, data = url.split(":", 1)
|
||||
mediatype, data = data.split(",", 1)
|
||||
scheme, data = url.split(':', 1)
|
||||
mediatype, data = data.split(',', 1)
|
||||
|
||||
# even base64 encoded data URLs might be quoted so unquote in any case:
|
||||
data = compat_urllib_parse_unquote_to_bytes(data)
|
||||
if mediatype.endswith(";base64"):
|
||||
if mediatype.endswith(';base64'):
|
||||
data = binascii.a2b_base64(data)
|
||||
mediatype = mediatype[:-7]
|
||||
|
||||
if not mediatype:
|
||||
mediatype = "text/plain;charset=US-ASCII"
|
||||
mediatype = 'text/plain;charset=US-ASCII'
|
||||
|
||||
headers = email.message_from_string(
|
||||
"Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))
|
||||
'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
|
||||
|
||||
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
|
||||
|
||||
@@ -268,7 +268,7 @@ except ImportError: # Python 2
|
||||
nv = name_value.split('=', 1)
|
||||
if len(nv) != 2:
|
||||
if strict_parsing:
|
||||
raise ValueError("bad query field: %r" % (name_value,))
|
||||
raise ValueError('bad query field: %r' % (name_value,))
|
||||
# Handle case of a control-name with no equal sign
|
||||
if keep_blank_values:
|
||||
nv.append('')
|
||||
@@ -466,7 +466,7 @@ if sys.version_info < (2, 7):
|
||||
if err is not None:
|
||||
raise err
|
||||
else:
|
||||
raise socket.error("getaddrinfo returns an empty list")
|
||||
raise socket.error('getaddrinfo returns an empty list')
|
||||
else:
|
||||
compat_socket_create_connection = socket.create_connection
|
||||
|
||||
|
@@ -157,7 +157,7 @@ class FileDownloader(object):
|
||||
|
||||
def slow_down(self, start_time, now, byte_counter):
|
||||
"""Sleep if the download speed is over the rate limit."""
|
||||
rate_limit = self.params.get('ratelimit', None)
|
||||
rate_limit = self.params.get('ratelimit')
|
||||
if rate_limit is None or byte_counter == 0:
|
||||
return
|
||||
if now is None:
|
||||
|
@@ -1,66 +1,59 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import sanitized_Request
|
||||
from .fragment import FragmentFD
|
||||
from ..utils import (
|
||||
sanitize_open,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class DashSegmentsFD(FileDownloader):
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a DASH manifest
|
||||
"""
|
||||
|
||||
FD_NAME = 'dashsegments'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
base_url = info_dict['url']
|
||||
segment_urls = info_dict['segment_urls']
|
||||
segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
|
||||
initialization_url = info_dict.get('initialization_url')
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
||||
byte_counter = 0
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(segment_urls) + (1 if initialization_url else 0),
|
||||
}
|
||||
|
||||
def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
|
||||
self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
|
||||
req = sanitized_Request(target_url)
|
||||
if remaining_bytes is not None:
|
||||
req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
|
||||
|
||||
data = self.ydl.urlopen(req).read()
|
||||
|
||||
if remaining_bytes is not None:
|
||||
data = data[:remaining_bytes]
|
||||
|
||||
outf.write(data)
|
||||
return len(data)
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
with open(tmpfilename, 'wb') as outf:
|
||||
append_url_to_file(
|
||||
outf, combine_url(base_url, info_dict['initialization_url']),
|
||||
'initialization segment')
|
||||
for i, segment_url in enumerate(segment_urls):
|
||||
segment_len = append_url_to_file(
|
||||
outf, combine_url(base_url, segment_url),
|
||||
'segment %d / %d' % (i + 1, len(segment_urls)),
|
||||
remaining_bytes)
|
||||
byte_counter += segment_len
|
||||
if remaining_bytes is not None:
|
||||
remaining_bytes -= segment_len
|
||||
if remaining_bytes <= 0:
|
||||
break
|
||||
segments_filenames = []
|
||||
|
||||
self.try_rename(tmpfilename, filename)
|
||||
def append_url_to_file(target_url, target_filename):
|
||||
success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
ctx['dest_stream'].write(down.read())
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
if initialization_url:
|
||||
append_url_to_file(initialization_url, ctx['tmpfilename'] + '-Init')
|
||||
for i, segment_url in enumerate(segment_urls):
|
||||
segment_filename = '%s-Seg%d' % (ctx['tmpfilename'], i)
|
||||
append_url_to_file(segment_url, segment_filename)
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for segment_file in segments_filenames:
|
||||
os.remove(encodeFilename(segment_file))
|
||||
|
||||
return True
|
||||
|
@@ -38,7 +38,7 @@ class FragmentFD(FileDownloader):
|
||||
'continuedl': True,
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit', None),
|
||||
'ratelimit': self.params.get('ratelimit'),
|
||||
'retries': self.params.get('retries', 0),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from .common import FileDownloader
|
||||
from .fragment import FragmentFD
|
||||
@@ -57,8 +58,10 @@ class HlsFD(FileDownloader):
|
||||
# subprocess.run would send the SIGKILL signal to ffmpeg and the
|
||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||
# produces a file that is playable (this is mostly useful for live
|
||||
# streams)
|
||||
proc.communicate(b'q')
|
||||
# streams). Note that Windows is not affected and produces playable
|
||||
# files (see https://github.com/rg3/youtube-dl/issues/8300).
|
||||
if sys.platform != 'win32':
|
||||
proc.communicate(b'q')
|
||||
raise
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
|
@@ -140,8 +140,8 @@ class HttpFD(FileDownloader):
|
||||
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + resume_len
|
||||
min_data_len = self.params.get("min_filesize", None)
|
||||
max_data_len = self.params.get("max_filesize", None)
|
||||
min_data_len = self.params.get('min_filesize')
|
||||
max_data_len = self.params.get('max_filesize')
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
|
@@ -94,15 +94,15 @@ class RtmpFD(FileDownloader):
|
||||
return proc.returncode
|
||||
|
||||
url = info_dict['url']
|
||||
player_url = info_dict.get('player_url', None)
|
||||
page_url = info_dict.get('page_url', None)
|
||||
app = info_dict.get('app', None)
|
||||
play_path = info_dict.get('play_path', None)
|
||||
tc_url = info_dict.get('tc_url', None)
|
||||
flash_version = info_dict.get('flash_version', None)
|
||||
player_url = info_dict.get('player_url')
|
||||
page_url = info_dict.get('page_url')
|
||||
app = info_dict.get('app')
|
||||
play_path = info_dict.get('play_path')
|
||||
tc_url = info_dict.get('tc_url')
|
||||
flash_version = info_dict.get('flash_version')
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
protocol = info_dict.get('rtmp_protocol', None)
|
||||
conn = info_dict.get('rtmp_conn')
|
||||
protocol = info_dict.get('rtmp_protocol')
|
||||
real_time = info_dict.get('rtmp_real_time', False)
|
||||
no_resume = info_dict.get('no_resume', False)
|
||||
continue_dl = self.params.get('continuedl', True)
|
||||
|
@@ -20,6 +20,7 @@ from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
@@ -44,6 +45,7 @@ from .arte import (
|
||||
ArteTVFutureIE,
|
||||
ArteTVCinemaIE,
|
||||
ArteTVDDCIE,
|
||||
ArteTVMagazineIE,
|
||||
ArteTVEmbedIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
@@ -72,6 +74,7 @@ from .bleacherreport import (
|
||||
)
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bpb import BpbIE
|
||||
from .br import BRIE
|
||||
from .breakcom import BreakIE
|
||||
@@ -89,8 +92,15 @@ from .camdemy import (
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import CanvasIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbsnews import CBSNewsIE
|
||||
from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
@@ -123,6 +133,7 @@ from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .criterion import CriterionIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
from .crunchyroll import (
|
||||
@@ -325,6 +336,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
@@ -480,6 +492,7 @@ from .nowtv import (
|
||||
NowTVIE,
|
||||
NowTVListIE,
|
||||
)
|
||||
from .noz import NozIE
|
||||
from .npo import (
|
||||
NPOIE,
|
||||
NPOLiveIE,
|
||||
@@ -529,6 +542,7 @@ from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
@@ -542,6 +556,7 @@ from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
PornHubPlaylistIE,
|
||||
PornHubUserVideosIE,
|
||||
)
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
@@ -609,6 +624,7 @@ from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenjunkies import ScreenJunkiesIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .servingsys import ServingSysIE
|
||||
@@ -654,7 +670,6 @@ from .southpark import (
|
||||
SouthParkEsIE,
|
||||
SouthParkNlIE
|
||||
)
|
||||
from .space import SpaceIE
|
||||
from .spankbang import SpankBangIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
@@ -722,6 +737,7 @@ from .tmz import (
|
||||
TMZArticleIE,
|
||||
)
|
||||
from .tnaflix import (
|
||||
TNAFlixNetworkEmbedIE,
|
||||
TNAFlixIE,
|
||||
EMPFlixIE,
|
||||
MovieFapIE,
|
||||
@@ -783,7 +799,11 @@ from .twitch import (
|
||||
TwitchBookmarksIE,
|
||||
TwitchStreamIE,
|
||||
)
|
||||
from .twitter import TwitterCardIE, TwitterIE
|
||||
from .twitter import (
|
||||
TwitterCardIE,
|
||||
TwitterIE,
|
||||
TwitterAmplifyIE,
|
||||
)
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
@@ -794,6 +814,7 @@ from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .ustudio import UstudioIE
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
@@ -819,7 +840,11 @@ from .videomore import (
|
||||
)
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .vidme import VidmeIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
VidmeUserIE,
|
||||
VidmeUserLikesIE,
|
||||
)
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewster import ViewsterIE
|
||||
|
@@ -28,7 +28,7 @@ class AENetworksIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'eg47EERs_JsZ',
|
||||
'ext': 'mp4',
|
||||
'title': "Winter Is Coming",
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
},
|
||||
'params': {
|
||||
|
160
youtube_dl/extractor/animeondemand.py
Normal file
160
youtube_dl/extractor/animeondemand.py
Normal file
@@ -0,0 +1,160 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
encode_dict,
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class AnimeOnDemandIE(InfoExtractor):
    # Extracts every episode (and teaser) listed on an anime-on-demand.de
    # series page and returns them as one playlist.
    _VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
    _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
    _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
    _NETRC_MACHINE = 'animeondemand'
    _TEST = {
        'url': 'https://www.anime-on-demand.de/anime/161',
        'info_dict': {
            'id': '161',
            'title': 'Grimgar, Ashes and Illusions (OmU)',
            'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
        },
        'playlist_mincount': 4,
    }

    def _login(self):
        # Log in with the configured credentials; do nothing when no
        # username is available. Raises ExtractorError when the response
        # carries neither of the logged-in markers.
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Start from the hidden inputs of the 'new_user' form and add the
        # credentials on top.
        login_form = self._form_hidden_inputs('new_user', login_page)

        login_form.update({
            'user[login]': username,
            'user[password]': password,
        })

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post url', default=self._LOGIN_URL, group='url')

        # The form action may be relative; resolve it against the login URL.
        if not post_url.startswith('http'):
            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

        request = sanitized_Request(
            post_url, urlencode_postdata(encode_dict(login_form)))
        request.add_header('Referer', self._LOGIN_URL)

        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
            error = self._search_regex(
                r'<p class="alert alert-danger">(.+?)</p>',
                response, 'error', default=None)
            if error:
                raise ExtractorError('Unable to login: %s' % error, expected=True)
            raise ExtractorError('Unable to log in')

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        anime_id = self._match_id(url)

        webpage = self._download_webpage(url, anime_id)

        # Without 'data-playlist=' in the page, request the HTML5 beta and
        # fetch the page again.
        if 'data-playlist=' not in webpage:
            self._download_webpage(
                self._APPLY_HTML5_URL, anime_id,
                'Activating HTML5 beta', 'Unable to apply HTML5 beta')
            webpage = self._download_webpage(url, anime_id)

        csrf_token = self._html_search_meta(
            'csrf-token', webpage, 'csrf token', fatal=True)

        anime_title = self._html_search_regex(
            r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
            webpage, 'anime name')
        anime_description = self._html_search_regex(
            r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
            webpage, 'anime description', default=None)

        entries = []

        for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
            m = re.search(
                r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html)
            if not m:
                continue

            episode_number = int(m.group('number'))
            episode_title = m.group('title')
            video_id = 'episode-%d' % episode_number

            # Fields shared by the full episode entry and its teaser entry.
            common_info = {
                'id': video_id,
                'series': anime_title,
                'episode': episode_title,
                'episode_number': episode_number,
            }

            formats = []

            playlist_url = self._search_regex(
                r'data-playlist=(["\'])(?P<url>.+?)\1',
                episode_html, 'data playlist', default=None, group='url')
            if playlist_url:
                request = sanitized_Request(
                    compat_urlparse.urljoin(url, playlist_url),
                    headers={
                        'X-Requested-With': 'XMLHttpRequest',
                        'X-CSRF-Token': csrf_token,
                        'Referer': url,
                        'Accept': 'application/json, text/javascript, */*; q=0.01',
                    })

                playlist = self._download_json(
                    request, video_id, 'Downloading playlist JSON', fatal=False)
                if playlist:
                    playlist = playlist['playlist'][0]
                    title = playlist['title']
                    description = playlist.get('description')
                    for source in playlist.get('sources', []):
                        file_ = source.get('file')
                        if file_ and determine_ext(file_) == 'm3u8':
                            # Accumulate instead of rebinding, so an earlier
                            # m3u8 source is not discarded by a later one.
                            formats.extend(self._extract_m3u8_formats(
                                file_, video_id, 'mp4',
                                entry_protocol='m3u8_native', m3u8_id='hls'))

            # `title` and `description` are only read here, where a
            # non-empty `formats` guarantees they were set in this iteration.
            if formats:
                # Formats may now come from several manifests; order the
                # merged list once.
                self._sort_formats(formats)
                f = common_info.copy()
                f.update({
                    'title': title,
                    'description': description,
                    'formats': formats,
                })
                entries.append(f)

            # A teaser link, when present, becomes its own entry.
            m = re.search(
                r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
                episode_html)
            if m:
                f = common_info.copy()
                f.update({
                    'id': '%s-teaser' % f['id'],
                    'title': m.group('title'),
                    'url': compat_urlparse.urljoin(url, m.group('href')),
                })
                entries.append(f)

        return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class AppleTrailersIE(InfoExtractor):
|
||||
IE_NAME = 'appletrailers'
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
|
||||
'info_dict': {
|
||||
@@ -73,6 +73,9 @@ class AppleTrailersIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_JSON_RE = r'iTunes.playURL\((.*?)\);'
|
||||
|
@@ -13,6 +13,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
qualities,
|
||||
)
|
||||
|
||||
@@ -22,7 +23,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ArteTvIE(InfoExtractor):
|
||||
_VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
|
||||
_VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
|
||||
IE_NAME = 'arte.tv'
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -62,7 +63,7 @@ class ArteTvIE(InfoExtractor):
|
||||
|
||||
class ArteTVPlus7IE(InfoExtractor):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||
# The `+` quantifier must follow the character class; inside the brackets it
# is a literal and the `name` group would capture a single character only.
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)'
|
||||
|
||||
@classmethod
|
||||
def _extract_url_info(cls, url):
|
||||
@@ -93,12 +94,40 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url')
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
|
||||
if embed_url:
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
# en and es URLs produce react-based pages with different layout (e.g.
|
||||
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
|
||||
if not iframe_url:
|
||||
program = self._search_regex(
|
||||
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
|
||||
webpage, 'program', default=None)
|
||||
if program:
|
||||
embed_html = self._parse_json(program, video_id)
|
||||
if embed_html:
|
||||
iframe_url = find_iframe_url(embed_html['embed_html'])
|
||||
if iframe_url:
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
if json_url:
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
# Different kind of embed URL (e.g.
|
||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'embed url', group='url')
|
||||
return self.url_result(embed_url)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
info = self._download_json(json_url, video_id)
|
||||
@@ -106,7 +135,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
|
||||
upload_date_str = player_info.get('shootingDate')
|
||||
if not upload_date_str:
|
||||
upload_date_str = player_info.get('VDA', '').split(' ')[0]
|
||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||
|
||||
title = player_info['VTI'].strip()
|
||||
subtitle = player_info.get('VSU', '').strip()
|
||||
@@ -122,27 +151,30 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
}
|
||||
qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
|
||||
|
||||
LANGS = {
|
||||
'fr': 'F',
|
||||
'de': 'A',
|
||||
'en': 'E[ANG]',
|
||||
'es': 'E[ESP]',
|
||||
}
|
||||
|
||||
formats = []
|
||||
for format_id, format_dict in player_info['VSR'].items():
|
||||
f = dict(format_dict)
|
||||
versionCode = f.get('versionCode')
|
||||
|
||||
langcode = {
|
||||
'fr': 'F',
|
||||
'de': 'A',
|
||||
}.get(lang, lang)
|
||||
lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode]
|
||||
lang_pref = (
|
||||
None if versionCode is None else (
|
||||
10 if any(re.match(r, versionCode) for r in lang_rexs)
|
||||
else -10))
|
||||
langcode = LANGS.get(lang, lang)
|
||||
lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)]
|
||||
lang_pref = None
|
||||
if versionCode:
|
||||
matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)]
|
||||
lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs)
|
||||
source_pref = 0
|
||||
if versionCode is not None:
|
||||
# The original version with subtitles has lower relevance
|
||||
if re.match(r'VO-ST(F|A)', versionCode):
|
||||
if re.match(r'VO-ST(F|A|E)', versionCode):
|
||||
source_pref -= 10
|
||||
# The version with sourds/mal subtitles has also lower relevance
|
||||
elif re.match(r'VO?(F|A)-STM\1', versionCode):
|
||||
elif re.match(r'VO?(F|A|E)-STM\1', versionCode):
|
||||
source_pref -= 9
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
@@ -175,7 +207,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:creative'
|
||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/(?:magazine?/)?(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:magazine?/)?(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
||||
@@ -199,7 +231,7 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:future'
|
||||
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses',
|
||||
@@ -207,6 +239,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
'id': '050940-028-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les écrevisses aussi peuvent être anxieuses',
|
||||
'upload_date': '20140902',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable',
|
||||
@@ -216,7 +249,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
|
||||
class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:ddc'
|
||||
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
@@ -234,7 +267,7 @@ class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
|
||||
class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:concert'
|
||||
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
||||
@@ -251,7 +284,7 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
|
||||
class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:cinema'
|
||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://cinema.arte.tv/de/node/38291',
|
||||
@@ -266,6 +299,37 @@ class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
}
|
||||
|
||||
|
||||
class ArteTVMagazineIE(ArteTVPlus7IE):
    # Handles arte.tv "magazine" pages; defines only URL matching and tests,
    # everything else comes from ArteTVPlus7IE.
    IE_NAME = 'arte.tv:magazine'
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'

    _TESTS = [
        {
            # Embedded via <iframe src="http://www.arte.tv/arte_vp/index.php?json_url=..."
            'url': 'http://www.arte.tv/magazine/trepalium/fr/entretien-avec-le-realisateur-vincent-lannoo-trepalium',
            'md5': '2a9369bcccf847d1c741e51416299f25',
            'info_dict': {
                'id': '065965-000-A',
                'ext': 'mp4',
                'title': 'Trepalium - Extrait Ep.01',
                'upload_date': '20160121',
            },
        },
        {
            # Embedded via <iframe src="http://www.arte.tv/guide/fr/embed/054813-004-A/medium"
            'url': 'http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium',
            'md5': 'fedc64fc7a946110fe311634e79782ca',
            'info_dict': {
                'id': '054813-004_PLUS7-F',
                'ext': 'mp4',
                'title': 'Trepalium (4/6)',
                'description': 'md5:10057003c34d54e95350be4f9b05cb40',
                'upload_date': '20160218',
            },
        },
        {
            'url': 'http://www.arte.tv/magazine/metropolis/de/frank-woeste-german-paris-metropolis',
            'only_matching': True,
        },
    ]
|
||||
|
||||
|
||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:embed'
|
||||
_VALID_URL = r'''(?x)
|
||||
|
@@ -86,7 +86,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'id': 'b00yng1d',
|
||||
'ext': 'flv',
|
||||
'title': 'The Voice UK: Series 3: Blind Auditions 5',
|
||||
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
|
||||
'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
|
||||
'duration': 5100,
|
||||
},
|
||||
'params': {
|
||||
|
60
youtube_dl/extractor/bokecc.py
Normal file
60
youtube_dl/extractor/bokecc.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BokeCCBaseIE(InfoExtractor):
    # Shared helper for extractors whose pages embed the bokecc player.

    def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
        # Read the query string of the embedded player URL, fetch the
        # matching playinfo XML and build one format per <quality> node.
        params_query = self._html_search_regex(
            r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
            webpage, 'player params')
        params = compat_parse_qs(params_query)

        playinfo_url = 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
            params['siteid'][0], params['vid'][0])
        info_xml = self._download_xml(playinfo_url, video_id)

        formats = []
        for quality in info_xml.findall('./video/quality'):
            formats.append({
                'format_id': format_id,
                'url': quality.find('./copy').attrib['playurl'],
                # The node's numeric 'value' attribute doubles as preference.
                'preference': int(quality.attrib['value']),
            })

        self._sort_formats(formats)
        return formats
|
||||
|
||||
|
||||
class BokeCCIE(BokeCCBaseIE):
    # Extractor for union.bokecc.com player pages; the video is identified
    # by the `uid` and `vid` query parameters of the URL.

    # Extractor descriptions are declared as `IE_DESC`; the underscore-
    # prefixed spelling is kept only as an alias for existing references.
    IE_DESC = 'CC视频'
    _IE_DESC = IE_DESC
    _VALID_URL = r'http://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'

    _TESTS = [{
        'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
        'info_dict': {
            'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30',
            'ext': 'flv',
            'title': 'BokeCC Video',
        },
    }]

    def _real_extract(self, url):
        # Both `vid` and `uid` are required to build the video id.
        qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
        if not qs.get('vid') or not qs.get('uid'):
            raise ExtractorError('Invalid URL', expected=True)

        video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])

        webpage = self._download_webpage(url, video_id)

        return {
            'id': video_id,
            'title': 'BokeCC Video',  # no title provided in the webpage
            'formats': self._extract_bokecc_formats(webpage, video_id),
        }
|
@@ -4,12 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class C56IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
|
||||
IE_NAME = '56.com'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
|
||||
'md5': 'e59995ac63d0457783ea05f93f12a866',
|
||||
'info_dict': {
|
||||
@@ -18,12 +19,29 @@ class C56IE(InfoExtractor):
|
||||
'title': '网事知多少 第32期:车怒',
|
||||
'duration': 283.813,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '82247482',
|
||||
'title': '爱的诅咒之杜鹃花开',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
'add_ie': ['Sohu'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||
text_id = mobj.group('textid')
|
||||
|
||||
webpage = self._download_webpage(url, text_id)
|
||||
sohu_video_info_str = self._search_regex(
|
||||
r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None)
|
||||
if sohu_video_info_str:
|
||||
sohu_video_info = self._parse_json(
|
||||
sohu_video_info_str, text_id, transform_source=js_to_json)
|
||||
return self.url_result(sohu_video_info['url'], 'Sohu')
|
||||
|
||||
page = self._download_json(
|
||||
'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
|
||||
|
||||
|
@@ -6,7 +6,7 @@ from ..utils import float_or_none
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
'md5': 'ea838375a547ac787d4064d8c7860a6c',
|
||||
'info_dict': {
|
||||
@@ -18,7 +18,27 @@ class CanvasIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 49.02,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# with subtitles
|
||||
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
|
||||
'display_id': 'pieter-0167',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pieter 0167',
|
||||
'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 2553.08,
|
||||
'subtitles': {
|
||||
'nl': [{
|
||||
'ext': 'vtt',
|
||||
}],
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -54,6 +74,14 @@ class CanvasIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_urls = data.get('subtitleUrls')
|
||||
if isinstance(subtitle_urls, list):
|
||||
for subtitle in subtitle_urls:
|
||||
subtitle_url = subtitle.get('url')
|
||||
if subtitle_url and subtitle.get('type') == 'CLOSED':
|
||||
subtitles.setdefault('nl', []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@@ -62,4 +90,5 @@ class CanvasIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration'), 1000),
|
||||
'thumbnail': data.get('posterImageUrl'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
113
youtube_dl/extractor/cbc.py
Normal file
113
youtube_dl/extractor/cbc.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
    # Generic cbc.ca pages: resolves the embedded player to a `cbcplayer:`
    # URL, or builds a playlist from the player iframes on the page.
    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [
        {
            # with mediaId
            'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
            'info_dict': {
                'id': '2682904050',
                'ext': 'flv',
                'title': 'Don Cherry – All-Stars',
                'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
                'timestamp': 1454475540,
                'upload_date': '20160203',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            # with clipId
            'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
            'info_dict': {
                'id': '2487345465',
                'ext': 'flv',
                'title': 'Robin Williams freestyles on 90 Minutes Live',
                'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
                'upload_date': '19700101',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            # multiple iframes
            'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
            'playlist': [{
                'info_dict': {
                    'id': '2680832926',
                    'ext': 'flv',
                    'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
                    'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
                    'upload_date': '19700101',
                },
            }, {
                'info_dict': {
                    'id': '2658915080',
                    'ext': 'flv',
                    'title': 'Fly like an eagle!',
                    'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
                    'upload_date': '19700101',
                },
            }],
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    @classmethod
    def suitable(cls, url):
        # URLs that CBCPlayerIE accepts are left to that extractor.
        if CBCPlayerIE.suitable(url):
            return False
        return super(CBCIE, cls).suitable(url)

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        init_args = self._search_regex(
            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
            default=None)

        if not init_args:
            # No inline player: collect every iframe carrying a mediaId.
            entries = []
            for iframe_media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage):
                entries.append(self.url_result(
                    'cbcplayer:%s' % iframe_media_id, 'CBCPlayer', iframe_media_id))
            return self.playlist_result(entries)

        player_info = self._parse_json(init_args, display_id, js_to_json)
        media_id = player_info.get('mediaId')
        if not media_id:
            # Only a clipId is given; look the media id up in the feed and
            # keep the last path component of the entry id.
            clip_id = player_info['clipId']
            feed = self._download_json(
                'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
                clip_id)
            media_id = feed['entries'][0]['id'].split('/')[-1]
        return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
    # Resolves a numeric CBC media id (from a `cbcplayer:` pseudo-URL or a
    # cbc.ca player URL) to the corresponding theplatform feed URL.
    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'info_dict': {
            'id': '2683190193',
            'ext': 'flv',
            'title': 'Gerry Runs a Sweat Shop',
            'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
            'timestamp': 1455067800,
            'upload_date': '20160210',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        feed_url = 'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id
        # Hand the feed URL over to the 'ThePlatformFeed' extractor.
        return self.url_result(feed_url, 'ThePlatformFeed', video_id)
|
@@ -1,15 +1,17 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class CBSNewsIE(ThePlatformIE):
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -47,15 +49,23 @@ class CBSNewsIE(ThePlatformIE):
|
||||
},
|
||||
]
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
    # Build the subtitles mapping from the SMIL <param> element named
    # 'ClosedCaptionURL'. Returns a dict with a single TTML entry under
    # 'en', or an empty dict when the element or its 'value' is missing.
    # `subtitles_lang` is accepted for signature compatibility but not used.
    closed_caption_e = find_xpath_attr(
        smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
    caption_url = (
        closed_caption_e.attrib.get('value')
        if closed_caption_e is not None else None)
    if not caption_url:
        # Same mapping type as the populated case (was a list before).
        return {}
    return {
        'en': [{
            'ext': 'ttml',
            'url': caption_url,
        }]
    }
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_info = json.loads(self._html_search_regex(
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||
webpage, 'video JSON info'))
|
||||
webpage, 'video JSON info'), video_id)
|
||||
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
title = item.get('articleTitle') or item.get('hed')
|
||||
@@ -63,12 +73,6 @@ class CBSNewsIE(ThePlatformIE):
|
||||
thumbnail = item.get('mediaImage') or item.get('thumbnail')
|
||||
|
||||
subtitles = {}
|
||||
if 'mpxRefId' in video_info:
|
||||
subtitles['en'] = [{
|
||||
'ext': 'ttml',
|
||||
'url': 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'],
|
||||
}]
|
||||
|
||||
formats = []
|
||||
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
|
||||
pid = item.get('media' + format_id)
|
||||
@@ -88,3 +92,41 @@ class CBSNewsIE(ThePlatformIE):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
    # Extractor for cbsnews.com/live/video/ pages; metadata and the f4m
    # manifest URL come from the page's data-story-obj JSON attribute.
    IE_DESC = 'CBS News Live Videos'
    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'

    _TEST = {
        'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
        'info_dict': {
            'id': 'clinton-sanders-prepare-to-face-off-in-nh',
            'ext': 'flv',
            'title': 'Clinton, Sanders Prepare To Face Off In NH',
            'duration': 334,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        story_json = self._html_search_regex(
            r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info')
        story = self._parse_json(story_json, video_id)['story']

        hdcore_sign = 'hdcore=3.3.1'
        manifest_url = story['url'] + '&' + hdcore_sign
        f4m_formats = self._extract_f4m_formats(manifest_url, video_id)
        for fmt in f4m_formats or ():
            # URLs without the extra param induce a 404 error
            fmt['extra_param_to_segment_url'] = hdcore_sign

        return {
            'id': video_id,
            'title': story['headline'],
            'thumbnail': story.get('thumbnail_url_hd') or story.get('thumbnail_url_sd'),
            'duration': parse_duration(story.get('segmentDur')),
            'formats': f4m_formats,
        }
|
||||
|
@@ -45,7 +45,7 @@ class CCCIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1>(.*?)</h1>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r"(?s)<h3>About</h3>(.+?)<h3>",
|
||||
r'(?s)<h3>About</h3>(.+?)<h3>',
|
||||
webpage, 'description', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",
|
||||
|
@@ -177,16 +177,16 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
for divider in [1000, 60, 60, 100]:
|
||||
components.append(msec % divider)
|
||||
msec //= divider
|
||||
return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)
|
||||
return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)
|
||||
|
||||
def _fix_subtitle(subtitle):
|
||||
for line in subtitle.splitlines():
|
||||
m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
|
||||
m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
|
||||
if m:
|
||||
yield m.group(1)
|
||||
start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
|
||||
yield "{0} --> {1}".format(start, stop)
|
||||
yield '{0} --> {1}'.format(start, stop)
|
||||
else:
|
||||
yield line
|
||||
|
||||
return "\r\n".join(_fix_subtitle(subtitles))
|
||||
return '\r\n'.join(_fix_subtitle(subtitles))
|
||||
|
@@ -26,14 +26,14 @@ class CNNIE(InfoExtractor):
|
||||
'upload_date': '20130609',
|
||||
},
|
||||
}, {
|
||||
"url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
|
||||
"md5": "b5cc60c60a3477d185af8f19a2a26f4e",
|
||||
"info_dict": {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
'info_dict': {
|
||||
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
|
||||
'ext': 'mp4',
|
||||
"title": "Student's epic speech stuns new freshmen",
|
||||
"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||
"upload_date": "20130821",
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||
'upload_date': '20130821',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
|
@@ -46,9 +46,9 @@ class CollegeRamaIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_options_request = {
|
||||
"getPlayerOptionsRequest": {
|
||||
"ResourceId": video_id,
|
||||
"QueryString": "",
|
||||
'getPlayerOptionsRequest': {
|
||||
'ResourceId': video_id,
|
||||
'QueryString': '',
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
||||
'info_dict': {
|
||||
'id': 'miranda-sings-happy-thanksgiving-miranda',
|
||||
'id': '2494164',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141127',
|
||||
'timestamp': 1417107600,
|
||||
'duration': 1232,
|
||||
'title': 'Happy Thanksgiving Miranda',
|
||||
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
||||
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires ffmpeg',
|
||||
@@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):
|
||||
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
|
||||
display_id)['videoData']
|
||||
|
||||
video_id = full_data['activeVideo']['video']
|
||||
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
|
||||
display_id = full_data['activeVideo']['video']
|
||||
video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
|
||||
video_id = compat_str(video_data['mediaId'])
|
||||
thumbnails = [{
|
||||
'url': video_data['images']['thumb'],
|
||||
}, {
|
||||
'url': video_data['images']['poster'],
|
||||
}]
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['mediaUrl'], video_id, ext='mp4')
|
||||
|
||||
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
|
||||
video_data.get('pubDate'))
|
||||
@@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):
|
||||
video_data.get('duration'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'crackle:%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_data['title'],
|
||||
@@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'season_number': int_or_none(video_data.get('season')),
|
||||
'episode_number': int_or_none(video_data.get('episode')),
|
||||
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
||||
}
|
||||
|
@@ -16,11 +16,11 @@ from ..utils import (
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
||||
(video-clips|episodes|cc-studios|video-collections|full-episodes)
|
||||
(video-clips|episodes|cc-studios|video-collections|full-episodes|shows)
|
||||
/(?P<title>.*)'''
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
||||
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
|
||||
'info_dict': {
|
||||
@@ -29,7 +29,10 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
|
||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
@@ -192,7 +195,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
if len(altMovieParams) == 0:
|
||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
||||
else:
|
||||
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
|
||||
mMovieParams = [('http://media.mtvnservices.com/' + altMovieParams[0], altMovieParams[0])]
|
||||
|
||||
uri = mMovieParams[0][1]
|
||||
# Correct cc.com in uri
|
||||
|
@@ -10,6 +10,7 @@ import re
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import math
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
@@ -44,6 +45,8 @@ from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
determine_protocol,
|
||||
parse_duration,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -634,7 +637,7 @@ class InfoExtractor(object):
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
# Attempt to use provided username and password or .netrc data
|
||||
if downloader_params.get('username', None) is not None:
|
||||
if downloader_params.get('username') is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
elif downloader_params.get('usenetrc', False):
|
||||
@@ -661,7 +664,7 @@ class InfoExtractor(object):
|
||||
return None
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
if downloader_params.get('twofactor', None) is not None:
|
||||
if downloader_params.get('twofactor') is not None:
|
||||
return downloader_params['twofactor']
|
||||
|
||||
return compat_getpass('Type %s and press [Return]: ' % note)
|
||||
@@ -742,7 +745,7 @@ class InfoExtractor(object):
|
||||
'mature': 17,
|
||||
'restricted': 19,
|
||||
}
|
||||
return RATING_TABLE.get(rating.lower(), None)
|
||||
return RATING_TABLE.get(rating.lower())
|
||||
|
||||
def _family_friendly_search(self, html):
|
||||
# See http://schema.org/VideoObject
|
||||
@@ -757,7 +760,7 @@ class InfoExtractor(object):
|
||||
'0': 18,
|
||||
'false': 18,
|
||||
}
|
||||
return RATING_TABLE.get(family_friendly.lower(), None)
|
||||
return RATING_TABLE.get(family_friendly.lower())
|
||||
|
||||
def _twitter_search_player(self, html):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
@@ -897,6 +900,16 @@ class InfoExtractor(object):
|
||||
item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
|
||||
formats)
|
||||
|
||||
@staticmethod
|
||||
def _remove_duplicate_formats(formats):
|
||||
format_urls = set()
|
||||
unique_formats = []
|
||||
for f in formats:
|
||||
if f['url'] not in format_urls:
|
||||
format_urls.add(f['url'])
|
||||
unique_formats.append(f)
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video'):
|
||||
url = self._proto_relative_url(url, scheme='http:')
|
||||
# For now assume non HTTP(S) URLs always valid
|
||||
@@ -1020,11 +1033,21 @@ class InfoExtractor(object):
|
||||
return []
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
# A Media Playlist Tag MUST NOT appear in a Master Playlist
|
||||
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
|
||||
# The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists
|
||||
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc:
|
||||
|
||||
# We should try extracting formats only from master playlists [1], i.e.
|
||||
# playlists that describe available qualities. On the other hand media
|
||||
# playlists [2] should be returned as is since they contain just the media
|
||||
# without qualities renditions.
|
||||
# Fortunately, master playlist can be easily distinguished from media
|
||||
# playlist based on particular tags availability. As of [1, 2] master
|
||||
# playlist tags MUST NOT appear in a media playlist and vice versa.
|
||||
# As of [3] #EXT-X-TARGETDURATION tag is REQUIRED for every media playlist
|
||||
# and MUST NOT appear in master playlist thus we can clearly detect media
|
||||
# playlist with this criterion.
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
|
||||
return [{
|
||||
'url': m3u8_url,
|
||||
'format_id': m3u8_id,
|
||||
@@ -1071,19 +1094,29 @@ class InfoExtractor(object):
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
# TODO: looks like video codec is not always necessarily goes first
|
||||
va_codecs = codecs.split(',')
|
||||
if va_codecs[0]:
|
||||
f['vcodec'] = va_codecs[0]
|
||||
if len(va_codecs) > 1 and va_codecs[1]:
|
||||
f['acodec'] = va_codecs[1]
|
||||
resolution = last_info.get('RESOLUTION')
|
||||
if resolution:
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
vcodec, acodec = [None] * 2
|
||||
va_codecs = codecs.split(',')
|
||||
if len(va_codecs) == 1:
|
||||
# Audio only entries usually come with single codec and
|
||||
# no resolution. For more robustness we also check it to
|
||||
# be mp4 audio.
|
||||
if not resolution and va_codecs[0].startswith('mp4a'):
|
||||
vcodec, acodec = 'none', va_codecs[0]
|
||||
else:
|
||||
vcodec = va_codecs[0]
|
||||
else:
|
||||
vcodec, acodec = va_codecs[:2]
|
||||
f.update({
|
||||
'acodec': acodec,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
@@ -1184,11 +1217,13 @@ class InfoExtractor(object):
|
||||
http_count = 0
|
||||
m3u8_count = 0
|
||||
|
||||
srcs = []
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
if not src or src in srcs:
|
||||
continue
|
||||
srcs.append(src)
|
||||
|
||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||
@@ -1220,6 +1255,7 @@ class InfoExtractor(object):
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||
src_url = src_url.strip()
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
@@ -1265,21 +1301,14 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
urls = []
|
||||
subtitles = {}
|
||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||
src = textstream.get('src')
|
||||
if not src:
|
||||
if not src or src in urls:
|
||||
continue
|
||||
ext = textstream.get('ext') or determine_ext(src)
|
||||
if not ext:
|
||||
type_ = textstream.get('type')
|
||||
SUBTITLES_TYPES = {
|
||||
'text/vtt': 'vtt',
|
||||
'text/srt': 'srt',
|
||||
'application/smptett+xml': 'tt',
|
||||
}
|
||||
if type_ in SUBTITLES_TYPES:
|
||||
ext = SUBTITLES_TYPES[type_]
|
||||
urls.append(src)
|
||||
ext = textstream.get('ext') or determine_ext(src) or mimetype2ext(textstream.get('type'))
|
||||
lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': src,
|
||||
@@ -1330,87 +1359,167 @@ class InfoExtractor(object):
|
||||
})
|
||||
return entries
|
||||
|
||||
def _download_dash_manifest(self, dash_manifest_url, video_id, fatal=True):
|
||||
return self._download_xml(
|
||||
dash_manifest_url, video_id,
|
||||
note='Downloading DASH manifest',
|
||||
errnote='Could not download DASH manifest',
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
||||
res = self._download_webpage_handle(
|
||||
mpd_url, video_id,
|
||||
note=note or 'Downloading MPD manifest',
|
||||
errnote=errnote or 'Failed to download MPD manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
mpd, urlh = res
|
||||
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
||||
|
||||
def _extract_dash_manifest_formats(self, dash_manifest_url, video_id, fatal=True, namespace=None, formats_dict={}):
|
||||
dash_doc = self._download_dash_manifest(dash_manifest_url, video_id, fatal)
|
||||
if dash_doc is False:
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||
if mpd_doc.get('type') == 'dynamic':
|
||||
return []
|
||||
|
||||
return self._parse_dash_manifest(
|
||||
dash_doc, namespace=namespace, formats_dict=formats_dict)
|
||||
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
|
||||
|
||||
def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}):
|
||||
def _add_ns(path):
|
||||
return self._xpath_ns(path, namespace)
|
||||
|
||||
formats = []
|
||||
for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')):
|
||||
mime_type = a.attrib.get('mimeType')
|
||||
for r in a.findall(_add_ns('Representation')):
|
||||
mime_type = r.attrib.get('mimeType') or mime_type
|
||||
url_el = r.find(_add_ns('BaseURL'))
|
||||
if mime_type == 'text/vtt':
|
||||
# TODO implement WebVTT downloading
|
||||
pass
|
||||
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
|
||||
segment_list = r.find(_add_ns('SegmentList'))
|
||||
format_id = r.attrib['id']
|
||||
video_url = url_el.text if url_el is not None else None
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'width': int_or_none(r.attrib.get('width')),
|
||||
'height': int_or_none(r.attrib.get('height')),
|
||||
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
|
||||
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
|
||||
'filesize': filesize,
|
||||
'fps': int_or_none(r.attrib.get('frameRate')),
|
||||
}
|
||||
if segment_list is not None:
|
||||
initialization_url = segment_list.find(_add_ns('Initialization')).attrib['sourceURL']
|
||||
f.update({
|
||||
'initialization_url': initialization_url,
|
||||
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall(_add_ns('SegmentURL'))],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
if fo['format_id'] == format_id)
|
||||
except StopIteration:
|
||||
full_info = formats_dict.get(format_id, {}).copy()
|
||||
full_info.update(f)
|
||||
codecs = r.attrib.get('codecs')
|
||||
if codecs:
|
||||
if mime_type.startswith('video/'):
|
||||
vcodec, acodec = codecs, 'none'
|
||||
else: # mime_type.startswith('audio/')
|
||||
vcodec, acodec = 'none', codecs
|
||||
def is_drm_protected(element):
|
||||
return element.find(_add_ns('ContentProtection')) is not None
|
||||
|
||||
full_info.update({
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
})
|
||||
formats.append(full_info)
|
||||
def extract_multisegment_info(element, ms_parent_info):
|
||||
ms_info = ms_parent_info.copy()
|
||||
segment_list = element.find(_add_ns('SegmentList'))
|
||||
if segment_list is not None:
|
||||
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
||||
if segment_urls_e:
|
||||
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
||||
initialization = segment_list.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
else:
|
||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||
if segment_template is not None:
|
||||
start_number = segment_template.get('startNumber')
|
||||
if start_number:
|
||||
ms_info['start_number'] = int(start_number)
|
||||
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
|
||||
if segment_timeline is not None:
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
for s in s_e:
|
||||
ms_info['total_number'] += 1 + int(s.get('r', '0'))
|
||||
else:
|
||||
existing_format.update(f)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
timescale = segment_template.get('timescale')
|
||||
if timescale:
|
||||
ms_info['timescale'] = int(timescale)
|
||||
segment_duration = segment_template.get('duration')
|
||||
if segment_duration:
|
||||
ms_info['segment_duration'] = int(segment_duration)
|
||||
media_template = segment_template.get('media')
|
||||
if media_template:
|
||||
ms_info['media_template'] = media_template
|
||||
initialization = segment_template.get('initialization')
|
||||
if initialization:
|
||||
ms_info['initialization_url'] = initialization
|
||||
else:
|
||||
initialization = segment_template.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
return ms_info
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
formats = []
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
||||
period_ms_info = extract_multisegment_info(period, {
|
||||
'start_number': 1,
|
||||
'timescale': 1,
|
||||
})
|
||||
for adaptation_set in period.findall(_add_ns('AdaptationSet')):
|
||||
if is_drm_protected(adaptation_set):
|
||||
continue
|
||||
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
|
||||
for representation in adaptation_set.findall(_add_ns('Representation')):
|
||||
if is_drm_protected(representation):
|
||||
continue
|
||||
representation_attrib = adaptation_set.attrib.copy()
|
||||
representation_attrib.update(representation.attrib)
|
||||
mime_type = representation_attrib.get('mimeType')
|
||||
content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType')
|
||||
if content_type == 'text':
|
||||
# TODO implement WebVTT downloading
|
||||
pass
|
||||
elif content_type == 'video' or content_type == 'audio':
|
||||
base_url = ''
|
||||
for element in (representation, adaptation_set, period, mpd_doc):
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if base_url_e is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
break
|
||||
if mpd_base_url and not re.match(r'^https?://', base_url):
|
||||
if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
|
||||
mpd_base_url += '/'
|
||||
base_url = mpd_base_url + base_url
|
||||
representation_id = representation_attrib.get('id')
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
|
||||
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)(?:%(0\d+)d)?\$', r'%(\1)\2d', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
representation_ms_info['segment_urls'] = [media_template % {'Number': segment_number, 'Bandwidth': representation_attrib.get('bandwidth')} for segment_number in range(representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||
f.update({
|
||||
'initialization_url': initialization_url,
|
||||
})
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
if fo['format_id'] == representation_id)
|
||||
except StopIteration:
|
||||
full_info = formats_dict.get(representation_id, {}).copy()
|
||||
full_info.update(f)
|
||||
formats.append(full_info)
|
||||
else:
|
||||
existing_format.update(f)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
now_str = now.strftime("%Y-%m-%d %H:%M")
|
||||
now_str = now.strftime('%Y-%m-%d %H:%M')
|
||||
return name + ' ' + now_str
|
||||
|
||||
def _int(self, v, name, fatal=False, **kwargs):
|
||||
@@ -1483,7 +1592,7 @@ class InfoExtractor(object):
|
||||
return {}
|
||||
|
||||
def _get_subtitles(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
@staticmethod
|
||||
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
|
||||
@@ -1509,7 +1618,7 @@ class InfoExtractor(object):
|
||||
return {}
|
||||
|
||||
def _get_automatic_captions(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
@@ -1549,7 +1658,7 @@ class SearchInfoExtractor(InfoExtractor):
|
||||
|
||||
def _get_n_results(self, query, n):
|
||||
"""Get a specified number of results for a query"""
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
@property
|
||||
def SEARCH_KEY(self):
|
||||
|
95
youtube_dl/extractor/crackle.py
Normal file
95
youtube_dl/extractor/crackle.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
    """
    Extractor for Crackle videos.

    Accepts both 'crackle:<id>' pseudo URLs and crackle.com page URLs that
    end in a numeric video id.
    """
    _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.crackle.com/the-art-of-more/2496419',
        'info_dict': {
            'id': '2496419',
            'ext': 'mp4',
            'title': 'Heavy Lies the Head',
            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'

    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
    # Maps the file name suffix of each direct HTTP rendition to its
    # frame size.
    _MEDIA_FILE_SLOTS = {
        'c544.flv': {
            'width': 544,
            'height': 306,
        },
        '360p.mp4': {
            'width': 640,
            'height': 360,
        },
        '480p.mp4': {
            'width': 852,
            'height': 478,
        },
        '480p_1mbps.mp4': {
            'width': 852,
            'height': 478,
        },
    }

    def _real_extract(self, url):
        """
        Return the info dict for the video referenced by url.

        Metadata is read from the attributes of the 'i' element of the
        vidwallcache XML document. Formats are the HLS renditions plus, when
        the item carries a 'p' (path) attribute, the direct HTTP renditions
        listed in _MEDIA_FILE_SLOTS.
        """
        video_id = self._match_id(url)
        item = self._download_xml(
            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
            video_id).find('i')
        title = item.attrib['t']

        thumbnail = None
        subtitles = {}
        # NOTE(review): a falsy `fatal` presumably makes a failed manifest
        # download non-fatal so the HTTP renditions can still be used - confirm.
        formats = self._extract_m3u8_formats(
            'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
            video_id, 'mp4', m3u8_id='hls', fatal=None)
        path = item.attrib.get('p')
        if path:
            thumbnail = self._THUMBNAIL_TEMPLATE % path
            http_base_url = 'http://ahttp.crackle.com/' + path
            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
                formats.append({
                    'url': http_base_url + mfs_path,
                    'format_id': 'http-' + mfs_path.split('.')[0],
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
            # Subtitle URLs are built from the path as well, so they are
            # only collected when it is known.
            for cc in item.findall('cc'):
                locale = cc.attrib.get('l')
                v = cc.attrib.get('v')
                if locale and v:
                    # Append rather than assign so that several entries for
                    # the same locale do not overwrite each other.
                    subtitles.setdefault(locale, []).append({
                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
                        'ext': 'ttml',
                    })
        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

        # The 'r' attribute is parsed as a base-16 number
        duration_hex = item.attrib.get('r')

        return {
            'id': video_id,
            'title': title,
            'description': item.attrib.get('d'),
            'duration': int(duration_hex, 16) if duration_hex else None,
            'series': item.attrib.get('sn'),
            'season_number': int_or_none(item.attrib.get('se')),
            'episode_number': int_or_none(item.attrib.get('ep')),
            'thumbnail': thumbnail,
            'subtitles': subtitles,
            'formats': formats,
        }
|
@@ -180,40 +180,40 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
return assvalue
|
||||
|
||||
output = '[Script Info]\n'
|
||||
output += 'Title: %s\n' % sub_root.attrib["title"]
|
||||
output += 'Title: %s\n' % sub_root.attrib['title']
|
||||
output += 'ScriptType: v4.00+\n'
|
||||
output += 'WrapStyle: %s\n' % sub_root.attrib["wrap_style"]
|
||||
output += 'PlayResX: %s\n' % sub_root.attrib["play_res_x"]
|
||||
output += 'PlayResY: %s\n' % sub_root.attrib["play_res_y"]
|
||||
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
|
||||
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
|
||||
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
|
||||
output += """ScaledBorderAndShadow: yes
|
||||
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
"""
|
||||
for style in sub_root.findall('./styles/style'):
|
||||
output += 'Style: ' + style.attrib["name"]
|
||||
output += ',' + style.attrib["font_name"]
|
||||
output += ',' + style.attrib["font_size"]
|
||||
output += ',' + style.attrib["primary_colour"]
|
||||
output += ',' + style.attrib["secondary_colour"]
|
||||
output += ',' + style.attrib["outline_colour"]
|
||||
output += ',' + style.attrib["back_colour"]
|
||||
output += ',' + ass_bool(style.attrib["bold"])
|
||||
output += ',' + ass_bool(style.attrib["italic"])
|
||||
output += ',' + ass_bool(style.attrib["underline"])
|
||||
output += ',' + ass_bool(style.attrib["strikeout"])
|
||||
output += ',' + style.attrib["scale_x"]
|
||||
output += ',' + style.attrib["scale_y"]
|
||||
output += ',' + style.attrib["spacing"]
|
||||
output += ',' + style.attrib["angle"]
|
||||
output += ',' + style.attrib["border_style"]
|
||||
output += ',' + style.attrib["outline"]
|
||||
output += ',' + style.attrib["shadow"]
|
||||
output += ',' + style.attrib["alignment"]
|
||||
output += ',' + style.attrib["margin_l"]
|
||||
output += ',' + style.attrib["margin_r"]
|
||||
output += ',' + style.attrib["margin_v"]
|
||||
output += ',' + style.attrib["encoding"]
|
||||
output += 'Style: ' + style.attrib['name']
|
||||
output += ',' + style.attrib['font_name']
|
||||
output += ',' + style.attrib['font_size']
|
||||
output += ',' + style.attrib['primary_colour']
|
||||
output += ',' + style.attrib['secondary_colour']
|
||||
output += ',' + style.attrib['outline_colour']
|
||||
output += ',' + style.attrib['back_colour']
|
||||
output += ',' + ass_bool(style.attrib['bold'])
|
||||
output += ',' + ass_bool(style.attrib['italic'])
|
||||
output += ',' + ass_bool(style.attrib['underline'])
|
||||
output += ',' + ass_bool(style.attrib['strikeout'])
|
||||
output += ',' + style.attrib['scale_x']
|
||||
output += ',' + style.attrib['scale_y']
|
||||
output += ',' + style.attrib['spacing']
|
||||
output += ',' + style.attrib['angle']
|
||||
output += ',' + style.attrib['border_style']
|
||||
output += ',' + style.attrib['outline']
|
||||
output += ',' + style.attrib['shadow']
|
||||
output += ',' + style.attrib['alignment']
|
||||
output += ',' + style.attrib['margin_l']
|
||||
output += ',' + style.attrib['margin_r']
|
||||
output += ',' + style.attrib['margin_v']
|
||||
output += ',' + style.attrib['encoding']
|
||||
output += '\n'
|
||||
|
||||
output += """
|
||||
@@ -222,15 +222,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
"""
|
||||
for event in sub_root.findall('./events/event'):
|
||||
output += 'Dialogue: 0'
|
||||
output += ',' + event.attrib["start"]
|
||||
output += ',' + event.attrib["end"]
|
||||
output += ',' + event.attrib["style"]
|
||||
output += ',' + event.attrib["name"]
|
||||
output += ',' + event.attrib["margin_l"]
|
||||
output += ',' + event.attrib["margin_r"]
|
||||
output += ',' + event.attrib["margin_v"]
|
||||
output += ',' + event.attrib["effect"]
|
||||
output += ',' + event.attrib["text"]
|
||||
output += ',' + event.attrib['start']
|
||||
output += ',' + event.attrib['end']
|
||||
output += ',' + event.attrib['style']
|
||||
output += ',' + event.attrib['name']
|
||||
output += ',' + event.attrib['margin_l']
|
||||
output += ',' + event.attrib['margin_r']
|
||||
output += ',' + event.attrib['margin_v']
|
||||
output += ',' + event.attrib['effect']
|
||||
output += ',' + event.attrib['text']
|
||||
output += '\n'
|
||||
|
||||
return output
|
||||
@@ -376,7 +376,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
|
||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = "crunchyroll:playlist"
|
||||
IE_NAME = 'crunchyroll:playlist'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
|
@@ -122,10 +122,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:(\d+)"',
|
||||
r'video_views_count[^>]+>\s+([\d\.,]+)'],
|
||||
webpage, 'view count', fatal=False))
|
||||
view_count_str = self._search_regex(
|
||||
(r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
|
||||
r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
|
||||
webpage, 'view count', fatal=False)
|
||||
if view_count_str:
|
||||
view_count_str = re.sub(r'\s', '', view_count_str)
|
||||
view_count = str_to_int(view_count_str)
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
|
||||
webpage, 'comment count', fatal=False))
|
||||
@@ -396,13 +399,13 @@ class DailymotionCloudIE(DailymotionBaseInfoExtractor):
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_dmcloud_url(self, webpage):
|
||||
mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % self._VALID_EMBED_URL, webpage)
|
||||
def _extract_dmcloud_url(cls, webpage):
|
||||
mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, webpage)
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
|
||||
mobj = re.search(
|
||||
r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % self._VALID_EMBED_URL,
|
||||
r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL,
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,44 +10,125 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.dplay\.se/[^/]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'http://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',
|
||||
'info_dict': {
|
||||
'id': '1255600',
|
||||
'display_id': 'stagione-1-episodio-25',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episodio 25',
|
||||
'description': 'md5:cae5f40ad988811b197d2d27a53227eb',
|
||||
'duration': 2761,
|
||||
'timestamp': 1454701800,
|
||||
'upload_date': '20160205',
|
||||
'creator': 'RTIT',
|
||||
'series': 'Take me out',
|
||||
'season_number': 1,
|
||||
'episode_number': 25,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
||||
'info_dict': {
|
||||
'id': '3172',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
|
||||
'ext': 'flv',
|
||||
'title': 'Svensken lär sig njuta av livet',
|
||||
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
||||
'duration': 2650,
|
||||
'timestamp': 1365454320,
|
||||
'upload_date': '20130408',
|
||||
'creator': 'Kanal 5 (Home)',
|
||||
'series': 'Nugammalt - 77 händelser som format Sverige',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
|
||||
'info_dict': {
|
||||
'id': '70816',
|
||||
'display_id': 'season-6-episode-12',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode 12',
|
||||
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
|
||||
'duration': 2563,
|
||||
'timestamp': 1429696800,
|
||||
'upload_date': '20150422',
|
||||
'creator': 'Kanal 4',
|
||||
'series': 'Mig og min mor',
|
||||
'season_number': 6,
|
||||
'episode_number': 12,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
domain = mobj.group('domain')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id="(\d+)"', webpage, 'video id')
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||
|
||||
info = self._download_json(
|
||||
'http://www.dplay.se/api/v2/ajax/videos?video_id=' + video_id,
|
||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
||||
video_id)['data'][0]
|
||||
|
||||
self._set_cookie(
|
||||
'secure.dplay.se', 'dsc-geo',
|
||||
'{"countryCode":"NL","expiry":%d}' % ((time.time() + 20 * 60) * 1000))
|
||||
# TODO: consider adding support for 'stream_type=hds', it seems to
|
||||
# require setting some cookies
|
||||
manifest_url = self._download_json(
|
||||
'https://secure.dplay.se/secure/api/v2/user/authorization/stream/%s?stream_type=hls' % video_id,
|
||||
video_id, 'Getting manifest url for hls stream')['hls']
|
||||
formats = self._extract_m3u8_formats(
|
||||
manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
title = info['title']
|
||||
|
||||
PROTOCOLS = ('hls', 'hds')
|
||||
formats = []
|
||||
|
||||
def extract_formats(protocol, manifest_url):
|
||||
if protocol == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False))
|
||||
elif protocol == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
|
||||
video_id, f4m_id=protocol, fatal=False))
|
||||
|
||||
domain_tld = domain.split('.')[-1]
|
||||
if domain_tld in ('se', 'dk'):
|
||||
for protocol in PROTOCOLS:
|
||||
self._set_cookie(
|
||||
'secure.dplay.%s' % domain_tld, 'dsc-geo',
|
||||
json.dumps({
|
||||
'countryCode': domain_tld.upper(),
|
||||
'expiry': (time.time() + 20 * 60) * 1000,
|
||||
}))
|
||||
stream = self._download_json(
|
||||
'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
|
||||
% (domain_tld, video_id, protocol), video_id,
|
||||
'Downloading %s stream JSON' % protocol, fatal=False)
|
||||
if stream and stream.get(protocol):
|
||||
extract_formats(protocol, stream[protocol])
|
||||
else:
|
||||
for protocol in PROTOCOLS:
|
||||
if info.get(protocol):
|
||||
extract_formats(protocol, info[protocol])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': info.get('video_metadata_longDescription'),
|
||||
'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
|
||||
'timestamp': int_or_none(info.get('video_publish_date')),
|
||||
'creator': info.get('video_metadata_homeChannel'),
|
||||
'series': info.get('video_metadata_show'),
|
||||
'season_number': int_or_none(info.get('season')),
|
||||
'episode_number': int_or_none(info.get('episode')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -87,7 +87,7 @@ class DRBonanzaIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for file in info['Files']:
|
||||
if info['Type'] == "Video":
|
||||
if info['Type'] == 'Video':
|
||||
if file['Type'] in video_types:
|
||||
format = parse_filename_info(file['Location'])
|
||||
format.update({
|
||||
@@ -101,10 +101,10 @@ class DRBonanzaIE(InfoExtractor):
|
||||
if '/bonanza/' in rtmp_url:
|
||||
format['play_path'] = rtmp_url.split('/bonanza/')[1]
|
||||
formats.append(format)
|
||||
elif file['Type'] == "Thumb":
|
||||
elif file['Type'] == 'Thumb':
|
||||
thumbnail = file['Location']
|
||||
elif info['Type'] == "Audio":
|
||||
if file['Type'] == "Audio":
|
||||
elif info['Type'] == 'Audio':
|
||||
if file['Type'] == 'Audio':
|
||||
format = parse_filename_info(file['Location'])
|
||||
format.update({
|
||||
'url': file['Location'],
|
||||
@@ -112,7 +112,7 @@ class DRBonanzaIE(InfoExtractor):
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.append(format)
|
||||
elif file['Type'] == "Thumb":
|
||||
elif file['Type'] == 'Thumb':
|
||||
thumbnail = file['Location']
|
||||
|
||||
description = '%s\n%s\n%s\n' % (
|
||||
|
@@ -17,85 +17,85 @@ class EightTracksIE(InfoExtractor):
|
||||
IE_NAME = '8tracks'
|
||||
_VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
||||
_TEST = {
|
||||
"name": "EightTracks",
|
||||
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
||||
"info_dict": {
|
||||
'name': 'EightTracks',
|
||||
'url': 'http://8tracks.com/ytdl/youtube-dl-test-tracks-a',
|
||||
'info_dict': {
|
||||
'id': '1336550',
|
||||
'display_id': 'youtube-dl-test-tracks-a',
|
||||
"description": "test chars: \"'/\\ä↭",
|
||||
"title": "youtube-dl test tracks \"'/\\ä↭<>",
|
||||
'description': "test chars: \"'/\\ä↭",
|
||||
'title': "youtube-dl test tracks \"'/\\ä↭<>",
|
||||
},
|
||||
"playlist": [
|
||||
'playlist': [
|
||||
{
|
||||
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
|
||||
"info_dict": {
|
||||
"id": "11885610",
|
||||
"ext": "m4a",
|
||||
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
'md5': '96ce57f24389fc8734ce47f4c1abcc55',
|
||||
'info_dict': {
|
||||
'id': '11885610',
|
||||
'ext': 'm4a',
|
||||
'title': "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
|
||||
'uploader_id': 'ytdl'
|
||||
}
|
||||
},
|
||||
{
|
||||
"md5": "4ab26f05c1f7291ea460a3920be8021f",
|
||||
"info_dict": {
|
||||
"id": "11885608",
|
||||
"ext": "m4a",
|
||||
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
'md5': '4ab26f05c1f7291ea460a3920be8021f',
|
||||
'info_dict': {
|
||||
'id': '11885608',
|
||||
'ext': 'm4a',
|
||||
'title': "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
|
||||
'uploader_id': 'ytdl'
|
||||
}
|
||||
},
|
||||
{
|
||||
"md5": "d30b5b5f74217410f4689605c35d1fd7",
|
||||
"info_dict": {
|
||||
"id": "11885679",
|
||||
"ext": "m4a",
|
||||
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
'md5': 'd30b5b5f74217410f4689605c35d1fd7',
|
||||
'info_dict': {
|
||||
'id': '11885679',
|
||||
'ext': 'm4a',
|
||||
'title': "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
|
||||
'uploader_id': 'ytdl'
|
||||
}
|
||||
},
|
||||
{
|
||||
"md5": "4eb0a669317cd725f6bbd336a29f923a",
|
||||
"info_dict": {
|
||||
"id": "11885680",
|
||||
"ext": "m4a",
|
||||
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
'md5': '4eb0a669317cd725f6bbd336a29f923a',
|
||||
'info_dict': {
|
||||
'id': '11885680',
|
||||
'ext': 'm4a',
|
||||
'title': "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
|
||||
'uploader_id': 'ytdl'
|
||||
}
|
||||
},
|
||||
{
|
||||
"md5": "1893e872e263a2705558d1d319ad19e8",
|
||||
"info_dict": {
|
||||
"id": "11885682",
|
||||
"ext": "m4a",
|
||||
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
'md5': '1893e872e263a2705558d1d319ad19e8',
|
||||
'info_dict': {
|
||||
'id': '11885682',
|
||||
'ext': 'm4a',
|
||||
'title': "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
|
||||
'uploader_id': 'ytdl'
|
||||
}
|
||||
},
|
||||
{
|
||||
"md5": "b673c46f47a216ab1741ae8836af5899",
|
||||
"info_dict": {
|
||||
"id": "11885683",
|
||||
"ext": "m4a",
|
||||
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
'md5': 'b673c46f47a216ab1741ae8836af5899',
|
||||
'info_dict': {
|
||||
'id': '11885683',
|
||||
'ext': 'm4a',
|
||||
'title': "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
|
||||
'uploader_id': 'ytdl'
|
||||
}
|
||||
},
|
||||
{
|
||||
"md5": "1d74534e95df54986da7f5abf7d842b7",
|
||||
"info_dict": {
|
||||
"id": "11885684",
|
||||
"ext": "m4a",
|
||||
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
'md5': '1d74534e95df54986da7f5abf7d842b7',
|
||||
'info_dict': {
|
||||
'id': '11885684',
|
||||
'ext': 'm4a',
|
||||
'title': "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
|
||||
'uploader_id': 'ytdl'
|
||||
}
|
||||
},
|
||||
{
|
||||
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
|
||||
"info_dict": {
|
||||
"id": "11885685",
|
||||
"ext": "m4a",
|
||||
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
|
||||
"uploader_id": "ytdl"
|
||||
'md5': 'f081f47af8f6ae782ed131d38b9cd1c0',
|
||||
'info_dict': {
|
||||
'id': '11885685',
|
||||
'ext': 'm4a',
|
||||
'title': "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
|
||||
'uploader_id': 'ytdl'
|
||||
}
|
||||
}
|
||||
]
|
||||
|
@@ -72,7 +72,7 @@ class EllenTVClipsIE(InfoExtractor):
|
||||
def _extract_playlist(self, webpage):
|
||||
json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
|
||||
try:
|
||||
return json.loads("[{" + json_string + "}]")
|
||||
return json.loads('[{' + json_string + '}]')
|
||||
except ValueError as ve:
|
||||
raise ExtractorError('Failed to download JSON', cause=ve)
|
||||
|
||||
|
@@ -14,14 +14,14 @@ class EveryonesMixtapeIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
|
||||
"info_dict": {
|
||||
'info_dict': {
|
||||
'id': '5bfseWNmlds',
|
||||
'ext': 'mp4',
|
||||
"title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
|
||||
"uploader": "FKR.TV",
|
||||
"uploader_id": "frenchkissrecords",
|
||||
"description": "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
|
||||
"upload_date": "20081015"
|
||||
'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)",
|
||||
'uploader': 'FKR.TV',
|
||||
'uploader_id': 'frenchkissrecords',
|
||||
'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
|
||||
'upload_date': '20081015'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # This is simply YouTube
|
||||
|
@@ -41,7 +41,7 @@ class ExfmIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
song_id = mobj.group('id')
|
||||
info_url = "http://ex.fm/api/v3/song/%s" % song_id
|
||||
info_url = 'http://ex.fm/api/v3/song/%s' % song_id
|
||||
info = self._download_json(info_url, song_id)['song']
|
||||
song_url = info['url']
|
||||
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
||||
|
@@ -34,8 +34,9 @@ class FacebookIE(InfoExtractor):
|
||||
video/video\.php|
|
||||
photo\.php|
|
||||
video\.php|
|
||||
video/embed
|
||||
)\?(?:.*?)(?:v|video_id)=|
|
||||
video/embed|
|
||||
story\.php
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?
|
||||
)|
|
||||
facebook:
|
||||
@@ -92,6 +93,9 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'facebook:544765982287235',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -186,7 +190,7 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
|
||||
for item in server_js_data['instances']:
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||
break
|
||||
@@ -208,16 +212,18 @@ class FacebookIE(InfoExtractor):
|
||||
for src_type in ('src', 'src_no_ratelimit'):
|
||||
src = f[0].get('%s_%s' % (quality, src_type))
|
||||
if src:
|
||||
preference = -10 if format_id == 'progressive' else 0
|
||||
if quality == 'hd':
|
||||
preference += 5
|
||||
formats.append({
|
||||
'format_id': '%s_%s_%s' % (format_id, quality, src_type),
|
||||
'url': src,
|
||||
'preference': -10 if format_id == 'progressive' else 0,
|
||||
'preference': preference,
|
||||
})
|
||||
dash_manifest = f[0].get('dash_manifest')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_dash_manifest(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)),
|
||||
namespace='urn:mpeg:dash:schema:mpd:2011'))
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
|
@@ -52,7 +52,7 @@ class FazIE(InfoExtractor):
|
||||
formats = []
|
||||
for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
|
||||
encoding = xpath_element(encodings, code)
|
||||
if encoding:
|
||||
if encoding is not None:
|
||||
encoding_url = xpath_text(encoding, 'FILENAME')
|
||||
if encoding_url:
|
||||
formats.append({
|
||||
|
@@ -87,7 +87,7 @@ class FC2IE(InfoExtractor):
|
||||
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
||||
|
||||
info_url = (
|
||||
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
|
||||
'http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&'.
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))
|
||||
|
||||
info_webpage = self._download_webpage(
|
||||
|
@@ -9,6 +9,7 @@ class FOXIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
||||
'info_dict': {
|
||||
'id': '255180355939',
|
||||
'ext': 'mp4',
|
||||
@@ -17,10 +18,6 @@ class FOXIE(InfoExtractor):
|
||||
'duration': 129,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -29,7 +26,7 @@ class FOXIE(InfoExtractor):
|
||||
|
||||
release_url = self._parse_json(self._search_regex(
|
||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||
video_id)['release_url'] + '&manifest=m3u'
|
||||
video_id)['release_url'] + '&switch=http'
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
|
@@ -10,7 +10,7 @@ class FranceInterIE(InfoExtractor):
|
||||
_TEST = {
|
||||
'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
|
||||
'md5': '4764932e466e6f6c79c317d2e74f6884',
|
||||
"info_dict": {
|
||||
'info_dict': {
|
||||
'id': '793962',
|
||||
'ext': 'mp3',
|
||||
'title': 'L’Histoire dans les jeux vidéo',
|
||||
|
@@ -289,7 +289,7 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'href="http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
@@ -12,8 +12,8 @@ class FreeVideoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'vysukany-zadecek-22033',
|
||||
'ext': 'mp4',
|
||||
"title": "vysukany-zadecek-22033",
|
||||
"age_limit": 18,
|
||||
'title': 'vysukany-zadecek-22033',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': 'Blocked outside .cz',
|
||||
}
|
||||
|
@@ -47,6 +47,7 @@ from .senateisvp import SenateISVPIE
|
||||
from .svt import SVTIE
|
||||
from .pornhub import PornHubIE
|
||||
from .xhamster import XHamsterEmbedIE
|
||||
from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
@@ -224,6 +225,20 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# MPD from http://dash-mse-test.appspot.com/media.html
|
||||
{
|
||||
'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
|
||||
'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
|
||||
'info_dict': {
|
||||
'id': 'car-20120827-manifest',
|
||||
'ext': 'mp4',
|
||||
'title': 'car-20120827-manifest',
|
||||
'formats': 'mincount:9',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
@@ -1229,19 +1244,24 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = head_response.headers.get('Content-Type', '')
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
||||
if m:
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
formats = []
|
||||
if m.group('format_id').endswith('mpegurl'):
|
||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
else:
|
||||
formats = [{
|
||||
'format_id': m.group('format_id'),
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'direct': True,
|
||||
'formats': [{
|
||||
'format_id': m.group('format_id'),
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}],
|
||||
'formats': formats,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
@@ -1284,7 +1304,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
||||
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
||||
try:
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
@@ -1293,6 +1313,13 @@ class GenericIE(InfoExtractor):
|
||||
return self._parse_smil(doc, url, video_id)
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'formats': self._parse_mpd_formats(
|
||||
doc, video_id, mpd_base_url=url.rpartition('/')[0]),
|
||||
}
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
@@ -1402,7 +1429,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
@@ -1547,6 +1574,11 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'VK')
|
||||
|
||||
# Look for embedded Odnoklassniki player
|
||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Odnoklassniki')
|
||||
|
||||
# Look for embedded ivi player
|
||||
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
@@ -1602,6 +1634,11 @@ class GenericIE(InfoExtractor):
|
||||
if xhamster_urls:
|
||||
return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
|
||||
|
||||
# Look for embedded TNAFlixNetwork player
|
||||
tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
|
||||
if tnaflix_urls:
|
||||
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||
@@ -1946,6 +1983,8 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
elif ext == 'm3u8':
|
||||
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
||||
elif ext == 'mpd':
|
||||
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
|
||||
else:
|
||||
entry_info_dict['url'] = video_url
|
||||
|
||||
|
@@ -65,7 +65,7 @@ class GloboIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
class MD5:
|
||||
class MD5(object):
|
||||
HEX_FORMAT_LOWERCASE = 0
|
||||
HEX_FORMAT_UPPERCASE = 1
|
||||
BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = ''
|
||||
|
@@ -82,7 +82,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -11,8 +11,8 @@ class HentaiStigmaIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'inyouchuu-etsu-bonus',
|
||||
'ext': 'mp4',
|
||||
"title": "Inyouchuu Etsu Bonus",
|
||||
"age_limit": 18,
|
||||
'title': 'Inyouchuu Etsu Bonus',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -10,8 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class HotStarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
||||
'info_dict': {
|
||||
'id': '1000076273',
|
||||
@@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/1000000515',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
||||
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
||||
|
@@ -42,7 +42,7 @@ class ImdbIE(InfoExtractor):
|
||||
for f_url, f_name in extra_formats]
|
||||
format_pages.append(player_page)
|
||||
|
||||
quality = qualities(['SD', '480p', '720p'])
|
||||
quality = qualities(('SD', '480p', '720p', '1080p'))
|
||||
formats = []
|
||||
for format_page in format_pages:
|
||||
json_data = self._search_regex(
|
||||
|
@@ -4,15 +4,12 @@ from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_parse_qs,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import determine_ext
|
||||
from .bokecc import BokeCCBaseIE
|
||||
|
||||
|
||||
class InfoQIE(InfoExtractor):
|
||||
class InfoQIE(BokeCCBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -38,26 +35,6 @@ class InfoQIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_bokecc_videos(self, webpage, video_id):
|
||||
# TODO: bokecc.com is a Chinese video cloud platform
|
||||
# It should have an independent extractor but I don't have other
|
||||
# examples using bokecc
|
||||
player_params_str = self._html_search_regex(
|
||||
r'<script[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
|
||||
webpage, 'player params', default=None)
|
||||
|
||||
player_params = compat_parse_qs(player_params_str)
|
||||
|
||||
info_xml = self._download_xml(
|
||||
'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
|
||||
player_params['siteid'][0], player_params['vid'][0]), video_id)
|
||||
|
||||
return [{
|
||||
'format_id': 'bokecc',
|
||||
'url': quality.find('./copy').attrib['playurl'],
|
||||
'preference': int(quality.attrib['value']),
|
||||
} for quality in info_xml.findall('./video/quality')]
|
||||
|
||||
def _extract_rtmp_videos(self, webpage):
|
||||
# The server URL is hardcoded
|
||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
||||
@@ -101,7 +78,7 @@ class InfoQIE(InfoExtractor):
|
||||
|
||||
if '/cn/' in url:
|
||||
# for China videos, HTTP video URL exists but always fails with 403
|
||||
formats = self._extract_bokecc_videos(webpage, video_id)
|
||||
formats = self._extract_bokecc_formats(webpage, video_id)
|
||||
else:
|
||||
formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
|
||||
|
||||
|
@@ -2,46 +2,30 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from random import random
|
||||
from math import floor
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
|
||||
'info_dict': {
|
||||
'id': 'p136534',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gondíci s. r. o. (34)',
|
||||
'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||
'info_dict': {
|
||||
'id': '39152',
|
||||
'ext': 'flv',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45',
|
||||
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda',
|
||||
'info_dict': {
|
||||
'id': '9718337',
|
||||
'ext': 'flv',
|
||||
'title': 'Tchibo Partička - Jarní móda',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -51,62 +35,24 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
|
||||
raise ExtractorError(
|
||||
'%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
|
||||
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
|
||||
|
||||
player_url = (
|
||||
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
|
||||
(floor(random() * 1073741824), floor(random() * 1073741824))
|
||||
)
|
||||
|
||||
req = sanitized_Request(player_url)
|
||||
req = sanitized_Request(
|
||||
'http://play.iprima.cz/prehravac/init?_infuse=1'
|
||||
'&_ts=%s&productId=%s' % (round(time.time()), video_id))
|
||||
req.add_header('Referer', url)
|
||||
playerpage = self._download_webpage(req, video_id)
|
||||
playerpage = self._download_webpage(req, video_id, note='Downloading player')
|
||||
|
||||
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])
|
||||
m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url')
|
||||
|
||||
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
|
||||
if zoneGEO != '0':
|
||||
base_url = base_url.replace('token', 'token_' + zoneGEO)
|
||||
|
||||
formats = []
|
||||
for format_id in ['lq', 'hq', 'hd']:
|
||||
filename = self._html_search_regex(
|
||||
r'"%s_id":(.+?),' % format_id, webpage, 'filename')
|
||||
|
||||
if filename == 'null':
|
||||
continue
|
||||
|
||||
real_id = self._search_regex(
|
||||
r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]',
|
||||
filename, 'real video id')
|
||||
|
||||
if format_id == 'lq':
|
||||
quality = 0
|
||||
elif format_id == 'hq':
|
||||
quality = 1
|
||||
elif format_id == 'hd':
|
||||
quality = 2
|
||||
filename = 'hq/' + filename
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'quality': quality,
|
||||
'play_path': 'mp4:' + filename.replace('"', '')[:-4],
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'),
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._search_regex(
|
||||
r'<p[^>]+itemprop="description"[^>]*>([^<]+)',
|
||||
webpage, 'description', default=None),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
|
@@ -2,14 +2,163 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import itertools
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
ExtractorError,
|
||||
ohdave_rsa_encrypt,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
def md5_text(text):
|
||||
return hashlib.md5(text.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
class IqiyiSDK(object):
|
||||
def __init__(self, target, ip, timestamp):
|
||||
self.target = target
|
||||
self.ip = ip
|
||||
self.timestamp = timestamp
|
||||
|
||||
@staticmethod
|
||||
def split_sum(data):
|
||||
return compat_str(sum(map(lambda p: int(p, 16), list(data))))
|
||||
|
||||
@staticmethod
|
||||
def digit_sum(num):
|
||||
if isinstance(num, int):
|
||||
num = compat_str(num)
|
||||
return compat_str(sum(map(int, num)))
|
||||
|
||||
def even_odd(self):
|
||||
even = self.digit_sum(compat_str(self.timestamp)[::2])
|
||||
odd = self.digit_sum(compat_str(self.timestamp)[1::2])
|
||||
return even, odd
|
||||
|
||||
def preprocess(self, chunksize):
|
||||
self.target = md5_text(self.target)
|
||||
chunks = []
|
||||
for i in range(32 // chunksize):
|
||||
chunks.append(self.target[chunksize * i:chunksize * (i + 1)])
|
||||
if 32 % chunksize:
|
||||
chunks.append(self.target[32 - 32 % chunksize:])
|
||||
return chunks, list(map(int, self.ip.split('.')))
|
||||
|
||||
def mod(self, modulus):
|
||||
chunks, ip = self.preprocess(32)
|
||||
self.target = chunks[0] + ''.join(map(lambda p: compat_str(p % modulus), ip))
|
||||
|
||||
def split(self, chunksize):
|
||||
modulus_map = {
|
||||
4: 256,
|
||||
5: 10,
|
||||
8: 100,
|
||||
}
|
||||
|
||||
chunks, ip = self.preprocess(chunksize)
|
||||
ret = ''
|
||||
for i in range(len(chunks)):
|
||||
ip_part = compat_str(ip[i] % modulus_map[chunksize]) if i < 4 else ''
|
||||
if chunksize == 8:
|
||||
ret += ip_part + chunks[i]
|
||||
else:
|
||||
ret += chunks[i] + ip_part
|
||||
self.target = ret
|
||||
|
||||
def handle_input16(self):
|
||||
self.target = md5_text(self.target)
|
||||
self.target = self.split_sum(self.target[:16]) + self.target + self.split_sum(self.target[16:])
|
||||
|
||||
def handle_input8(self):
|
||||
self.target = md5_text(self.target)
|
||||
ret = ''
|
||||
for i in range(4):
|
||||
part = self.target[8 * i:8 * (i + 1)]
|
||||
ret += self.split_sum(part) + part
|
||||
self.target = ret
|
||||
|
||||
def handleSum(self):
|
||||
self.target = md5_text(self.target)
|
||||
self.target = self.split_sum(self.target) + self.target
|
||||
|
||||
def date(self, scheme):
|
||||
self.target = md5_text(self.target)
|
||||
d = time.localtime(self.timestamp)
|
||||
strings = {
|
||||
'y': compat_str(d.tm_year),
|
||||
'm': '%02d' % d.tm_mon,
|
||||
'd': '%02d' % d.tm_mday,
|
||||
}
|
||||
self.target += ''.join(map(lambda c: strings[c], list(scheme)))
|
||||
|
||||
def split_time_even_odd(self):
|
||||
even, odd = self.even_odd()
|
||||
self.target = odd + md5_text(self.target) + even
|
||||
|
||||
def split_time_odd_even(self):
|
||||
even, odd = self.even_odd()
|
||||
self.target = even + md5_text(self.target) + odd
|
||||
|
||||
def split_ip_time_sum(self):
|
||||
chunks, ip = self.preprocess(32)
|
||||
self.target = compat_str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp)
|
||||
|
||||
def split_time_ip_sum(self):
|
||||
chunks, ip = self.preprocess(32)
|
||||
self.target = self.digit_sum(self.timestamp) + chunks[0] + compat_str(sum(ip))
|
||||
|
||||
|
||||
class IqiyiSDKInterpreter(object):
|
||||
def __init__(self, sdk_code):
|
||||
self.sdk_code = sdk_code
|
||||
|
||||
def run(self, target, ip, timestamp):
|
||||
self.sdk_code = decode_packed_codes(self.sdk_code)
|
||||
|
||||
functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code)
|
||||
|
||||
sdk = IqiyiSDK(target, ip, timestamp)
|
||||
|
||||
other_functions = {
|
||||
'handleSum': sdk.handleSum,
|
||||
'handleInput8': sdk.handle_input8,
|
||||
'handleInput16': sdk.handle_input16,
|
||||
'splitTimeEvenOdd': sdk.split_time_even_odd,
|
||||
'splitTimeOddEven': sdk.split_time_odd_even,
|
||||
'splitIpTimeSum': sdk.split_ip_time_sum,
|
||||
'splitTimeIpSum': sdk.split_time_ip_sum,
|
||||
}
|
||||
for function in functions:
|
||||
if re.match(r'mod\d+', function):
|
||||
sdk.mod(int(function[3:]))
|
||||
elif re.match(r'date[ymd]{3}', function):
|
||||
sdk.date(function[4:])
|
||||
elif re.match(r'split\d+', function):
|
||||
sdk.split(int(function[5:]))
|
||||
elif function in other_functions:
|
||||
other_functions[function]()
|
||||
else:
|
||||
raise ExtractorError('Unknown funcion %s' % function)
|
||||
|
||||
return sdk.target
|
||||
|
||||
|
||||
class IqiyiIE(InfoExtractor):
|
||||
@@ -18,6 +167,8 @@ class IqiyiIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html'
|
||||
|
||||
_NETRC_MACHINE = 'iqiyi'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
|
||||
'md5': '2cb594dc2781e6c941a110d8f358118b',
|
||||
@@ -93,6 +244,35 @@ class IqiyiIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://yule.iqiyi.com/pcb.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# VIP-only video. The first 2 parts (6 minutes) are available without login
|
||||
# MD5 sums omitted as values are different on Travis CI and my machine
|
||||
'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
|
||||
'info_dict': {
|
||||
'id': 'f3cf468b39dddb30d676f89a91200dc1',
|
||||
'title': '泰坦尼克号',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
|
||||
'ext': 'f4v',
|
||||
'title': '泰坦尼克号',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
|
||||
'ext': 'f4v',
|
||||
'title': '泰坦尼克号',
|
||||
},
|
||||
}],
|
||||
'expected_warnings': ['Needs a VIP account for full video'],
|
||||
}, {
|
||||
'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
|
||||
'info_dict': {
|
||||
'id': '202918101',
|
||||
'title': '灌篮高手 国语版',
|
||||
},
|
||||
'playlist_count': 101,
|
||||
}]
|
||||
|
||||
_FORMATS_MAP = [
|
||||
@@ -104,11 +284,98 @@ class IqiyiIE(InfoExtractor):
|
||||
('10', 'h1'),
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def md5_text(text):
|
||||
return hashlib.md5(text.encode('utf-8')).hexdigest()
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def construct_video_urls(self, data, video_id, _uuid):
|
||||
@staticmethod
|
||||
def _rsa_fun(data):
|
||||
# public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
|
||||
N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
|
||||
e = 65537
|
||||
|
||||
return ohdave_rsa_encrypt(data, e, N)
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
|
||||
# No authentication to be performed
|
||||
if not username:
|
||||
return True
|
||||
|
||||
data = self._download_json(
|
||||
'http://kylin.iqiyi.com/get_token', None,
|
||||
note='Get token for logging', errnote='Unable to get token for logging')
|
||||
sdk = data['sdk']
|
||||
timestamp = int(time.time())
|
||||
target = '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % (
|
||||
username, self._rsa_fun(password.encode('utf-8')))
|
||||
|
||||
interp = IqiyiSDKInterpreter(sdk)
|
||||
sign = interp.run(target, data['ip'], timestamp)
|
||||
|
||||
validation_params = {
|
||||
'target': target,
|
||||
'server': 'BEA3AA1908656AABCCFF76582C4C6660',
|
||||
'token': data['token'],
|
||||
'bird_src': 'f8d91d57af224da7893dd397d52d811a',
|
||||
'sign': sign,
|
||||
'bird_t': timestamp,
|
||||
}
|
||||
validation_result = self._download_json(
|
||||
'http://kylin.iqiyi.com/validate?' + compat_urllib_parse.urlencode(validation_params), None,
|
||||
note='Validate credentials', errnote='Unable to validate credentials')
|
||||
|
||||
MSG_MAP = {
|
||||
'P00107': 'please login via the web interface and enter the CAPTCHA code',
|
||||
'P00117': 'bad username or password',
|
||||
}
|
||||
|
||||
code = validation_result['code']
|
||||
if code != 'A00000':
|
||||
msg = MSG_MAP.get(code)
|
||||
if not msg:
|
||||
msg = 'error %s' % code
|
||||
if validation_result.get('msg'):
|
||||
msg += ': ' + validation_result['msg']
|
||||
self._downloader.report_warning('unable to log in: ' + msg)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning):
|
||||
auth_params = {
|
||||
# version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
|
||||
'version': '2.0',
|
||||
'platform': 'b6c13e26323c537d',
|
||||
'aid': tvid,
|
||||
'tvid': tvid,
|
||||
'uid': '',
|
||||
'deviceId': _uuid,
|
||||
'playType': 'main', # XXX: always main?
|
||||
'filename': os.path.splitext(url_basename(api_video_url))[0],
|
||||
}
|
||||
|
||||
qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query)
|
||||
for key, val in qd_items.items():
|
||||
auth_params[key] = val[0]
|
||||
|
||||
auth_req = sanitized_Request(
|
||||
'http://api.vip.iqiyi.com/services/ckn.action',
|
||||
urlencode_postdata(auth_params))
|
||||
# iQiyi server throws HTTP 405 error without the following header
|
||||
auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
auth_result = self._download_json(
|
||||
auth_req, video_id,
|
||||
note='Downloading video authentication JSON',
|
||||
errnote='Unable to download video authentication JSON')
|
||||
if auth_result['code'] == 'Q00506': # requires a VIP account
|
||||
if do_report_warning:
|
||||
self.report_warning('Needs a VIP account for full video')
|
||||
return False
|
||||
|
||||
return auth_result
|
||||
|
||||
def construct_video_urls(self, data, video_id, _uuid, tvid):
|
||||
def do_xor(x, y):
|
||||
a = y % 3
|
||||
if a == 1:
|
||||
@@ -134,9 +401,10 @@ class IqiyiIE(InfoExtractor):
|
||||
note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
|
||||
)['t']
|
||||
t = str(int(math.floor(int(tm) / (600.0))))
|
||||
return self.md5_text(t + mg + x)
|
||||
return md5_text(t + mg + x)
|
||||
|
||||
video_urls_dict = {}
|
||||
need_vip_warning_report = True
|
||||
for format_item in data['vp']['tkl'][0]['vs']:
|
||||
if 0 < int(format_item['bid']) <= 10:
|
||||
format_id = self.get_format(format_item['bid'])
|
||||
@@ -155,11 +423,13 @@ class IqiyiIE(InfoExtractor):
|
||||
vl = segment['l']
|
||||
if not vl.startswith('/'):
|
||||
vl = get_encode_code(vl)
|
||||
key = get_path_key(
|
||||
vl.split('/')[-1].split('.')[0], format_id, segment_index)
|
||||
is_vip_video = '/vip/' in vl
|
||||
filesize = segment['b']
|
||||
base_url = data['vp']['du'].split('/')
|
||||
base_url.insert(-1, key)
|
||||
if not is_vip_video:
|
||||
key = get_path_key(
|
||||
vl.split('/')[-1].split('.')[0], format_id, segment_index)
|
||||
base_url.insert(-1, key)
|
||||
base_url = '/'.join(base_url)
|
||||
param = {
|
||||
'su': _uuid,
|
||||
@@ -170,8 +440,23 @@ class IqiyiIE(InfoExtractor):
|
||||
'ct': '',
|
||||
'tn': str(int(time.time()))
|
||||
}
|
||||
api_video_url = base_url + vl + '?' + \
|
||||
compat_urllib_parse.urlencode(param)
|
||||
api_video_url = base_url + vl
|
||||
if is_vip_video:
|
||||
api_video_url = api_video_url.replace('.f4v', '.hml')
|
||||
auth_result = self._authenticate_vip_video(
|
||||
api_video_url, video_id, tvid, _uuid, need_vip_warning_report)
|
||||
if auth_result is False:
|
||||
need_vip_warning_report = False
|
||||
break
|
||||
param.update({
|
||||
't': auth_result['data']['t'],
|
||||
# cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
|
||||
'cid': 'afbe8fd3d73448c9',
|
||||
'vid': video_id,
|
||||
'QY00001': auth_result['data']['u'],
|
||||
})
|
||||
api_video_url += '?' if '?' not in api_video_url else '&'
|
||||
api_video_url += compat_urllib_parse.urlencode(param)
|
||||
js = self._download_json(
|
||||
api_video_url, video_id,
|
||||
note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
|
||||
@@ -195,16 +480,17 @@ class IqiyiIE(InfoExtractor):
|
||||
tail = tm + tvid
|
||||
param = {
|
||||
'key': 'fvip',
|
||||
'src': self.md5_text('youtube-dl'),
|
||||
'src': md5_text('youtube-dl'),
|
||||
'tvId': tvid,
|
||||
'vid': video_id,
|
||||
'vinfo': 1,
|
||||
'tm': tm,
|
||||
'enc': self.md5_text(enc_key + tail),
|
||||
'enc': md5_text(enc_key + tail),
|
||||
'qyid': _uuid,
|
||||
'tn': random.random(),
|
||||
'um': 0,
|
||||
'authkey': self.md5_text(self.md5_text('') + tail),
|
||||
'authkey': md5_text(md5_text('') + tail),
|
||||
'k_tag': 1,
|
||||
}
|
||||
|
||||
api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
|
||||
@@ -218,9 +504,49 @@ class IqiyiIE(InfoExtractor):
|
||||
enc_key = '6ab6d0280511493ba85594779759d4ed'
|
||||
return enc_key
|
||||
|
||||
def _extract_playlist(self, webpage):
|
||||
PAGE_SIZE = 50
|
||||
|
||||
links = re.findall(
|
||||
r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
|
||||
webpage)
|
||||
if not links:
|
||||
return
|
||||
|
||||
album_id = self._search_regex(
|
||||
r'albumId\s*:\s*(\d+),', webpage, 'album ID')
|
||||
album_title = self._search_regex(
|
||||
r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)
|
||||
|
||||
entries = list(map(self.url_result, links))
|
||||
|
||||
# Start from 2 because links in the first page are already on webpage
|
||||
for page_num in itertools.count(2):
|
||||
pagelist_page = self._download_webpage(
|
||||
'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE),
|
||||
album_id,
|
||||
note='Download playlist page %d' % page_num,
|
||||
errnote='Failed to download playlist page %d' % page_num)
|
||||
pagelist = self._parse_json(
|
||||
remove_start(pagelist_page, 'var tvInfoJs='), album_id)
|
||||
vlist = pagelist['data']['vlist']
|
||||
for item in vlist:
|
||||
entries.append(self.url_result(item['vurl']))
|
||||
if len(vlist) < PAGE_SIZE:
|
||||
break
|
||||
|
||||
return self.playlist_result(entries, album_id, album_title)
|
||||
|
||||
def _real_extract(self, url):
|
||||
webpage = self._download_webpage(
|
||||
url, 'temp_id', note='download video page')
|
||||
|
||||
# There's no simple way to determine whether an URL is a playlist or not
|
||||
# So detect it
|
||||
playlist_result = self._extract_playlist(webpage)
|
||||
if playlist_result:
|
||||
return playlist_result
|
||||
|
||||
tvid = self._search_regex(
|
||||
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
|
||||
video_id = self._search_regex(
|
||||
@@ -236,16 +562,13 @@ class IqiyiIE(InfoExtractor):
|
||||
if raw_data['code'] != 'A000000':
|
||||
raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
|
||||
|
||||
if not raw_data['data']['vp']['tkl']:
|
||||
raise ExtractorError('No support iQiqy VIP video')
|
||||
|
||||
data = raw_data['data']
|
||||
|
||||
title = data['vi']['vn']
|
||||
|
||||
# generate video_urls_dict
|
||||
video_urls_dict = self.construct_video_urls(
|
||||
data, video_id, _uuid)
|
||||
data, video_id, _uuid, tvid)
|
||||
|
||||
# construct info
|
||||
entries = []
|
||||
|
@@ -7,7 +7,46 @@ from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class JWPlatformIE(InfoExtractor):
|
||||
class JWPlatformBaseIE(InfoExtractor):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
|
||||
video_data = jwplayer_data['playlist'][0]
|
||||
subtitles = {}
|
||||
for track in video_data['tracks']:
|
||||
if track['kind'] == 'captions':
|
||||
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_type = source.get('type') or ''
|
||||
if source_type in ('application/vnd.apple.mpegurl', 'hls'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
|
||||
elif source_type.startswith('audio'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class JWPlatformIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TEST = {
|
||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
@@ -33,38 +72,4 @@ class JWPlatformIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
|
||||
video_data = json_data['playlist'][0]
|
||||
subtitles = {}
|
||||
for track in video_data['tracks']:
|
||||
if track['kind'] == 'captions':
|
||||
subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_type = source.get('type') or ''
|
||||
if source_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
|
||||
elif source_type.startswith('audio'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'],
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
return self._parse_jwplayer_data(json_data, video_id)
|
||||
|
@@ -28,7 +28,7 @@ class KankanIE(InfoExtractor):
|
||||
|
||||
title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
|
||||
surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
|
||||
gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
|
||||
gcids = re.findall(r'http://.+?/.+?/(.+?)/', surls)
|
||||
gcid = gcids[-1]
|
||||
|
||||
info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
|
||||
|
107
youtube_dl/extractor/konserthusetplay.py
Normal file
107
youtube_dl/extractor/konserthusetplay.py
Normal file
@@ -0,0 +1,107 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KonserthusetPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
|
||||
'info_dict': {
|
||||
'id': 'CKDDnlCY-dhWAAqiMERd-A',
|
||||
'ext': 'flv',
|
||||
'title': 'Orkesterns instrument: Valthornen',
|
||||
'description': 'md5:f10e1f0030202020396a4d712d2fa827',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'duration': 398.8,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
e = self._search_regex(
|
||||
r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')
|
||||
|
||||
rest = self._download_json(
|
||||
'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
|
||||
video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
|
||||
|
||||
media = rest['media']
|
||||
player_config = media['playerconfig']
|
||||
playlist = player_config['playlist']
|
||||
|
||||
source = next(f for f in playlist if f.get('bitrates'))
|
||||
|
||||
FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
|
||||
|
||||
formats = []
|
||||
|
||||
fallback_url = source.get('fallbackUrl')
|
||||
fallback_format_id = None
|
||||
if fallback_url:
|
||||
fallback_format_id = self._search_regex(
|
||||
FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
|
||||
|
||||
connection_url = (player_config.get('rtmp', {}).get(
|
||||
'netConnectionUrl') or player_config.get(
|
||||
'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
|
||||
if connection_url:
|
||||
for f in source['bitrates']:
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = self._search_regex(
|
||||
FORMAT_ID_REGEX, video_url, 'format id', default=None)
|
||||
f_common = {
|
||||
'vbr': int_or_none(f.get('bitrate')),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
}
|
||||
f = f_common.copy()
|
||||
f.update({
|
||||
'url': connection_url,
|
||||
'play_path': video_url,
|
||||
'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(f)
|
||||
if format_id and format_id == fallback_format_id:
|
||||
f = f_common.copy()
|
||||
f.update({
|
||||
'url': fallback_url,
|
||||
'format_id': 'http-%s' % format_id if format_id else 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
if not formats and fallback_url:
|
||||
formats.append({
|
||||
'url': fallback_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = player_config.get('title') or media['title']
|
||||
description = player_config.get('mediaInfo', {}).get('description')
|
||||
thumbnail = media.get('image')
|
||||
duration = float_or_none(media.get('duration'), 1000)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -31,6 +31,10 @@ class KuwoBaseIE(InfoExtractor):
|
||||
(file_format['ext'], file_format.get('br', ''), song_id),
|
||||
song_id, note='Download %s url info' % file_format['format'],
|
||||
)
|
||||
|
||||
if song_url == 'IPDeny':
|
||||
raise ExtractorError('This song is blocked in this region', expected=True)
|
||||
|
||||
if song_url.startswith('http://') or song_url.startswith('https://'):
|
||||
formats.append({
|
||||
'url': song_url,
|
||||
@@ -64,6 +68,7 @@ class KuwoIE(KuwoBaseIE):
|
||||
'id': '6446136',
|
||||
'ext': 'mp3',
|
||||
'title': '心',
|
||||
'description': 'md5:b2ab6295d014005bfc607525bfc1e38a',
|
||||
'creator': 'IU',
|
||||
'upload_date': '20150518',
|
||||
},
|
||||
|
@@ -1,86 +1,125 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class Laola1TvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/[^/]+/(?P<slug>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
||||
'info_dict': {
|
||||
'id': '227883',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'straubing-tigers-koelner-haie',
|
||||
'ext': 'flv',
|
||||
'title': 'Straubing Tigers - Kölner Haie',
|
||||
'categories': ['Eishockey'],
|
||||
'upload_date': '20140912',
|
||||
'is_live': False,
|
||||
'categories': ['Eishockey'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
|
||||
'info_dict': {
|
||||
'id': '464602',
|
||||
'display_id': 'straubing-tigers-koelner-haie',
|
||||
'ext': 'flv',
|
||||
'title': 'Straubing Tigers - Kölner Haie',
|
||||
'upload_date': '20160129',
|
||||
'is_live': False,
|
||||
'categories': ['Eishockey'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('slug')
|
||||
lang = mobj.group('lang')
|
||||
portal = mobj.group('portal')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
iframe_url = self._search_regex(
|
||||
r'<iframe[^>]*?class="main_tv_player"[^>]*?src="([^"]+)"',
|
||||
webpage, 'iframe URL')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
iframe = self._download_webpage(
|
||||
iframe_url, video_id, note='Downloading iframe')
|
||||
flashvars_m = re.findall(
|
||||
r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
|
||||
flashvars = dict((m[0], m[1]) for m in flashvars_m)
|
||||
iframe_url = self._search_regex(
|
||||
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
|
||||
webpage, 'iframe url')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'videoid=(\d+)', iframe_url, 'video id')
|
||||
|
||||
iframe = self._download_webpage(compat_urlparse.urljoin(
|
||||
url, iframe_url), display_id, 'Downloading iframe')
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
|
||||
r'partnerid\s*:\s*(["\'])(?P<partner_id>.+?)\1',
|
||||
iframe, 'partner id', group='partner_id')
|
||||
|
||||
xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
|
||||
'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
|
||||
video_id, partner_id, portal, lang))
|
||||
hd_doc = self._download_xml(xml_url, video_id)
|
||||
hd_doc = self._download_xml(
|
||||
'http://www.laola1.tv/server/hd_video.php?%s'
|
||||
% compat_urllib_parse.urlencode({
|
||||
'play': video_id,
|
||||
'partner': partner_id,
|
||||
'portal': portal,
|
||||
'lang': lang,
|
||||
'v5ident': '',
|
||||
}), display_id)
|
||||
|
||||
title = xpath_text(hd_doc, './/video/title', fatal=True)
|
||||
flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
|
||||
uploader = xpath_text(hd_doc, './/video/meta_organistation')
|
||||
is_live = xpath_text(hd_doc, './/video/islive') == 'true'
|
||||
_v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
|
||||
title = _v('title', fatal=True)
|
||||
|
||||
categories = xpath_text(hd_doc, './/video/meta_sports')
|
||||
if categories:
|
||||
categories = categories.split(',')
|
||||
req = sanitized_Request(
|
||||
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
|
||||
compat_urllib_parse.urlencode({
|
||||
'videoId': video_id,
|
||||
'target': '2',
|
||||
'label': 'laola1tv',
|
||||
'area': _v('area'),
|
||||
}),
|
||||
urlencode_postdata(
|
||||
dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))))
|
||||
|
||||
ident = random.randint(10000000, 99999999)
|
||||
token_url = '%s&ident=%s&klub=0&unikey=0×tamp=%s&auth=%s' % (
|
||||
flash_url, ident, flashvars['timestamp'], flashvars['auth'])
|
||||
token_url = self._download_json(req, display_id)['data']['stream-access'][0]
|
||||
token_doc = self._download_xml(token_url, display_id, 'Downloading token')
|
||||
|
||||
token_doc = self._download_xml(
|
||||
token_url, video_id, note='Downloading token')
|
||||
token_attrib = token_doc.find('.//token').attrib
|
||||
if token_attrib.get('auth') in ('blocked', 'restricted'):
|
||||
token_attrib = xpath_element(token_doc, './/token').attrib
|
||||
token_auth = token_attrib['auth']
|
||||
|
||||
if token_auth in ('blocked', 'restricted', 'error'):
|
||||
raise ExtractorError(
|
||||
'Token error: %s' % token_attrib.get('comment'), expected=True)
|
||||
'Token error: %s' % token_attrib['comment'], expected=True)
|
||||
|
||||
video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
|
||||
token_attrib['url'], token_attrib['auth'])
|
||||
formats = self._extract_f4m_formats(
|
||||
'%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
|
||||
video_id, f4m_id='hds')
|
||||
|
||||
categories_str = _v('meta_sports')
|
||||
categories = categories_str.split(',') if categories_str else []
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'is_live': is_live,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'uploader': uploader,
|
||||
'upload_date': unified_strdate(_v('time_date')),
|
||||
'uploader': _v('meta_organisation'),
|
||||
'categories': categories,
|
||||
'ext': 'mp4',
|
||||
'is_live': _v('islive') == 'true',
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -94,17 +94,16 @@ class LetvIE(InfoExtractor):
|
||||
return encrypted_data
|
||||
encrypted_data = encrypted_data[5:]
|
||||
|
||||
_loc4_ = bytearray()
|
||||
while encrypted_data:
|
||||
b = compat_ord(encrypted_data[0])
|
||||
_loc4_.extend([b // 16, b & 0x0f])
|
||||
encrypted_data = encrypted_data[1:]
|
||||
_loc4_ = bytearray(2 * len(encrypted_data))
|
||||
for idx, val in enumerate(encrypted_data):
|
||||
b = compat_ord(val)
|
||||
_loc4_[2 * idx] = b // 16
|
||||
_loc4_[2 * idx + 1] = b % 16
|
||||
idx = len(_loc4_) - 11
|
||||
_loc4_ = _loc4_[idx:] + _loc4_[:idx]
|
||||
_loc7_ = bytearray()
|
||||
while _loc4_:
|
||||
_loc7_.append(_loc4_[0] * 16 + _loc4_[1])
|
||||
_loc4_ = _loc4_[2:]
|
||||
_loc7_ = bytearray(len(encrypted_data))
|
||||
for i in range(len(encrypted_data)):
|
||||
_loc7_[i] = _loc4_[2 * i] * 16 + _loc4_[2 * i + 1]
|
||||
|
||||
return bytes(_loc7_)
|
||||
|
||||
|
@@ -47,7 +47,7 @@ class LiveLeakIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '801_1409392012',
|
||||
'ext': 'mp4',
|
||||
'description': "Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.",
|
||||
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
|
||||
'uploader': 'bony333',
|
||||
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia'
|
||||
}
|
||||
|
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class MailRuIE(InfoExtractor):
|
||||
@@ -34,14 +38,30 @@ class MailRuIE(InfoExtractor):
|
||||
'id': '46843144_1263',
|
||||
'ext': 'mp4',
|
||||
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
||||
'timestamp': 1397217632,
|
||||
'upload_date': '20140411',
|
||||
'uploader': 'hitech',
|
||||
'timestamp': 1397039888,
|
||||
'upload_date': '20140409',
|
||||
'uploader': 'hitech@corp.mail.ru',
|
||||
'uploader_id': 'hitech@corp.mail.ru',
|
||||
'duration': 245,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
},
|
||||
{
|
||||
# only available via metaUrl API
|
||||
'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
|
||||
'md5': '3b26d2491c6949d031a32b96bd97c096',
|
||||
'info_dict': {
|
||||
'id': '56664382_502',
|
||||
'ext': 'mp4',
|
||||
'title': ':8336',
|
||||
'timestamp': 1449094163,
|
||||
'upload_date': '20151202',
|
||||
'uploader': '720pizle@mail.ru',
|
||||
'uploader_id': '720pizle@mail.ru',
|
||||
'duration': 6001,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -51,32 +71,55 @@ class MailRuIE(InfoExtractor):
|
||||
if not video_id:
|
||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
author = video_data['author']
|
||||
uploader = author['name']
|
||||
uploader_id = author.get('id') or author.get('email')
|
||||
view_count = video_data.get('views_count')
|
||||
video_data = None
|
||||
|
||||
page_config = self._parse_json(self._search_regex(
|
||||
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||
if page_config:
|
||||
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
|
||||
if meta_url:
|
||||
video_data = self._download_json(
|
||||
meta_url, video_id, 'Downloading video meta JSON', fatal=False)
|
||||
|
||||
# Fallback old approach
|
||||
if not video_data:
|
||||
video_data = self._download_json(
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
formats = []
|
||||
for f in video_data['videos']:
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = f.get('key')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
meta_data = video_data['meta']
|
||||
content_id = '%s_%s' % (
|
||||
meta_data.get('accId', ''), meta_data['itemId'])
|
||||
title = meta_data['title']
|
||||
if title.endswith('.mp4'):
|
||||
title = title[:-4]
|
||||
thumbnail = meta_data['poster']
|
||||
duration = meta_data['duration']
|
||||
timestamp = meta_data['timestamp']
|
||||
title = remove_end(meta_data['title'], '.mp4')
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video['key'],
|
||||
'height': int(video['key'].rstrip('p'))
|
||||
} for video in video_data['videos']
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
author = video_data.get('author')
|
||||
uploader = author.get('name')
|
||||
uploader_id = author.get('id') or author.get('email')
|
||||
view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
|
||||
|
||||
acc_id = meta_data.get('accId')
|
||||
item_id = meta_data.get('itemId')
|
||||
content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
|
||||
|
||||
thumbnail = meta_data.get('poster')
|
||||
duration = int_or_none(meta_data.get('duration'))
|
||||
timestamp = int_or_none(meta_data.get('timestamp'))
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
_TESTS = [{
|
||||
# MDR regularly deletes its videos
|
||||
@@ -60,6 +60,9 @@ class MDRIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -68,8 +71,8 @@ class MDRIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_url = self._search_regex(
|
||||
r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
|
||||
webpage, 'data url', group='url')
|
||||
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>\\?/.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
|
||||
webpage, 'data url', default=None, group='url').replace('\/', '/')
|
||||
|
||||
doc = self._download_xml(
|
||||
compat_urlparse.urljoin(url, data_url), video_id)
|
||||
|
@@ -38,7 +38,7 @@ class MofosexIE(InfoExtractor):
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
format = "-".join(format)
|
||||
format = '-'.join(format)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
|
@@ -5,6 +5,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -12,55 +13,62 @@ from ..utils import (
|
||||
|
||||
class MotherlessIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://motherless.com/AC3FFE1',
|
||||
'md5': '310f62e325a9fafe64f68c0bccb6e75f',
|
||||
'info_dict': {
|
||||
'id': 'AC3FFE1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fucked in the ass while playing PS3',
|
||||
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
|
||||
'upload_date': '20100913',
|
||||
'uploader_id': 'famouslyfuckedup',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://motherless.com/532291B',
|
||||
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
|
||||
'info_dict': {
|
||||
'id': '532291B',
|
||||
'ext': 'mp4',
|
||||
'title': 'Amazing girl playing the omegle game, PERFECT!',
|
||||
'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
|
||||
'upload_date': '20140622',
|
||||
'uploader_id': 'Sulivana7x',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://motherless.com/g/cosplay/633979F',
|
||||
'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
|
||||
'info_dict': {
|
||||
'id': '633979F',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turtlette',
|
||||
'categories': ['superheroine heroine superher'],
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': 'shade0230',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'http://motherless.com/AC3FFE1',
|
||||
'md5': '310f62e325a9fafe64f68c0bccb6e75f',
|
||||
'info_dict': {
|
||||
'id': 'AC3FFE1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fucked in the ass while playing PS3',
|
||||
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
|
||||
'upload_date': '20100913',
|
||||
'uploader_id': 'famouslyfuckedup',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'http://motherless.com/532291B',
|
||||
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
|
||||
'info_dict': {
|
||||
'id': '532291B',
|
||||
'ext': 'mp4',
|
||||
'title': 'Amazing girl playing the omegle game, PERFECT!',
|
||||
'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
|
||||
'game', 'hairy'],
|
||||
'upload_date': '20140622',
|
||||
'uploader_id': 'Sulivana7x',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': '404',
|
||||
}, {
|
||||
'url': 'http://motherless.com/g/cosplay/633979F',
|
||||
'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
|
||||
'info_dict': {
|
||||
'id': '633979F',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turtlette',
|
||||
'categories': ['superheroine heroine superher'],
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': 'shade0230',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
# no keywords
|
||||
'url': 'http://motherless.com/8B4BBC1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if any(p in webpage for p in (
|
||||
'<title>404 - MOTHERLESS.COM<',
|
||||
">The page you're looking for cannot be found.<")):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
@@ -86,7 +94,7 @@ class MotherlessIE(InfoExtractor):
|
||||
r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
|
||||
webpage, 'uploader_id')
|
||||
|
||||
categories = self._html_search_meta('keywords', webpage)
|
||||
categories = self._html_search_meta('keywords', webpage, default=None)
|
||||
if categories:
|
||||
categories = [cat.strip() for cat in categories.split(',')]
|
||||
|
||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
@@ -110,7 +111,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
uri = itemdoc.find('guid').text
|
||||
video_id = self._id_from_uri(uri)
|
||||
self.report_extraction(video_id)
|
||||
mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url']
|
||||
content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
|
||||
mediagen_url = content_el.attrib['url']
|
||||
# Remove the templates, like &device={device}
|
||||
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
|
||||
if 'acceptMethods' not in mediagen_url:
|
||||
@@ -165,6 +167,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
'id': video_id,
|
||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||
'description': description,
|
||||
'duration': float_or_none(content_el.attrib.get('duration')),
|
||||
}
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
|
@@ -18,8 +18,8 @@ class MySpassIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '11741',
|
||||
'ext': 'mp4',
|
||||
"description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
|
||||
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
|
||||
'description': 'Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
|
||||
'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2',
|
||||
},
|
||||
}
|
||||
|
||||
|
@@ -19,6 +19,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MyVideoIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
|
||||
IE_NAME = 'myvideo'
|
||||
_TEST = {
|
||||
|
@@ -1,18 +1,26 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
xpath_text,
|
||||
xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)?video/(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||
@@ -44,14 +52,101 @@ class NBAIE(InfoExtractor):
|
||||
'timestamp': 1432134543,
|
||||
'upload_date': '20150520',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
|
||||
'info_dict': {
|
||||
'id': '1455672027478-Doc_Feb16_720',
|
||||
'ext': 'mp4',
|
||||
'title': 'Practice: Doc Rivers - 2/16/16',
|
||||
'description': 'Head Coach Doc Rivers addresses the media following practice.',
|
||||
'upload_date': '20160217',
|
||||
'timestamp': 1455672000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'timberwolves',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'params': {
|
||||
# Downloading the whole playlist takes too long
|
||||
'playlist_items': '1-30',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'Wigginsmp4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
|
||||
'upload_date': '20141212',
|
||||
'timestamp': 1418418600,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 30
|
||||
|
||||
def _fetch_page(self, team, video_id, page):
    """Yield URL results for one page of a team's video search listing.

    Queries the searchapp2.nba.com search endpoint for ``team`` and yields
    one ``url_result`` per item of the first entry in the response's
    ``results`` list. ``page`` is zero-based; ``video_id`` is only used
    for progress/log output of the JSON download.
    """
    query = compat_urllib_parse.urlencode({
        'type': 'teamvideo',
        'start': page * self._PAGE_SIZE + 1,
        'npp': (page + 1) * self._PAGE_SIZE + 1,
        'sort': 'recent',
        'output': 'json',
        'site': team,
    })
    search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + query
    response = self._download_json(
        search_url, video_id,
        note='Download page %d of playlist data' % page)
    for item in response['results'][0]:
        # Item URLs are resolved against the main site root.
        entry_url = compat_urlparse.urljoin('http://www.nba.com/', item['url'])
        yield self.url_result(entry_url)
|
||||
|
||||
def _extract_playlist(self, orig_path, video_id, webpage):
    """Return either the team's video playlist or just its first video.

    The team name is the first component of ``orig_path``. When the
    ``noplaylist`` downloader option is set, only the video named by
    ``nbaVideoCore.firstVideo`` in ``webpage`` is returned as a URL
    result; otherwise a lazily paged playlist built on ``_fetch_page``
    is returned.
    """
    team = orig_path.partition('/')[0]

    if not self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading playlist - add --no-playlist to just download video')
        playlist_title = self._og_search_title(webpage, fatal=False)
        page_func = functools.partial(self._fetch_page, team, video_id)
        entries = OnDemandPagedList(page_func, self._PAGE_SIZE, use_cache=True)
        return self.playlist_result(entries, team, playlist_title)

    # --no-playlist: resolve the single video the page starts with.
    self.to_screen('Downloading just video because of --no-playlist')
    video_path = self._search_regex(
        r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
    return self.url_result(
        'http://www.nba.com/%s/video/%s' % (team, video_path))
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
orig_path = path
|
||||
if path.startswith('nba/'):
|
||||
path = path[3:]
|
||||
|
||||
if 'video/' not in path:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
|
||||
|
||||
if path == '{{id}}':
|
||||
return self._extract_playlist(orig_path, video_id, webpage)
|
||||
|
||||
# See prepareContentId() of pkgCvp.js
|
||||
if path.startswith('video/teams'):
|
||||
path = 'video/channels/proxy/' + path[6:]
|
||||
|
||||
video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
|
||||
video_id = xpath_text(video_info, 'slug')
|
||||
video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0]
|
||||
title = xpath_text(video_info, 'headline')
|
||||
description = xpath_text(video_info, 'description')
|
||||
duration = parse_duration(xpath_text(video_info, 'length'))
|
||||
|
@@ -57,7 +57,7 @@ class NBCIE(InfoExtractor):
|
||||
{
|
||||
# This video has expired but with an escaped embedURL
|
||||
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
||||
'skip': 'Expired'
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
@@ -18,14 +18,14 @@ class NerdCubedFeedIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed = self._download_json(url, url, "Downloading NerdCubed JSON feed")
|
||||
feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed')
|
||||
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'title': feed_entry['title'],
|
||||
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
|
||||
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
|
||||
'url': "http://www.youtube.com/watch?v=" + feed_entry['youtube_id'],
|
||||
'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
|
||||
} for feed_entry in feed]
|
||||
|
||||
return {
|
||||
|
69
youtube_dl/extractor/noz.py
Normal file
69
youtube_dl/extractor/noz.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class NozIE(InfoExtractor):
    """Information extractor for video pages on noz.de.

    The video page references a ``videojs_*`` script; that script carries a
    URL-encoded ``config_url`` pointing at an XML document which describes
    the title, thumbnail, duration and the available mp4 qualities.
    """
    _VALID_URL = r'https?://(?:www\.)?noz\.de/video/(?P<id>[0-9]+)/'
    _TESTS = [{
        'url': 'http://www.noz.de/video/25151/32-Deutschland-gewinnt-Badminton-Lnderspiel-in-Melle',
        'info_dict': {
            'id': '25151',
            'ext': 'mp4',
            'duration': 215,
            'title': '3:2 - Deutschland gewinnt Badminton-Länderspiel in Melle',
            'description': 'Vor rund 370 Zuschauern gewinnt die deutsche Badminton-Nationalmannschaft am Donnerstag ein EM-Vorbereitungsspiel gegen Frankreich in Melle. Video Moritz Frankenberg.',
            # Raw string: same value as before, without the invalid '\.' escape.
            'thumbnail': r're:^http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Extract metadata and formats for the video at ``url``.

        Returns an info dict with ``id``, ``formats``, ``title``,
        ``duration``, ``description`` and ``thumbnail``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        description = self._og_search_description(webpage)

        edge_url = self._html_search_regex(
            r'<script\s+(?:type="text/javascript"\s+)?src="(.*?/videojs_.*?)"',
            webpage, 'edge URL')
        # Pass the real video id (second argument) and use the note for the
        # human-readable description, so log lines are attributed to the video.
        edge_content = self._download_webpage(
            edge_url, video_id, 'Downloading meta configuration')

        config_url_encoded = self._search_regex(
            r'so\.addVariable\("config_url","[^,]*,(.*?)"',
            edge_content, 'config URL'
        )
        config_url = compat_urllib_parse_unquote(config_url_encoded)

        doc = self._download_xml(
            config_url, video_id, 'Downloading video configuration')
        title = xpath_text(doc, './/title')
        thumbnail = xpath_text(doc, './/article/thumbnail/url')
        duration = int_or_none(xpath_text(
            doc, './/article/movie/file/duration'))

        formats = []
        for qnode in doc.findall('.//article/movie/file/qualities/qual'):
            video_node = qnode.find('./html_urls/video_url[@format="video/mp4"]')
            # Qualities without an mp4 URL (e.g. "auto") or with an empty
            # URL element cannot be downloaded; skip them.
            if video_node is None or not video_node.text:
                continue
            formats.append({
                'url': video_node.text,
                'format_name': xpath_text(qnode, './name'),
                'format_id': xpath_text(qnode, './id'),
                'height': int_or_none(xpath_text(qnode, './height')),
                'width': int_or_none(xpath_text(qnode, './width')),
                'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'duration': duration,
            'description': description,
            'thumbnail': thumbnail,
        }
|
@@ -112,6 +112,7 @@ class ORFTVthekIE(InfoExtractor):
|
||||
% geo_str),
|
||||
fatal=False)
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
upload_date = unified_strdate(sd['created_date'])
|
||||
|
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
@@ -199,7 +201,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
|
||||
'duration': 3190,
|
||||
},
|
||||
'params': {
|
||||
@@ -213,7 +215,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - Losing Iraq',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
|
||||
'duration': 5050,
|
||||
},
|
||||
'params': {
|
||||
@@ -227,7 +229,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2201174722',
|
||||
'ext': 'mp4',
|
||||
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
|
||||
'description': 'md5:95a19f568689d09a166dff9edada3301',
|
||||
'duration': 801,
|
||||
},
|
||||
},
|
||||
@@ -237,8 +239,8 @@ class PBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2365297708',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
||||
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||
'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -278,7 +280,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'player',
|
||||
'ext': 'mp4',
|
||||
'title': 'American Experience - Death and the Civil War, Chapter 1',
|
||||
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
|
||||
'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',
|
||||
'duration': 682,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -287,20 +289,19 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://video.pbs.org/video/2365367186/',
|
||||
'url': 'http://www.pbs.org/video/2365245528/',
|
||||
'info_dict': {
|
||||
'id': '2365367186',
|
||||
'display_id': '2365367186',
|
||||
'id': '2365245528',
|
||||
'display_id': '2365245528',
|
||||
'ext': 'mp4',
|
||||
'title': 'To Catch A Comet - Full Episode',
|
||||
'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
|
||||
'duration': 3342,
|
||||
'title': 'FRONTLINE - United States of Secrets (Part One)',
|
||||
'description': 'md5:55756bd5c551519cc4b7703e373e217e',
|
||||
'duration': 6851,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
'skip': 'Expired',
|
||||
},
|
||||
{
|
||||
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
||||
@@ -312,7 +313,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
||||
'ext': 'mp4',
|
||||
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
|
||||
'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
|
||||
'description': 'md5:54033c6baa1f9623607c6e2ed245888b',
|
||||
'duration': 1480,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -328,7 +329,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'the-atomic-artists',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - The Atomic Artists',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',
|
||||
'duration': 723,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@@ -336,6 +337,21 @@ class PBSIE(InfoExtractor):
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
# Serves HD only via widget/partnerplayer page
|
||||
'url': 'http://www.pbs.org/video/2365641075/',
|
||||
'info_dict': {
|
||||
'id': '2365641075',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - Netanyahu at War',
|
||||
'duration': 6852,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'formats': 'mincount:8',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||
'only_matching': True,
|
||||
@@ -365,10 +381,14 @@ class PBSIE(InfoExtractor):
|
||||
webpage, 'upload date', default=None))
|
||||
|
||||
# tabbed frontline videos
|
||||
tabbed_videos = re.findall(
|
||||
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
|
||||
if tabbed_videos:
|
||||
return tabbed_videos, presumptive_id, upload_date
|
||||
MULTI_PART_REGEXES = (
|
||||
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
|
||||
r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
|
||||
)
|
||||
for p in MULTI_PART_REGEXES:
|
||||
tabbed_videos = re.findall(p, webpage)
|
||||
if tabbed_videos:
|
||||
return tabbed_videos, presumptive_id, upload_date
|
||||
|
||||
MEDIA_ID_REGEXES = [
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
@@ -432,22 +452,54 @@ class PBSIE(InfoExtractor):
|
||||
for vid_id in video_id]
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id)
|
||||
info = None
|
||||
redirects = []
|
||||
redirect_urls = set()
|
||||
|
||||
def extract_redirect_urls(info):
|
||||
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
||||
redirect = info.get(encoding_name)
|
||||
if not redirect:
|
||||
continue
|
||||
redirect_url = redirect.get('url')
|
||||
if redirect_url and redirect_url not in redirect_urls:
|
||||
redirects.append(redirect)
|
||||
redirect_urls.add(redirect_url)
|
||||
|
||||
try:
|
||||
video_info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id, 'Downloading video info JSON')
|
||||
extract_redirect_urls(video_info)
|
||||
info = video_info
|
||||
except ExtractorError as e:
|
||||
# videoInfo API may not work for some videos
|
||||
if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
|
||||
raise
|
||||
|
||||
# Player pages may also serve different qualities
|
||||
for page in ('widget/partnerplayer', 'portalplayer'):
|
||||
player = self._download_webpage(
|
||||
'http://player.pbs.org/%s/%s' % (page, video_id),
|
||||
display_id, 'Downloading %s page' % page, fatal=False)
|
||||
if player:
|
||||
video_info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||
player, '%s video data' % page, default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
if video_info:
|
||||
extract_redirect_urls(video_info)
|
||||
if not info:
|
||||
info = video_info
|
||||
|
||||
formats = []
|
||||
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
||||
redirect = info.get(encoding_name)
|
||||
if not redirect:
|
||||
continue
|
||||
redirect_url = redirect.get('url')
|
||||
if not redirect_url:
|
||||
continue
|
||||
for num, redirect in enumerate(redirects):
|
||||
redirect_id = redirect.get('eeid')
|
||||
|
||||
redirect_info = self._download_json(
|
||||
redirect_url + '?format=json', display_id,
|
||||
'Downloading %s video url info' % encoding_name)
|
||||
'%s?format=json' % redirect['url'], display_id,
|
||||
'Downloading %s video url info' % (redirect_id or num))
|
||||
|
||||
if redirect_info['status'] == 'error':
|
||||
raise ExtractorError(
|
||||
@@ -466,8 +518,9 @@ class PBSIE(InfoExtractor):
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': redirect.get('eeid'),
|
||||
'format_id': redirect_id,
|
||||
})
|
||||
self._remove_duplicate_formats(formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
rating_str = info.get('rating')
|
||||
@@ -493,7 +546,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'description': info['program'].get('description'),
|
||||
'description': info.get('description') or info.get('program', {}).get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'age_limit': age_limit,
|
||||
|
51
youtube_dl/extractor/plays.py
Normal file
51
youtube_dl/extractor/plays.py
Normal file
@@ -0,0 +1,51 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PlaysTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
|
||||
_TEST = {
|
||||
'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
|
||||
'md5': 'dfeac1198506652b5257a62762cec7bc',
|
||||
'info_dict': {
|
||||
'id': '56af17f56c95335490',
|
||||
'ext': 'mp4',
|
||||
'title': 'When you outplay the Azir wall',
|
||||
'description': 'Posted by Bjergsen',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
content = self._parse_json(
|
||||
self._search_regex(
|
||||
r'R\.bindContent\(({.+?})\);', webpage,
|
||||
'content'), video_id)['content']
|
||||
mpd_url, sources = re.search(
|
||||
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
|
||||
content).groups()
|
||||
formats = self._extract_mpd_formats(
|
||||
self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
|
||||
for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(format_url),
|
||||
'format_id': 'http-' + format_id,
|
||||
'height': int_or_none(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
}
|
@@ -11,6 +11,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
)
|
||||
@@ -23,13 +24,18 @@ class PornHubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||
'md5': '882f488fa1f0026f023f33576004a2ed',
|
||||
'md5': '1e19b41231a02eba417839222ac9d58e',
|
||||
'info_dict': {
|
||||
'id': '648719015',
|
||||
'ext': 'mp4',
|
||||
"uploader": "Babes",
|
||||
"title": "Seductive Indian beauty strips down and fingers her pink pussy",
|
||||
"age_limit": 18
|
||||
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
|
||||
'uploader': 'Babes',
|
||||
'duration': 361,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||
@@ -67,13 +73,23 @@ class PornHubIE(InfoExtractor):
|
||||
'PornHub said: %s' % error_msg,
|
||||
expected=True, video_id=video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashv1ars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||
video_id)
|
||||
if flashvars:
|
||||
video_title = flashvars.get('video_title')
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
else:
|
||||
video_title, thumbnail, duration = [None] * 3
|
||||
|
||||
if not video_title:
|
||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
thumbnail = compat_urllib_parse_unquote(thumbnail)
|
||||
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
||||
@@ -95,7 +111,7 @@ class PornHubIE(InfoExtractor):
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
format = "-".join(format)
|
||||
format = '-'.join(format)
|
||||
|
||||
m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format)
|
||||
if m is None:
|
||||
@@ -120,6 +136,7 @@ class PornHubIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
@@ -129,7 +146,31 @@ class PornHubIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class PornHubPlaylistIE(InfoExtractor):
|
||||
class PornHubPlaylistBaseIE(InfoExtractor):
|
||||
def _extract_entries(self, webpage):
|
||||
return [
|
||||
self.url_result('http://www.pornhub.com/%s' % video_url, PornHubIE.ie_key())
|
||||
for video_url in set(re.findall(
|
||||
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = self._extract_entries(webpage)
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
|
||||
playlist_id)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist.get('title'), playlist.get('description'))
|
||||
|
||||
|
||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/playlist/6201671',
|
||||
@@ -140,21 +181,20 @@ class PornHubPlaylistIE(InfoExtractor):
|
||||
'playlist_mincount': 35,
|
||||
}]
|
||||
|
||||
|
||||
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/users/rushandlia/videos',
|
||||
'info_dict': {
|
||||
'id': 'rushandlia',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
user_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
webpage = self._download_webpage(url, user_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
|
||||
for video_url in set(re.findall(
|
||||
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
|
||||
]
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
|
||||
playlist_id)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist.get('title'), playlist.get('description'))
|
||||
return self.playlist_result(self._extract_entries(webpage), user_id)
|
||||
|
@@ -56,7 +56,7 @@ class PornoVoisinesIE(InfoExtractor):
|
||||
r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
|
||||
description = self._html_search_regex(
|
||||
r'<article id="descriptif">(.+?)</article>',
|
||||
webpage, "description", fatal=False, flags=re.DOTALL)
|
||||
webpage, 'description', fatal=False, flags=re.DOTALL)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
|
||||
|
@@ -28,16 +28,16 @@ class RadioBremenIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id
|
||||
meta_url = 'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id
|
||||
meta_doc = self._download_webpage(
|
||||
meta_url, video_id, 'Downloading metadata')
|
||||
title = self._html_search_regex(
|
||||
r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title")
|
||||
r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title')
|
||||
description = self._html_search_regex(
|
||||
r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False)
|
||||
r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False)
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r"Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>",
|
||||
meta_doc, "duration", fatal=False))
|
||||
r'Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>',
|
||||
meta_doc, 'duration', fatal=False))
|
||||
|
||||
page_doc = self._download_webpage(
|
||||
url, video_id, 'Downloading video information')
|
||||
@@ -51,7 +51,7 @@ class RadioBremenIE(InfoExtractor):
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'width': int(mobj.group("width")),
|
||||
'width': int(mobj.group('width')),
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -16,9 +16,9 @@ class RadioFranceIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'one-one',
|
||||
'ext': 'ogg',
|
||||
"title": "One to one",
|
||||
"description": "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
|
||||
"uploader": "Thomas Hercouët",
|
||||
'title': 'One to one',
|
||||
'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
|
||||
'uploader': 'Thomas Hercouët',
|
||||
},
|
||||
}
|
||||
|
||||
|
@@ -18,11 +18,11 @@ class RBMARadioIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'ford-lopatin-live-at-primavera-sound-2011',
|
||||
'ext': 'mp3',
|
||||
"uploader_id": "ford-lopatin",
|
||||
"location": "Spain",
|
||||
"description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
||||
"uploader": "Ford & Lopatin",
|
||||
"title": "Live at Primavera Sound 2011",
|
||||
'uploader_id': 'ford-lopatin',
|
||||
'location': 'Spain',
|
||||
'description': 'Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.',
|
||||
'uploader': 'Ford & Lopatin',
|
||||
'title': 'Live at Primavera Sound 2011',
|
||||
},
|
||||
}
|
||||
|
||||
|
@@ -12,12 +12,12 @@ class ReverbNationIE(InfoExtractor):
|
||||
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
||||
'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
|
||||
'info_dict': {
|
||||
"id": "16965047",
|
||||
"ext": "mp3",
|
||||
"title": "MONA LISA",
|
||||
"uploader": "ALKILADOS",
|
||||
"uploader_id": "216429",
|
||||
"thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
|
||||
'id': '16965047',
|
||||
'ext': 'mp3',
|
||||
'title': 'MONA LISA',
|
||||
'uploader': 'ALKILADOS',
|
||||
'uploader_id': '216429',
|
||||
'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$'
|
||||
},
|
||||
}]
|
||||
|
||||
|
@@ -8,13 +8,13 @@ from .common import InfoExtractor
|
||||
class RingTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
"url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
|
||||
"md5": "d25945f5df41cdca2d2587165ac28720",
|
||||
"info_dict": {
|
||||
'url': 'http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30',
|
||||
'md5': 'd25945f5df41cdca2d2587165ac28720',
|
||||
'info_dict': {
|
||||
'id': '857645',
|
||||
'ext': 'mp4',
|
||||
"title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
|
||||
"description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
|
||||
'title': 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
|
||||
'description': 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,8 +32,8 @@ class RingTVIE(InfoExtractor):
|
||||
description = self._html_search_regex(
|
||||
r'addthis:description="([^"]+)"',
|
||||
webpage, 'description', fatal=False)
|
||||
final_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4" % video_id
|
||||
thumbnail_url = "http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg" % video_id
|
||||
final_url = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4' % video_id
|
||||
thumbnail_url = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg' % video_id
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
@@ -41,7 +43,7 @@ class RteIE(InfoExtractor):
|
||||
r'<meta name="thumbnail" content="uri:irus:(.*?)" />', webpage, 'thumbnail')
|
||||
thumbnail = 'http://img.rasset.ie/' + thumbnail_id + '.jpg'
|
||||
|
||||
feeds_url = self._html_search_meta("feeds-prefix", webpage, 'feeds url') + video_id
|
||||
feeds_url = self._html_search_meta('feeds-prefix', webpage, 'feeds url') + video_id
|
||||
json_string = self._download_json(feeds_url, video_id)
|
||||
|
||||
# f4m_url = server + relative_url
|
||||
@@ -61,12 +63,15 @@ class RteIE(InfoExtractor):
|
||||
class RteRadioIE(InfoExtractor):
|
||||
IE_NAME = 'rte:radio'
|
||||
IE_DESC = 'Raidió Teilifís Éireann radio'
|
||||
# Radioplayer URLs have the specifier #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
||||
# Radioplayer URLs have two distinct specifier formats,
|
||||
# the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
||||
# the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
|
||||
# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
|
||||
# An <id> uniquely defines an individual recording, and is the only part we require.
|
||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:[0-9]*)(?:%3A|:)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# Old-style player URL; HLS and RTMPE formats
|
||||
'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
|
||||
'info_dict': {
|
||||
'id': '10507902',
|
||||
@@ -81,7 +86,23 @@ class RteRadioIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'f4m fails with --test atm'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# New-style player URL; RTMPE formats only
|
||||
'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
|
||||
'info_dict': {
|
||||
'id': '3250678',
|
||||
'ext': 'flv',
|
||||
'title': 'The Lyric Concert with Paul Herriott',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': '',
|
||||
'timestamp': 1333742400,
|
||||
'upload_date': '20120406',
|
||||
'duration': 7199.016,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'f4m fails with --test atm'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
@@ -102,8 +123,18 @@ class RteRadioIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
if mg.get('url') and not mg['url'].startswith('rtmpe:'):
|
||||
formats.append({'url': mg['url']})
|
||||
if mg.get('url'):
|
||||
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||
if m:
|
||||
m = m.groupdict()
|
||||
formats.append({
|
||||
'url': m['url'] + '/' + m['app'],
|
||||
'app': m['app'],
|
||||
'play_path': m['playpath'],
|
||||
'player_url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
|
||||
if mg.get('hls_server') and mg.get('hls_url'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
@@ -63,7 +63,7 @@ class RTL2IE(InfoExtractor):
|
||||
download_url = video_info['streamurl']
|
||||
download_url = download_url.replace('\\', '')
|
||||
stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL')
|
||||
rtmp_conn = ["S:connect", "O:1", "NS:pageUrl:" + url, "NB:fpad:0", "NN:videoFunction:1", "O:0"]
|
||||
rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
|
||||
|
||||
formats = [{
|
||||
'url': download_url,
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
remove_end,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
struct_unpack,
|
||||
@@ -178,14 +179,14 @@ class RTVEInfantilIE(InfoExtractor):
|
||||
class RTVELiveIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:live'
|
||||
IE_DESC = 'RTVE.es live streams'
|
||||
_VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'
|
||||
_VALID_URL = r'http://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/noticias/directo-la-1/',
|
||||
'url': 'http://www.rtve.es/directo/la-1/',
|
||||
'info_dict': {
|
||||
'id': 'directo-la-1',
|
||||
'ext': 'flv',
|
||||
'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
|
||||
'id': 'la-1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
@@ -198,23 +199,20 @@ class RTVELiveIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_url = self._search_regex(
|
||||
r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
|
||||
title = remove_end(self._og_search_title(webpage), ' en directo')
|
||||
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
||||
title = remove_start(title, 'Estoy viendo ')
|
||||
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
|
||||
|
||||
vidplayer_id = self._search_regex(
|
||||
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
|
||||
r'playerId=player([0-9]+)', webpage, 'internal video ID')
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
m3u8_url = _decrypt_url(png)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'app': 'rtve-live-live?ovpfv=2.1.2',
|
||||
'player_url': player_url,
|
||||
'rtmp_live': True,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
138
youtube_dl/extractor/screenjunkies.py
Normal file
138
youtube_dl/extractor/screenjunkies.py
Normal file
@@ -0,0 +1,138 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
)
|
||||
|
||||
|
||||
class ScreenJunkiesIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915',
|
||||
'md5': '5c2b686bec3d43de42bde9ec047536b0',
|
||||
'info_dict': {
|
||||
'id': '2841915',
|
||||
'display_id': 'best-quentin-tarantino-movie',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best Quentin Tarantino Movie',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 3671,
|
||||
'age_limit': 13,
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.screenjunkies.com/video/honest-trailers-the-dark-knight',
|
||||
'info_dict': {
|
||||
'id': '2348808',
|
||||
'display_id': 'honest-trailers-the-dark-knight',
|
||||
'ext': 'mp4',
|
||||
'title': "Honest Trailers: 'The Dark Knight'",
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'age_limit': 10,
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
# requires subscription but worked around
|
||||
'url': 'http://www.screenjunkies.com/video/knocking-dead-ep-1-the-show-so-far-3003285',
|
||||
'info_dict': {
|
||||
'id': '3003285',
|
||||
'display_id': 'knocking-dead-ep-1-the-show-so-far',
|
||||
'ext': 'mp4',
|
||||
'title': 'Knocking Dead Ep 1: State of The Dead Recap',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 3307,
|
||||
'age_limit': 13,
|
||||
'tags': list,
|
||||
},
|
||||
}]
|
||||
|
||||
_DEFAULT_BITRATES = (48, 150, 496, 864, 2240)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
(r'src=["\']/embed/(\d+)', r'data-video-content-id=["\'](\d+)'),
|
||||
webpage, 'video id')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.screenjunkies.com/embed/%s' % video_id,
|
||||
display_id, 'Downloading video embed page')
|
||||
embed_vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)embedVars\s*=\s*({.+?})\s*</script>', webpage, 'embed vars'),
|
||||
display_id)
|
||||
|
||||
title = embed_vars['contentName']
|
||||
|
||||
formats = []
|
||||
bitrates = []
|
||||
for f in embed_vars.get('media', []):
|
||||
if not f.get('uri') or f.get('mediaPurpose') != 'play':
|
||||
continue
|
||||
bitrate = int_or_none(f.get('bitRate'))
|
||||
if bitrate:
|
||||
bitrates.append(bitrate)
|
||||
formats.append({
|
||||
'url': f['uri'],
|
||||
'format_id': 'http-%d' % bitrate if bitrate else 'http',
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'tbr': bitrate,
|
||||
'format': 'mp4',
|
||||
})
|
||||
|
||||
if not bitrates:
|
||||
# When subscriptionLevel > 0, i.e. plus subscription is required
|
||||
# media list will be empty. However, hds and hls uris are still
|
||||
# available. We can grab them assuming bitrates to be default.
|
||||
bitrates = self._DEFAULT_BITRATES
|
||||
|
||||
auth_token = embed_vars.get('AuthToken')
|
||||
|
||||
def construct_manifest_url(base_url, ext):
|
||||
pieces = [base_url]
|
||||
pieces.extend([compat_str(b) for b in bitrates])
|
||||
pieces.append('_kbps.mp4.%s?%s' % (ext, auth_token))
|
||||
return ','.join(pieces)
|
||||
|
||||
if bitrates and auth_token:
|
||||
hds_url = embed_vars.get('hdsUri')
|
||||
if hds_url:
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
construct_manifest_url(hds_url, 'f4m'),
|
||||
display_id, f4m_id='hds', fatal=False)
|
||||
if len(f4m_formats) == len(bitrates):
|
||||
for f, bitrate in zip(f4m_formats, bitrates):
|
||||
if not f.get('tbr'):
|
||||
f['format_id'] = 'hds-%d' % bitrate
|
||||
f['tbr'] = bitrate
|
||||
# TODO: fix f4m downloader to handle manifests without bitrates if possible
|
||||
# formats.extend(f4m_formats)
|
||||
|
||||
hls_url = embed_vars.get('hlsUri')
|
||||
if hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
construct_manifest_url(hls_url, 'm3u8'),
|
||||
display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': embed_vars.get('thumbUri'),
|
||||
'duration': int_or_none(embed_vars.get('videoLengthInSeconds')) or None,
|
||||
'age_limit': parse_age_limit(embed_vars.get('audienceRating')),
|
||||
'tags': embed_vars.get('tags', '').split(','),
|
||||
'formats': formats,
|
||||
}
|
@@ -40,7 +40,7 @@ class ScreenwaveMediaIE(InfoExtractor):
|
||||
re.sub(
|
||||
r'(?s)/\*.*?\*/', '',
|
||||
self._search_regex(
|
||||
r"sources\s*:\s*(\[[^\]]+?\])", playerconfig,
|
||||
r'sources\s*:\s*(\[[^\]]+?\])', playerconfig,
|
||||
'sources',
|
||||
).replace(
|
||||
"' + thisObj.options.videoserver + '",
|
||||
@@ -70,25 +70,27 @@ class ScreenwaveMediaIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if source['type'] == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4'))
|
||||
file_ = source.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
if source.get('type') == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4'))
|
||||
else:
|
||||
file_ = source.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
format_label = source.get('label')
|
||||
format_id = self._search_regex(
|
||||
r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
|
||||
if not self._is_valid_url(file_, video_id, format_id or 'video'):
|
||||
continue
|
||||
format_label = source.get('label')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]', format_label, 'height', default=None))
|
||||
formats.append({
|
||||
'url': source['file'],
|
||||
'url': file_,
|
||||
'format_id': format_id,
|
||||
'format': format_label,
|
||||
'ext': source.get('type'),
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -15,37 +15,37 @@ from ..compat import (
|
||||
|
||||
class SenateISVPIE(InfoExtractor):
|
||||
_COMM_MAP = [
|
||||
["ag", "76440", "http://ag-f.akamaihd.net"],
|
||||
["aging", "76442", "http://aging-f.akamaihd.net"],
|
||||
["approps", "76441", "http://approps-f.akamaihd.net"],
|
||||
["armed", "76445", "http://armed-f.akamaihd.net"],
|
||||
["banking", "76446", "http://banking-f.akamaihd.net"],
|
||||
["budget", "76447", "http://budget-f.akamaihd.net"],
|
||||
["cecc", "76486", "http://srs-f.akamaihd.net"],
|
||||
["commerce", "80177", "http://commerce1-f.akamaihd.net"],
|
||||
["csce", "75229", "http://srs-f.akamaihd.net"],
|
||||
["dpc", "76590", "http://dpc-f.akamaihd.net"],
|
||||
["energy", "76448", "http://energy-f.akamaihd.net"],
|
||||
["epw", "76478", "http://epw-f.akamaihd.net"],
|
||||
["ethics", "76449", "http://ethics-f.akamaihd.net"],
|
||||
["finance", "76450", "http://finance-f.akamaihd.net"],
|
||||
["foreign", "76451", "http://foreign-f.akamaihd.net"],
|
||||
["govtaff", "76453", "http://govtaff-f.akamaihd.net"],
|
||||
["help", "76452", "http://help-f.akamaihd.net"],
|
||||
["indian", "76455", "http://indian-f.akamaihd.net"],
|
||||
["intel", "76456", "http://intel-f.akamaihd.net"],
|
||||
["intlnarc", "76457", "http://intlnarc-f.akamaihd.net"],
|
||||
["jccic", "85180", "http://jccic-f.akamaihd.net"],
|
||||
["jec", "76458", "http://jec-f.akamaihd.net"],
|
||||
["judiciary", "76459", "http://judiciary-f.akamaihd.net"],
|
||||
["rpc", "76591", "http://rpc-f.akamaihd.net"],
|
||||
["rules", "76460", "http://rules-f.akamaihd.net"],
|
||||
["saa", "76489", "http://srs-f.akamaihd.net"],
|
||||
["smbiz", "76461", "http://smbiz-f.akamaihd.net"],
|
||||
["srs", "75229", "http://srs-f.akamaihd.net"],
|
||||
["uscc", "76487", "http://srs-f.akamaihd.net"],
|
||||
["vetaff", "76462", "http://vetaff-f.akamaihd.net"],
|
||||
["arch", "", "http://ussenate-f.akamaihd.net/"]
|
||||
['ag', '76440', 'http://ag-f.akamaihd.net'],
|
||||
['aging', '76442', 'http://aging-f.akamaihd.net'],
|
||||
['approps', '76441', 'http://approps-f.akamaihd.net'],
|
||||
['armed', '76445', 'http://armed-f.akamaihd.net'],
|
||||
['banking', '76446', 'http://banking-f.akamaihd.net'],
|
||||
['budget', '76447', 'http://budget-f.akamaihd.net'],
|
||||
['cecc', '76486', 'http://srs-f.akamaihd.net'],
|
||||
['commerce', '80177', 'http://commerce1-f.akamaihd.net'],
|
||||
['csce', '75229', 'http://srs-f.akamaihd.net'],
|
||||
['dpc', '76590', 'http://dpc-f.akamaihd.net'],
|
||||
['energy', '76448', 'http://energy-f.akamaihd.net'],
|
||||
['epw', '76478', 'http://epw-f.akamaihd.net'],
|
||||
['ethics', '76449', 'http://ethics-f.akamaihd.net'],
|
||||
['finance', '76450', 'http://finance-f.akamaihd.net'],
|
||||
['foreign', '76451', 'http://foreign-f.akamaihd.net'],
|
||||
['govtaff', '76453', 'http://govtaff-f.akamaihd.net'],
|
||||
['help', '76452', 'http://help-f.akamaihd.net'],
|
||||
['indian', '76455', 'http://indian-f.akamaihd.net'],
|
||||
['intel', '76456', 'http://intel-f.akamaihd.net'],
|
||||
['intlnarc', '76457', 'http://intlnarc-f.akamaihd.net'],
|
||||
['jccic', '85180', 'http://jccic-f.akamaihd.net'],
|
||||
['jec', '76458', 'http://jec-f.akamaihd.net'],
|
||||
['judiciary', '76459', 'http://judiciary-f.akamaihd.net'],
|
||||
['rpc', '76591', 'http://rpc-f.akamaihd.net'],
|
||||
['rules', '76460', 'http://rules-f.akamaihd.net'],
|
||||
['saa', '76489', 'http://srs-f.akamaihd.net'],
|
||||
['smbiz', '76461', 'http://smbiz-f.akamaihd.net'],
|
||||
['srs', '75229', 'http://srs-f.akamaihd.net'],
|
||||
['uscc', '76487', 'http://srs-f.akamaihd.net'],
|
||||
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
|
||||
['arch', '', 'http://ussenate-f.akamaihd.net/']
|
||||
]
|
||||
_IE_NAME = 'senate.gov'
|
||||
_VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)'
|
||||
|
@@ -13,8 +13,8 @@ class SlutloadIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'TD73btpBqSxc',
|
||||
'ext': 'mp4',
|
||||
"title": "virginie baisee en cam",
|
||||
"age_limit": 18,
|
||||
'title': 'virginie baisee en cam',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 're:https?://.*?\.jpg'
|
||||
}
|
||||
}
|
||||
|
@@ -170,7 +170,7 @@ class SmotriIE(InfoExtractor):
|
||||
'getvideoinfo': '1',
|
||||
}
|
||||
|
||||
video_password = self._downloader.params.get('videopassword', None)
|
||||
video_password = self._downloader.params.get('videopassword')
|
||||
if video_password:
|
||||
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||
|
||||
@@ -356,7 +356,7 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
|
||||
url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
|
||||
|
||||
broadcast_password = self._downloader.params.get('videopassword', None)
|
||||
broadcast_password = self._downloader.params.get('videopassword')
|
||||
if broadcast_password:
|
||||
url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
|
||||
|
||||
|
@@ -43,7 +43,7 @@ class SnotrIE(InfoExtractor):
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id
|
||||
video_url = 'http://cdn.videos.snotr.com/%s.flv' % video_id
|
||||
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
|
||||
|
@@ -222,7 +222,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
full_title = track_id
|
||||
token = mobj.group('secret_token')
|
||||
if token:
|
||||
info_json_url += "&secret_token=" + token
|
||||
info_json_url += '&secret_token=' + token
|
||||
elif mobj.group('player'):
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
real_url = query['url'][0]
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user