Compare commits
228 Commits
2016.06.23
...
2016.07.13
Author | SHA1 | Date | |
---|---|---|---|
c485959034 | |||
a0560d8ab8 | |||
0385aa6199 | |||
00f4764cb7 | |||
51c2cd0b83 | |||
5f5a9d6158 | |||
2d19fb5072 | |||
9d865a1af6 | |||
41aa44259d | |||
381ff44756 | |||
7f29cf545a | |||
7d1219f3e0 | |||
f1b4af7d79 | |||
8a8590a617 | |||
4a7a5e41f7 | |||
2a49d01600 | |||
b99af8a51c | |||
8e7020daef | |||
a26bcc61c1 | |||
5c4dcf8172 | |||
e9fb6a4bbe | |||
e2dbcaa1bf | |||
ae01850165 | |||
c3baaedfc8 | |||
0b68de3cc1 | |||
39e9d524e5 | |||
865b087224 | |||
3121b25639 | |||
0286b85c79 | |||
ab52bb5137 | |||
61a98b8623 | |||
6daf34a045 | |||
c03adf90bd | |||
0ece114b7b | |||
5b6a74856b | |||
ce43100a01 | |||
8cc9b4016d | |||
31eeab9f41 | |||
9558dcec9c | |||
6e6b70d65f | |||
d417fd88d0 | |||
9e4f5dc1e9 | |||
1251565ee0 | |||
1f7258a367 | |||
0af985069b | |||
0de168f7ed | |||
95b31e266b | |||
6b3a3098b5 | |||
2de624fdd5 | |||
3fee7f636c | |||
89e2fff2b7 | |||
cedc70b292 | |||
07d7689f2e | |||
ae8cb5328d | |||
2e32ac0b9a | |||
672f01c370 | |||
e2d616dd30 | |||
0ab7f4fe2b | |||
29c4a07776 | |||
826e911e41 | |||
30d22dae8e | |||
ec3518725b | |||
5f87d845eb | |||
571808a7aa | |||
dfe5fa49ae | |||
01a0c511eb | |||
b3d30315ce | |||
882af14d7d | |||
47335a0efa | |||
34bc2d9dfd | |||
08c7af4afa | |||
f7291a0b7c | |||
c65aa4e9e1 | |||
ad213a1d74 | |||
43f1e4e41e | |||
54b0e909d5 | |||
f8752b86ac | |||
84c237fb8a | |||
ab49d7a9fa | |||
b4173f1551 | |||
2817b99cf2 | |||
001fffd004 | |||
0e94b4713d | |||
a6d3b89feb | |||
6c26815d63 | |||
73c4ac2c95 | |||
84f214d840 | |||
e3f88be7a9 | |||
31af3e35e0 | |||
94a5cff91d | |||
77082c7b9e | |||
252a1f75d2 | |||
5abf513cf8 | |||
c6054e3201 | |||
4080530624 | |||
c25f1a9b63 | |||
dfaa86b75e | |||
d9163ae3b6 | |||
dafafe7cf1 | |||
81953d1ae5 | |||
3a212ed62e | |||
195f084542 | |||
aa7a455b2e | |||
6a4e659c93 | |||
40f3666f6b | |||
dd801bbe18 | |||
38cce791c7 | |||
bf3ae6a543 | |||
bff98341d5 | |||
2644e911be | |||
a5f67895d3 | |||
15e4b6b758 | |||
2b28b892d8 | |||
7507fc98cb | |||
477b7a8474 | |||
034a884957 | |||
64436cb1a4 | |||
f138873900 | |||
e793338c88 | |||
369bb06206 | |||
2cb31d288e | |||
c723d1cd8d | |||
1f55234057 | |||
04006fae8d | |||
4cb13d0d6a | |||
a1f6f5c768 | |||
05c7feec77 | |||
bf83024826 | |||
a0cfd82dda | |||
1b734adb2d | |||
9b724d7277 | |||
c3a5dd3b5d | |||
e3755a624b | |||
95cf60e826 | |||
6b03e1e25d | |||
712b0b5b70 | |||
6a424391d9 | |||
dbf0157a26 | |||
7deef1ba67 | |||
fd6ca38262 | |||
bdafd88da0 | |||
7a1e71575e | |||
ac2d8f54d1 | |||
14ff6baa0e | |||
bb08101ec4 | |||
bc4b2d75ba | |||
35fc3021ba | |||
347227237b | |||
564dc3c6e8 | |||
9f4576a7eb | |||
f11315e8d4 | |||
0c2ac64bb8 | |||
a9eede3913 | |||
9e29ef13a3 | |||
eaaaaec042 | |||
3cb3b60064 | |||
044e3d91b5 | |||
c9e538a3b1 | |||
76dad392f5 | |||
9617b557aa | |||
bf4fa24414 | |||
20361b4f25 | |||
05a0068a76 | |||
66a42309fa | |||
fd94e2671a | |||
8ff6697861 | |||
eafa643715 | |||
049da7cb6c | |||
7dbeee7e22 | |||
93ad6c6bfa | |||
329179073b | |||
4d86d2008e | |||
ab47b6e881 | |||
df43389ade | |||
397b305cfe | |||
e496fa50cd | |||
06a96da15b | |||
70157c2c43 | |||
c58ed8563d | |||
4c7821227c | |||
42362fdb5e | |||
97124e572d | |||
32616c14cc | |||
8174d0fe95 | |||
8704778d95 | |||
c287f2bc60 | |||
9ea5c04c0d | |||
fd7a7498a4 | |||
e3a6747d8f | |||
f41ffc00d1 | |||
81fda15369 | |||
427cd050a3 | |||
b0c200f1ec | |||
92747e664a | |||
f1f336322d | |||
bf8dd79045 | |||
c6781156aa | |||
59bbe4911a | |||
4f3c5e0627 | |||
f484c5fa25 | |||
88d9f6c0c4 | |||
3c9c088f9c | |||
fc3996bfe1 | |||
5b6ad8630c | |||
30105f4ac0 | |||
1143535d76 | |||
7d52c052ef | |||
a2406fce3c | |||
3b34ab538c | |||
ac782306f1 | |||
0c00e889f3 | |||
ce96ed05f4 | |||
0463b77a1f | |||
2d185706ea | |||
b72b44318c | |||
46f59e89ea | |||
b4241e308e | |||
3d4b08dfc7 | |||
be49068d65 | |||
525cedb971 | |||
de3c7fe0d4 | |||
896cc72750 | |||
c1ff6e1ad0 | |||
fee70322d7 | |||
8065d6c55f | |||
494172d2e5 | |||
6e3c2047f8 | |||
ccff2c404d |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23.1**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.13**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.06.23.1
|
||||
[debug] youtube-dl version 2016.07.13
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
22
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
22
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
## Please follow the guide below
|
||||
|
||||
- You will be asked some questions, please read them **carefully** and answer honestly
|
||||
- Put an `x` into all the boxes [ ] relevant to your *pull request* (like that [x])
|
||||
- Use *Preview* tab to see how your *pull request* will actually look like
|
||||
|
||||
---
|
||||
|
||||
### Before submitting a *pull request* make sure you have:
|
||||
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
|
||||
### What is the purpose of your *pull request*?
|
||||
- [ ] Bug fix
|
||||
- [ ] New extractor
|
||||
- [ ] New feature
|
||||
|
||||
---
|
||||
|
||||
### Description of your *pull request* and other information
|
||||
|
||||
Explanation of your *pull request* in arbitrary form goes here. Please make sure the description explains the purpose and effect of your *pull request* and is worded well enough to be understood. Provide as much context and examples as possible.
|
@ -7,9 +7,6 @@ python:
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
sudo: false
|
||||
install:
|
||||
- bash ./devscripts/install_srelay.sh
|
||||
- export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
|
3
AUTHORS
3
AUTHORS
@ -175,3 +175,6 @@ Tomáš Čech
|
||||
Déstin Reed
|
||||
Roman Tsiupa
|
||||
Artur Krysiak
|
||||
Jakub Adam Wieczorek
|
||||
Aleksandar Topuzović
|
||||
Nehal Patel
|
||||
|
152
CONTRIBUTING.md
152
CONTRIBUTING.md
@ -97,9 +97,17 @@ If you want to add support for a new site, first of all **make sure** this site
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
||||
2. Check out the source code with:
|
||||
|
||||
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
|
||||
|
||||
3. Start a new git branch with
|
||||
|
||||
cd youtube-dl
|
||||
git checkout -b yourextractor
|
||||
|
||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||
|
||||
```python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
@ -143,16 +151,148 @@ After you have ensured this site is distributing it's content legally, you can f
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
## youtube-dl coding conventions
|
||||
|
||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
||||
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
- `url` (media download URL) or `formats`
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
|
||||
|
||||
```python
|
||||
meta = self._download_json(url, video_id)
|
||||
```
|
||||
|
||||
Assume at this point `meta`'s layout is:
|
||||
|
||||
```python
|
||||
{
|
||||
...
|
||||
"summary": "some fancy summary text",
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
|
||||
```python
|
||||
description = meta.get('summary') # correct
|
||||
```
|
||||
|
||||
and not like:
|
||||
|
||||
```python
|
||||
description = meta['summary'] # incorrect
|
||||
```
|
||||
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
|
||||
|
||||
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||
|
||||
```python
|
||||
description = self._search_regex(
|
||||
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||
webpage, 'description', fatal=False)
|
||||
```
|
||||
|
||||
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
|
||||
|
||||
You can also pass `default=<some fallback value>`, for example:
|
||||
|
||||
```python
|
||||
description = self._search_regex(
|
||||
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
|
||||
|
||||
### Provide fallbacks
|
||||
|
||||
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
|
||||
|
||||
#### Example
|
||||
|
||||
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
|
||||
|
||||
```python
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
```python
|
||||
title = meta.get('title') or self._og_search_title(webpage)
|
||||
```
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
|
||||
#### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
```html
|
||||
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
|
||||
```
|
||||
|
||||
The code for that task should look similar to:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
|
||||
```
|
||||
|
||||
Or even better:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
|
||||
The code definitely should not look like:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
|
161
README.md
161
README.md
@ -103,9 +103,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
(experimental)
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
(experimental)
|
||||
--cn-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some Chinese sites. The default proxy
|
||||
specified by --proxy (or none, if the
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some geo-restricted sites. The default
|
||||
proxy specified by --proxy (or none, if the
|
||||
options is not present) is used for the
|
||||
actual downloading. (experimental)
|
||||
|
||||
@ -424,7 +424,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
@ -432,6 +432,7 @@ For example, with the following configuration file youtube-dl will always extrac
|
||||
--no-mtime
|
||||
--proxy 127.0.0.1:3128
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
# Lines starting with # are comments
|
||||
```
|
||||
|
||||
Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
|
||||
@ -890,9 +891,17 @@ If you want to add support for a new site, first of all **make sure** this site
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
||||
2. Check out the source code with:
|
||||
|
||||
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
|
||||
|
||||
3. Start a new git branch with
|
||||
|
||||
cd youtube-dl
|
||||
git checkout -b yourextractor
|
||||
|
||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||
|
||||
```python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
@ -936,19 +945,151 @@ After you have ensured this site is distributing it's content legally, you can f
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
|
||||
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/extractors.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
||||
## youtube-dl coding conventions
|
||||
|
||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
||||
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
- `url` (media download URL) or `formats`
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
|
||||
|
||||
```python
|
||||
meta = self._download_json(url, video_id)
|
||||
```
|
||||
|
||||
Assume at this point `meta`'s layout is:
|
||||
|
||||
```python
|
||||
{
|
||||
...
|
||||
"summary": "some fancy summary text",
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
|
||||
```python
|
||||
description = meta.get('summary') # correct
|
||||
```
|
||||
|
||||
and not like:
|
||||
|
||||
```python
|
||||
description = meta['summary'] # incorrect
|
||||
```
|
||||
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
|
||||
|
||||
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||
|
||||
```python
|
||||
description = self._search_regex(
|
||||
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||
webpage, 'description', fatal=False)
|
||||
```
|
||||
|
||||
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
|
||||
|
||||
You can also pass `default=<some fallback value>`, for example:
|
||||
|
||||
```python
|
||||
description = self._search_regex(
|
||||
r'<span[^>]+id="title"[^>]*>([^<]+)<',
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
|
||||
|
||||
### Provide fallbacks
|
||||
|
||||
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
|
||||
|
||||
#### Example
|
||||
|
||||
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
|
||||
|
||||
```python
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
```python
|
||||
title = meta.get('title') or self._og_search_title(webpage)
|
||||
```
|
||||
|
||||
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
|
||||
|
||||
### Make regular expressions flexible
|
||||
|
||||
When using regular expressions try to write them fuzzy and flexible.
|
||||
|
||||
#### Example
|
||||
|
||||
Say you need to extract `title` from the following HTML code:
|
||||
|
||||
```html
|
||||
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
|
||||
```
|
||||
|
||||
The code for that task should look similar to:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
|
||||
```
|
||||
|
||||
Or even better:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
|
||||
The code definitely should not look like:
|
||||
|
||||
```python
|
||||
title = self._search_regex(
|
||||
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
# EMBEDDING YOUTUBE-DL
|
||||
|
||||
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new).
|
||||
|
@ -15,13 +15,9 @@ data = urllib.request.urlopen(URL).read()
|
||||
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
|
||||
template = tmplf.read()
|
||||
|
||||
md5sum = hashlib.md5(data).hexdigest()
|
||||
sha1sum = hashlib.sha1(data).hexdigest()
|
||||
sha256sum = hashlib.sha256(data).hexdigest()
|
||||
template = template.replace('@PROGRAM_VERSION@', version)
|
||||
template = template.replace('@PROGRAM_URL@', URL)
|
||||
template = template.replace('@PROGRAM_MD5SUM@', md5sum)
|
||||
template = template.replace('@PROGRAM_SHA1SUM@', sha1sum)
|
||||
template = template.replace('@PROGRAM_SHA256SUM@', sha256sum)
|
||||
template = template.replace('@EXE_URL@', versions_info['versions'][version]['exe'][0])
|
||||
template = template.replace('@EXE_SHA256SUM@', versions_info['versions'][version]['exe'][1])
|
||||
|
@ -1,8 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
mkdir -p tmp && cd tmp
|
||||
wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz
|
||||
tar zxvf srelay-0.4.8b6.tar.gz
|
||||
cd srelay-0.4.8b6
|
||||
./configure
|
||||
make
|
41
devscripts/show-downloads-statistics.py
Normal file
41
devscripts/show-downloads-statistics.py
Normal file
@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.compat import (
|
||||
compat_print,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from youtube_dl.utils import format_bytes
|
||||
|
||||
|
||||
def format_size(bytes):
|
||||
return '%s (%d bytes)' % (format_bytes(bytes), bytes)
|
||||
|
||||
|
||||
total_bytes = 0
|
||||
|
||||
releases = json.loads(compat_urllib_request.urlopen(
|
||||
'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8'))
|
||||
|
||||
for release in releases:
|
||||
compat_print(release['name'])
|
||||
for asset in release['assets']:
|
||||
asset_name = asset['name']
|
||||
total_bytes += asset['download_count'] * asset['size']
|
||||
if all(not re.match(p, asset_name) for p in (
|
||||
r'^youtube-dl$',
|
||||
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
|
||||
r'^youtube-dl\.exe$')):
|
||||
continue
|
||||
compat_print(
|
||||
' %s size: %s downloads: %d'
|
||||
% (asset_name, format_size(asset['size']), asset['download_count']))
|
||||
|
||||
compat_print('total downloads traffic: %s' % format_size(total_bytes))
|
@ -45,7 +45,6 @@
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
@ -153,6 +152,8 @@
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **CultureUnplugged**
|
||||
- **CWTV**
|
||||
@ -223,6 +224,7 @@
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Flipagram**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
@ -241,6 +243,7 @@
|
||||
- **FreeVideo**
|
||||
- **Funimation**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
- **GameInformer**
|
||||
- **Gamekings**
|
||||
- **GameOne**
|
||||
@ -273,6 +276,7 @@
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HistoricFilms**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HornBunny**
|
||||
@ -280,6 +284,8 @@
|
||||
- **HotStar**
|
||||
- **Howcast**
|
||||
- **HowStuffWorks**
|
||||
- **HRTi**
|
||||
- **HRTiPlaylist**
|
||||
- **HuffPost**: Huffington Post
|
||||
- **Hypem**
|
||||
- **Iconosquare**
|
||||
@ -307,6 +313,7 @@
|
||||
- **jpopsuki.tv**
|
||||
- **JWPlatform**
|
||||
- **Kaltura**
|
||||
- **Kamcord**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
@ -326,7 +333,7 @@
|
||||
- **kuwo:mv**: 酷我音乐 - MV
|
||||
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.tv**
|
||||
- **la7.it**
|
||||
- **Laola1Tv**
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
@ -359,6 +366,7 @@
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **META**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
- **Mgoon**
|
||||
@ -385,6 +393,7 @@
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **MPORA**
|
||||
- **MSN**
|
||||
- **MTV**
|
||||
- **mtv.de**
|
||||
- **mtviggy.com**
|
||||
@ -438,6 +447,7 @@
|
||||
- **nick.de**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **NineCNineMedia**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **Noco**
|
||||
@ -468,6 +478,8 @@
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **onet.tv**
|
||||
- **onet.tv:channel**
|
||||
- **OnionStudios**
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
@ -501,8 +513,9 @@
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
- **PolskieRadio**
|
||||
- **PornHd**
|
||||
- **PornHub**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
- **PornHubPlaylist**
|
||||
- **PornHubUserVideos**
|
||||
- **Pornotube**
|
||||
@ -541,6 +554,7 @@
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RockstarGames**
|
||||
- **RoosterTeeth**
|
||||
- **RottenTomatoes**
|
||||
- **Roxwel**
|
||||
- **RTBF**
|
||||
@ -554,6 +568,7 @@
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **RTVNH**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
- **RulePorn**
|
||||
- **rutube**: Rutube videos
|
||||
@ -586,8 +601,10 @@
|
||||
- **Shared**: shared.sx and vivo.sx
|
||||
- **ShareSix**
|
||||
- **Sina**
|
||||
- **SixPlay**
|
||||
- **skynewsarabia:article**
|
||||
- **skynewsarabia:video**
|
||||
- **skynewsarabia:video**
|
||||
- **SkySports**
|
||||
- **Slideshare**
|
||||
- **Slutload**
|
||||
- **smotri**: Smotri.com
|
||||
@ -619,6 +636,7 @@
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **Sportschau**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
- **SSA**
|
||||
@ -719,6 +737,7 @@
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USAToday**
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
@ -736,6 +755,7 @@
|
||||
- **vh1.com**
|
||||
- **Vice**
|
||||
- **ViceShow**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.mit.edu**
|
||||
@ -775,6 +795,7 @@
|
||||
- **vine:user**
|
||||
- **vk**: VK
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
@ -843,6 +864,7 @@
|
||||
- **youtube:search**: YouTube.com searches
|
||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||
- **youtube:search_url**: YouTube.com search URLs
|
||||
- **youtube:shared**
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
|
63
setup.py
63
setup.py
@ -21,25 +21,37 @@ try:
|
||||
import py2exe
|
||||
except ImportError:
|
||||
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
||||
print("Cannot import py2exe", file=sys.stderr)
|
||||
print('Cannot import py2exe', file=sys.stderr)
|
||||
exit(1)
|
||||
|
||||
py2exe_options = {
|
||||
"bundle_files": 1,
|
||||
"compressed": 1,
|
||||
"optimize": 2,
|
||||
"dist_dir": '.',
|
||||
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
|
||||
'bundle_files': 1,
|
||||
'compressed': 1,
|
||||
'optimize': 2,
|
||||
'dist_dir': '.',
|
||||
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
|
||||
}
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
'youtube_dl/version.py', 'exec'))
|
||||
|
||||
DESCRIPTION = 'YouTube video downloader'
|
||||
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
|
||||
|
||||
py2exe_console = [{
|
||||
"script": "./youtube_dl/__main__.py",
|
||||
"dest_base": "youtube-dl",
|
||||
'script': './youtube_dl/__main__.py',
|
||||
'dest_base': 'youtube-dl',
|
||||
'version': __version__,
|
||||
'description': DESCRIPTION,
|
||||
'comments': LONG_DESCRIPTION,
|
||||
'product_name': 'youtube-dl',
|
||||
'product_version': __version__,
|
||||
}]
|
||||
|
||||
py2exe_params = {
|
||||
'console': py2exe_console,
|
||||
'options': {"py2exe": py2exe_options},
|
||||
'options': {'py2exe': py2exe_options},
|
||||
'zipfile': None
|
||||
}
|
||||
|
||||
@ -72,7 +84,7 @@ else:
|
||||
params['scripts'] = ['bin/youtube-dl']
|
||||
|
||||
class build_lazy_extractors(Command):
|
||||
description = "Build the extractor lazy loading module"
|
||||
description = 'Build the extractor lazy loading module'
|
||||
user_options = []
|
||||
|
||||
def initialize_options(self):
|
||||
@ -87,16 +99,11 @@ class build_lazy_extractors(Command):
|
||||
dry_run=self.dry_run,
|
||||
)
|
||||
|
||||
# Get the version from youtube_dl/version.py without importing the package
|
||||
exec(compile(open('youtube_dl/version.py').read(),
|
||||
'youtube_dl/version.py', 'exec'))
|
||||
|
||||
setup(
|
||||
name='youtube_dl',
|
||||
version=__version__,
|
||||
description='YouTube video downloader',
|
||||
long_description='Small command-line program to download videos from'
|
||||
' YouTube.com and other video sites.',
|
||||
description=DESCRIPTION,
|
||||
long_description=LONG_DESCRIPTION,
|
||||
url='https://github.com/rg3/youtube-dl',
|
||||
author='Ricardo Garcia',
|
||||
author_email='ytdl@yt-dl.org',
|
||||
@ -112,17 +119,17 @@ setup(
|
||||
# test_requires = ['nosetest'],
|
||||
|
||||
classifiers=[
|
||||
"Topic :: Multimedia :: Video",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Environment :: Console",
|
||||
"License :: Public Domain",
|
||||
"Programming Language :: Python :: 2.6",
|
||||
"Programming Language :: Python :: 2.7",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.2",
|
||||
"Programming Language :: Python :: 3.3",
|
||||
"Programming Language :: Python :: 3.4",
|
||||
"Programming Language :: Python :: 3.5",
|
||||
'Topic :: Multimedia :: Video',
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
'Environment :: Console',
|
||||
'License :: Public Domain',
|
||||
'Programming Language :: Python :: 2.6',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.2',
|
||||
'Programming Language :: Python :: 3.3',
|
||||
'Programming Language :: Python :: 3.4',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
],
|
||||
|
||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||
|
@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
@ -66,6 +66,11 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(ie._html_search_meta('d', html), '4')
|
||||
self.assertEqual(ie._html_search_meta('e', html), '5')
|
||||
self.assertEqual(ie._html_search_meta('f', html), '6')
|
||||
self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
|
||||
self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
|
||||
self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
|
||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
||||
|
||||
def test_download_json(self):
|
||||
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
|
||||
|
@ -6,6 +6,7 @@ from __future__ import unicode_literals
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
import collections
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
@ -130,6 +131,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
|
||||
['Yahoo'])
|
||||
|
||||
def test_no_duplicated_ie_names(self):
|
||||
name_accu = collections.defaultdict(list)
|
||||
for ie in self.ies:
|
||||
name_accu[ie.IE_NAME.lower()].append(type(ie).__name__)
|
||||
for (ie_name, ie_list) in name_accu.items():
|
||||
self.assertEqual(
|
||||
len(ie_list), 1,
|
||||
'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -87,6 +87,8 @@ class TestCompat(unittest.TestCase):
|
||||
|
||||
def test_compat_shlex_split(self):
|
||||
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
|
||||
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
|
||||
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
|
||||
|
||||
def test_compat_etree_fromstring(self):
|
||||
xml = '''
|
||||
|
@ -138,27 +138,27 @@ class TestProxy(unittest.TestCase):
|
||||
self.proxy_thread.daemon = True
|
||||
self.proxy_thread.start()
|
||||
|
||||
self.cn_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('cn'))
|
||||
self.cn_port = http_server_port(self.cn_proxy)
|
||||
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
|
||||
self.cn_proxy_thread.daemon = True
|
||||
self.cn_proxy_thread.start()
|
||||
self.geo_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('geo'))
|
||||
self.geo_port = http_server_port(self.geo_proxy)
|
||||
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
|
||||
self.geo_proxy_thread.daemon = True
|
||||
self.geo_proxy_thread.start()
|
||||
|
||||
def test_proxy(self):
|
||||
cn_proxy = 'localhost:{0}'.format(self.cn_port)
|
||||
geo_proxy = 'localhost:{0}'.format(self.geo_port)
|
||||
ydl = YoutubeDL({
|
||||
'proxy': 'localhost:{0}'.format(self.port),
|
||||
'cn_verification_proxy': cn_proxy,
|
||||
'geo_verification_proxy': geo_proxy,
|
||||
})
|
||||
url = 'http://foo.com/bar'
|
||||
response = ydl.urlopen(url).read().decode('utf-8')
|
||||
self.assertEqual(response, 'normal: {0}'.format(url))
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Ytdl-request-proxy', cn_proxy)
|
||||
req.add_header('Ytdl-request-proxy', geo_proxy)
|
||||
response = ydl.urlopen(req).read().decode('utf-8')
|
||||
self.assertEqual(response, 'cn: {0}'.format(url))
|
||||
self.assertEqual(response, 'geo: {0}'.format(url))
|
||||
|
||||
def test_proxy_with_idn(self):
|
||||
ydl = YoutubeDL({
|
||||
|
@ -33,6 +33,7 @@ from youtube_dl.utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
get_element_by_class,
|
||||
InAdvancePagedList,
|
||||
intlist_to_bytes,
|
||||
is_html,
|
||||
@ -60,11 +61,13 @@ from youtube_dl.utils import (
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
urshift,
|
||||
update_url_query,
|
||||
version_tuple,
|
||||
xpath_with_ns,
|
||||
@ -78,6 +81,7 @@ from youtube_dl.utils import (
|
||||
cli_option,
|
||||
cli_valueless_option,
|
||||
cli_bool_option,
|
||||
parse_codecs,
|
||||
)
|
||||
from youtube_dl.compat import (
|
||||
compat_chr,
|
||||
@ -283,8 +287,28 @@ class TestUtil(unittest.TestCase):
|
||||
'20150202')
|
||||
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
|
||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||
|
||||
def test_unified_timestamps(self):
|
||||
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||
self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
|
||||
self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
|
||||
self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
|
||||
self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
|
||||
self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
|
||||
self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
|
||||
self.assertEqual(
|
||||
unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
|
||||
1417001400)
|
||||
self.assertEqual(
|
||||
unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
|
||||
1422902860)
|
||||
self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
|
||||
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
|
||||
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
|
||||
@ -383,6 +407,12 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(res_url, url)
|
||||
self.assertEqual(res_data, None)
|
||||
|
||||
smug_url = smuggle_url(url, {'a': 'b'})
|
||||
smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
|
||||
res_url, res_data = unsmuggle_url(smug_smug_url)
|
||||
self.assertEqual(res_url, url)
|
||||
self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
|
||||
|
||||
def test_shell_quote(self):
|
||||
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
|
||||
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
|
||||
@ -579,6 +609,29 @@ class TestUtil(unittest.TestCase):
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_parse_codecs(self):
|
||||
self.assertEqual(parse_codecs(''), {})
|
||||
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||
'vcodec': 'avc1.77.30',
|
||||
'acodec': 'mp4a.40.2',
|
||||
})
|
||||
self.assertEqual(parse_codecs('mp4a.40.2'), {
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp4a.40.2',
|
||||
})
|
||||
self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), {
|
||||
'vcodec': 'avc1.42001e',
|
||||
'acodec': 'mp4a.40.5',
|
||||
})
|
||||
self.assertEqual(parse_codecs('avc3.640028'), {
|
||||
'vcodec': 'avc3.640028',
|
||||
'acodec': 'none',
|
||||
})
|
||||
self.assertEqual(parse_codecs(', h264,,newcodec,aac'), {
|
||||
'vcodec': 'h264',
|
||||
'acodec': 'aac',
|
||||
})
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||
@ -959,5 +1012,17 @@ The first line
|
||||
self.assertRaises(ValueError, encode_base_n, 0, 70)
|
||||
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
|
||||
|
||||
def test_urshift(self):
|
||||
self.assertEqual(urshift(3, 1), 1)
|
||||
self.assertEqual(urshift(-3, 1), 2147483646)
|
||||
|
||||
def test_get_element_by_class(self):
|
||||
html = '''
|
||||
<span class="foo bar">nice</span>
|
||||
'''
|
||||
|
||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -196,8 +196,8 @@ class YoutubeDL(object):
|
||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||
At the moment, this is only supported by YouTube.
|
||||
proxy: URL of the proxy server to use
|
||||
cn_verification_proxy: URL of the proxy to use for IP address verification
|
||||
on Chinese sites. (Experimental)
|
||||
geo_verification_proxy: URL of the proxy to use for IP address verification
|
||||
on geo-restricted sites. (Experimental)
|
||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||
support, using fridibi
|
||||
@ -304,6 +304,11 @@ class YoutubeDL(object):
|
||||
self.params.update(params)
|
||||
self.cache = Cache(self)
|
||||
|
||||
if self.params.get('cn_verification_proxy') is not None:
|
||||
self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
|
||||
if self.params.get('geo_verification_proxy') is None:
|
||||
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
|
||||
|
||||
if params.get('bidi_workaround', False):
|
||||
try:
|
||||
import pty
|
||||
|
@ -382,6 +382,8 @@ def _real_main(argv=None):
|
||||
'external_downloader_args': external_downloader_args,
|
||||
'postprocessor_args': postprocessor_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
'geo_verification_proxy': opts.geo_verification_proxy,
|
||||
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
|
@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
@ -2594,15 +2595,19 @@ except ImportError: # Python < 3.3
|
||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||
|
||||
|
||||
if sys.version_info >= (2, 7, 3):
|
||||
try:
|
||||
args = shlex.split('中文')
|
||||
assert (isinstance(args, list) and
|
||||
isinstance(args[0], compat_str) and
|
||||
args[0] == '中文')
|
||||
compat_shlex_split = shlex.split
|
||||
else:
|
||||
except (AssertionError, UnicodeEncodeError):
|
||||
# Working around shlex issue with unicode strings on some python 2
|
||||
# versions (see http://bugs.python.org/issue1548891)
|
||||
def compat_shlex_split(s, comments=False, posix=True):
|
||||
if isinstance(s, compat_str):
|
||||
s = s.encode('utf-8')
|
||||
return shlex.split(s, comments, posix)
|
||||
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
|
||||
|
||||
|
||||
def compat_ord(c):
|
||||
|
@ -196,6 +196,11 @@ def build_fragments_list(boot_info):
|
||||
first_frag_number = fragment_run_entry_table[0]['first']
|
||||
fragments_counter = itertools.count(first_frag_number)
|
||||
for segment, fragments_count in segment_run_table['segment_run']:
|
||||
# In some live HDS streams (for example Rai), `fragments_count` is
|
||||
# abnormal and causing out-of-memory errors. It's OK to change the
|
||||
# number of fragments for live streams as they are updated periodically
|
||||
if fragments_count == 4294967295 and boot_info['live']:
|
||||
fragments_count = 2
|
||||
for _ in range(fragments_count):
|
||||
res.append((segment, next(fragments_counter)))
|
||||
|
||||
@ -329,7 +334,11 @@ class F4mFD(FragmentFD):
|
||||
|
||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
|
||||
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
|
||||
# From Adobe F4M 3.0 spec:
|
||||
# The <baseURL> element SHALL be the base URL for all relative
|
||||
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
|
||||
# URLs should be relative to the location of the containing document.
|
||||
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
|
||||
live = boot_info['live']
|
||||
metadata_node = media.find(_add_ns('metadata'))
|
||||
if metadata_node is not None:
|
||||
|
@ -2,23 +2,137 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
unescapeHTML,
|
||||
extract_attributes,
|
||||
get_element_by_attribute,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class AENetworksIE(InfoExtractor):
|
||||
class AENetworksBaseIE(ThePlatformIE):
|
||||
_THEPLATFORM_KEY = 'crazyjava'
|
||||
_THEPLATFORM_SECRET = 's3cr3t'
|
||||
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
|
||||
'info_dict': {
|
||||
'id': '22253814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
|
||||
'info_dict': {
|
||||
'id': '71889446852',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
|
||||
'info_dict': {
|
||||
'id': 'SERIES4317',
|
||||
'title': 'Atlanta Plastic',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
'aetv.com': 'AETV',
|
||||
'mylifetime.com': 'LIFETIME',
|
||||
'fyi.tv': 'FYI',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
url_parts_len = len(url_parts)
|
||||
if url_parts_len == 1:
|
||||
entries = []
|
||||
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||
elif url_parts_len == 2:
|
||||
entries = []
|
||||
for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
|
||||
episode_attributes = extract_attributes(episode_item)
|
||||
episode_url = compat_urlparse.urljoin(
|
||||
url, episode_attributes['data-canonical'])
|
||||
entries.append(self.url_result(
|
||||
episode_url, 'AENetworks',
|
||||
episode_attributes['data-videoid']))
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
||||
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': 'medium_video_s3'
|
||||
}
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating'])
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||
media_url = update_url_query(media_url, query)
|
||||
media_url = self._sign_url(media_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
|
||||
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class HistoryTopicIE(AENetworksBaseIE):
|
||||
IE_NAME = 'history:topic'
|
||||
IE_DESC = 'History.com Topic'
|
||||
_VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)(?:/[^/]+(?:/(?P<video_display_id>[^/?#]+))?)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
||||
'info_dict': {
|
||||
'id': 'g12m5Gyt3fdR',
|
||||
'id': '40700995724',
|
||||
'ext': 'mp4',
|
||||
'title': "Bet You Didn't Know: Valentine's Day",
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
@ -31,57 +145,61 @@ class AENetworksIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'expected_warnings': ['JSON-LD'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
|
||||
'info_dict': {
|
||||
'id': 'eg47EERs_JsZ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
|
||||
'info_dict':
|
||||
{
|
||||
'id': 'world-war-i-history',
|
||||
'title': 'World War I History',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'playlist_mincount': 24,
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
|
||||
'only_matching': True
|
||||
'url': 'http://www.history.com/topics/world-war-i-history/videos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
|
||||
'only_matching': True
|
||||
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
|
||||
'only_matching': True
|
||||
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/speeches',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url_re = [
|
||||
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
|
||||
r"media_url\s*=\s*'([^']+)'"
|
||||
]
|
||||
video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
|
||||
query = {'mbr': 'true'}
|
||||
if page_type == 'shows':
|
||||
query['assetTypes'] = 'medium_video_s3'
|
||||
if 'switch=hds' in video_url:
|
||||
query['switch'] = 'hls'
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
info.update({
|
||||
def theplatform_url_result(self, theplatform_url, video_id, query):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': smuggle_url(
|
||||
update_url_query(video_url, query),
|
||||
update_url_query(theplatform_url, query),
|
||||
{
|
||||
'sig': {
|
||||
'key': 'crazyjava',
|
||||
'secret': 's3cr3t'},
|
||||
'key': self._THEPLATFORM_KEY,
|
||||
'secret': self._THEPLATFORM_SECRET,
|
||||
},
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
topic_id, video_display_id = re.match(self._VALID_URL, url).groups()
|
||||
if video_display_id:
|
||||
webpage = self._download_webpage(url, video_display_id)
|
||||
release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
|
||||
release_url = unescapeHTML(release_url)
|
||||
|
||||
return self.theplatform_url_result(
|
||||
release_url, video_id, {
|
||||
'mbr': 'true',
|
||||
'switch': 'hls'
|
||||
})
|
||||
return info
|
||||
else:
|
||||
webpage = self._download_webpage(url, topic_id)
|
||||
entries = []
|
||||
for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage):
|
||||
video_attributes = extract_attributes(episode_item)
|
||||
entries.append(self.theplatform_url_result(
|
||||
video_attributes['data-release-url'], video_attributes['data-id'], {
|
||||
'mbr': 'true',
|
||||
'switch': 'hls'
|
||||
}))
|
||||
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))
|
||||
|
@ -5,6 +5,8 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@ -50,21 +52,25 @@ class AMPIE(InfoExtractor):
|
||||
if isinstance(media_content, dict):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data['@attributes']
|
||||
media_type = media['type']
|
||||
if media_type in ('video/f4m', 'application/f4m+xml'):
|
||||
media = media_data.get('@attributes', {})
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif media_type == 'application/x-mpegURL':
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
@ -22,6 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
|
||||
_NETRC_MACHINE = 'animeondemand'
|
||||
_TESTS = [{
|
||||
# jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/161',
|
||||
'info_dict': {
|
||||
'id': '161',
|
||||
@ -30,17 +31,21 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
# Film wording is used instead of Episode
|
||||
# Film wording is used instead of Episode, ger/jap, Dub/OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/39',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Episodes without titles
|
||||
# Episodes without titles, jap, OmU
|
||||
'url': 'https://www.anime-on-demand.de/anime/162',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/169',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Full length film, non-series, ger/jap, Dub/OmU, account required
|
||||
'url': 'https://www.anime-on-demand.de/anime/185',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@ -110,35 +115,12 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
def extract_info(html, video_id, num=None):
|
||||
title, description = [None] * 2
|
||||
formats = []
|
||||
|
||||
for input_ in re.findall(
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
|
||||
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
|
||||
attributes = extract_attributes(input_)
|
||||
playlist_urls = []
|
||||
for playlist_key in ('data-playlist', 'data-otherplaylist'):
|
||||
@ -161,7 +143,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
format_id_list.append(lang)
|
||||
if kind:
|
||||
format_id_list.append(kind)
|
||||
if not format_id_list:
|
||||
if not format_id_list and num is not None:
|
||||
format_id_list.append(compat_str(num))
|
||||
format_id = '-'.join(format_id_list)
|
||||
format_note = ', '.join(filter(None, (kind, lang_note)))
|
||||
@ -215,28 +197,74 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
})
|
||||
formats.extend(file_formats)
|
||||
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
return {
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
})
|
||||
}
|
||||
|
||||
def extract_entries(html, video_id, common_info, num=None):
|
||||
info = extract_info(html, video_id, num)
|
||||
|
||||
if info['formats']:
|
||||
self._sort_formats(info['formats'])
|
||||
f = common_info.copy()
|
||||
f.update(info)
|
||||
entries.append(f)
|
||||
|
||||
# Extract teaser only when full episode is not available
|
||||
if not formats:
|
||||
# Extract teaser/trailer only when full episode is not available
|
||||
if not info['formats']:
|
||||
m = re.search(
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
|
||||
episode_html)
|
||||
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
|
||||
html)
|
||||
if m:
|
||||
f = common_info.copy()
|
||||
f.update({
|
||||
'id': '%s-teaser' % f['id'],
|
||||
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
|
||||
'title': m.group('title'),
|
||||
'url': compat_urlparse.urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
|
||||
def extract_episodes(html):
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
|
||||
episodebox_title = self._search_regex(
|
||||
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
|
||||
episode_html, 'episodebox title', default=None, group='title')
|
||||
if not episodebox_title:
|
||||
continue
|
||||
|
||||
episode_number = int(self._search_regex(
|
||||
r'(?:Episode|Film)\s*(\d+)',
|
||||
episodebox_title, 'episode number', default=num))
|
||||
episode_title = self._search_regex(
|
||||
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
|
||||
episodebox_title, 'episode title', default=None)
|
||||
|
||||
video_id = 'episode-%d' % episode_number
|
||||
|
||||
common_info = {
|
||||
'id': video_id,
|
||||
'series': anime_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
extract_entries(episode_html, video_id, common_info)
|
||||
|
||||
def extract_film(html, video_id):
|
||||
common_info = {
|
||||
'id': anime_id,
|
||||
'title': anime_title,
|
||||
'description': anime_description,
|
||||
}
|
||||
extract_entries(html, video_id, common_info)
|
||||
|
||||
extract_episodes(webpage)
|
||||
|
||||
if not entries:
|
||||
extract_film(webpage, anime_id)
|
||||
|
||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||
|
@ -7,6 +7,8 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
|
||||
'info_dict': {
|
||||
'id': 'manofsteel',
|
||||
'id': '5111',
|
||||
'title': 'Man of Steel',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'id': 'blackthorn',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
}, {
|
||||
# json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
|
||||
'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
|
||||
'info_dict': {
|
||||
'id': '15881',
|
||||
'title': 'Kung Fu Panda 3',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||
'only_matching': True,
|
||||
@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
|
||||
movie = mobj.group('movie')
|
||||
uploader_id = mobj.group('company')
|
||||
|
||||
webpage = self._download_webpage(url, movie)
|
||||
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
|
||||
film_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
|
||||
film_id, fatal=False)
|
||||
|
||||
if film_data:
|
||||
entries = []
|
||||
for clip in film_data.get('clips', []):
|
||||
clip_title = clip['title']
|
||||
|
||||
formats = []
|
||||
for version, version_data in clip.get('versions', {}).items():
|
||||
for size, size_data in version_data.get('sizes', {}).items():
|
||||
src = size_data.get('src')
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
'language': version[:2],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
|
||||
'formats': formats,
|
||||
'title': clip_title,
|
||||
'thumbnail': clip.get('screen') or clip.get('thumb'),
|
||||
'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
|
||||
'upload_date': unified_strdate(clip.get('posted')),
|
||||
'uploader_id': uploader_id,
|
||||
})
|
||||
|
||||
page_data = film_data.get('page', {})
|
||||
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
@ -34,6 +35,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
|
||||
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
|
||||
@ -44,6 +46,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
|
||||
'duration': 5252,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
@ -55,6 +58,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
|
||||
'duration': 3240,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
@ -113,11 +117,14 @@ class ARDMediathekIE(InfoExtractor):
|
||||
continue
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
||||
video_id, preference=-1, f4m_id='hds', fatal=False))
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False))
|
||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
if server and server.startswith('rtmp'):
|
||||
f = {
|
||||
@ -231,7 +238,8 @@ class ARDIE(InfoExtractor):
|
||||
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||
'upload_date': '20140804',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -419,6 +419,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'PL-013263',
|
||||
'title': 'Areva & Uramin',
|
||||
'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
|
@ -44,6 +44,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
|
||||
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
||||
# Unified Streaming Platform
|
||||
_USP_RE = r'/([^/]+)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
_NAMESPACES = (
|
||||
_MEDIASELECTION_NS,
|
||||
@ -55,12 +57,11 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
@ -92,7 +93,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
'skip': 'this episode is not currently available',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||
@ -107,7 +108,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
'skip': 'this episode is not currently available',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
||||
'info_dict': {
|
||||
@ -127,13 +128,12 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'p022h44j',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
|
||||
'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
|
||||
'duration': 227,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
@ -141,13 +141,12 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'note': 'Video',
|
||||
'info_dict': {
|
||||
'id': 'p025c103',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||
'duration': 226,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
@ -163,7 +162,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
'skip': 'this episode is not currently available',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
|
||||
'info_dict': {
|
||||
@ -177,7 +176,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'geolocation',
|
||||
'skip': 'this episode is not currently available',
|
||||
}, {
|
||||
# iptv-all mediaset fails with geolocation however there is no geo restriction
|
||||
# for this programme at all
|
||||
@ -192,18 +191,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Now it\'s really geo-restricted',
|
||||
'skip': 'this episode is not currently available on BBC iPlayer Radio',
|
||||
}, {
|
||||
# compact player (https://github.com/rg3/youtube-dl/issues/8147)
|
||||
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
||||
'info_dict': {
|
||||
'id': 'p028bfkj',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||
'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
@ -248,9 +246,15 @@ class BBCCoUkIE(InfoExtractor):
|
||||
elif transfer_format == 'dash':
|
||||
pass
|
||||
elif transfer_format == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
is_unified_streaming = re.search(self._USP_RE, href)
|
||||
if is_unified_streaming:
|
||||
href = re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href)
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=supplier, fatal=False))
|
||||
m3u8_id=supplier, fatal=False)
|
||||
if is_unified_streaming:
|
||||
self._check_formats(m3u8_formats, programme_id)
|
||||
formats.extend(m3u8_formats)
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
@ -305,6 +309,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
if format.get('protocol') != 'm3u8_native':
|
||||
format.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
|
@ -2,11 +2,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
from .rudo import RudoIE
|
||||
|
||||
|
||||
class BioBioChileTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
_VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
|
||||
@ -18,6 +22,7 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Fernando Atria',
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
# different uploader layout
|
||||
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
|
||||
@ -32,6 +37,16 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
|
||||
}, {
|
||||
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||
'info_dict': {
|
||||
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
||||
'ext': 'mp4',
|
||||
'uploader': '(none)',
|
||||
'upload_date': '20160708',
|
||||
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||
'only_matching': True,
|
||||
@ -45,42 +60,22 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
rudo_url = RudoIE._extract_url(webpage)
|
||||
if not rudo_url:
|
||||
raise ExtractorError('No videos found')
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
|
||||
|
||||
file_url = self._search_regex(
|
||||
r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'file url', group='url')
|
||||
|
||||
base_url = self._search_regex(
|
||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
|
||||
'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
|
||||
group='url')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||
f = {
|
||||
'url': '%s%s' % (base_url, file_url),
|
||||
'format_id': 'http',
|
||||
'protocol': 'http',
|
||||
'preference': 1,
|
||||
}
|
||||
if formats:
|
||||
f_copy = formats[-1].copy()
|
||||
f_copy.update(f)
|
||||
f = f_copy
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
|
||||
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': rudo_url,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
@ -90,6 +91,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'description': 'md5:363109c02998fee92ec02211bd8000df',
|
||||
'uploader': 'National Ballet of Canada',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
},
|
||||
{
|
||||
# test flv videos served by akamaihd.net
|
||||
@ -108,7 +110,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
# playlist test
|
||||
# playlist with 'videoList'
|
||||
# from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||
'info_dict': {
|
||||
@ -117,6 +119,15 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
{
|
||||
# playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965)
|
||||
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
|
||||
'info_dict': {
|
||||
'id': '1522758701001',
|
||||
'title': 'Lesson 08',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
},
|
||||
]
|
||||
FLV_VCODECS = {
|
||||
1: 'SORENSON',
|
||||
@ -298,13 +309,19 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
info_url, player_key, 'Downloading playlist information')
|
||||
|
||||
json_data = json.loads(playlist_info)
|
||||
if 'videoList' not in json_data:
|
||||
raise ExtractorError('Empty playlist')
|
||||
if 'videoList' in json_data:
|
||||
playlist_info = json_data['videoList']
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||
playlist_dto = playlist_info['mediaCollectionDTO']
|
||||
elif 'playlistTabs' in json_data:
|
||||
playlist_info = json_data['playlistTabs']
|
||||
playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
|
||||
else:
|
||||
raise ExtractorError('Empty playlist')
|
||||
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
|
||||
|
||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||
playlist_title=playlist_dto['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
video_id = compat_str(video_info['id'])
|
||||
@ -585,6 +602,13 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
@ -597,7 +621,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': json_data.get('description'),
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': float_or_none(json_data.get('duration'), 1000),
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
|
@ -5,6 +5,7 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .facebook import FacebookIE
|
||||
|
||||
|
||||
class BuzzFeedIE(InfoExtractor):
|
||||
@ -20,11 +21,11 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'aVCR29aE_OQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Angry Ram destroys a punching bag..',
|
||||
'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
|
||||
'upload_date': '20141024',
|
||||
'uploader_id': 'Buddhanz1',
|
||||
'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
|
||||
'uploader': 'Buddhanz',
|
||||
'title': 'Angry Ram destroys a punching bag',
|
||||
'uploader': 'Angry Ram',
|
||||
}
|
||||
}]
|
||||
}, {
|
||||
@ -41,13 +42,30 @@ class BuzzFeedIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'mVmBL8B-In0',
|
||||
'ext': 'mp4',
|
||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||
'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
|
||||
'upload_date': '20141124',
|
||||
'uploader_id': 'CindysMunchkin',
|
||||
'description': 're:© 2014 Munchkin the',
|
||||
'uploader': 're:^Munchkin the',
|
||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
|
||||
'info_dict': {
|
||||
'id': 'the-most-adorable-crash-landing-ever',
|
||||
'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
|
||||
'description': 'This gosling knows how to stick a landing.',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '763ca415512f91ca62e4621086900a23',
|
||||
'info_dict': {
|
||||
'id': '971793786185728',
|
||||
'ext': 'mp4',
|
||||
'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
|
||||
'uploader': 'Calgary Outdoor Centre-University of Calgary',
|
||||
},
|
||||
}],
|
||||
'add_ie': ['Facebook'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -66,6 +84,10 @@ class BuzzFeedIE(InfoExtractor):
|
||||
continue
|
||||
entries.append(self.url_result(video['url']))
|
||||
|
||||
facebook_url = FacebookIE._extract_url(webpage)
|
||||
if facebook_url:
|
||||
entries.append(self.url_result(facebook_url))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
|
@ -80,9 +80,6 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
|
||||
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
|
||||
formats, subtitles = [], {}
|
||||
if site == 'cnet':
|
||||
formats, subtitles = self._extract_theplatform_smil(
|
||||
self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id)
|
||||
for (fkey, vid) in vdata['files'].items():
|
||||
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
|
||||
continue
|
||||
@ -94,7 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self.get_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
||||
info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
@ -1,16 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
from .onet import OnetBaseIE
|
||||
|
||||
|
||||
class ClipRsIE(InfoExtractor):
|
||||
class ClipRsIE(OnetBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
|
||||
_TEST = {
|
||||
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
|
||||
@ -27,64 +21,13 @@ class ClipRsIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
|
||||
mvp_id = self._search_mvp_id(webpage)
|
||||
|
||||
response = self._download_json(
|
||||
'http://qi.ckm.onetapi.pl/', video_id,
|
||||
query={
|
||||
'body[id]': video_id,
|
||||
'body[jsonrpc]': '2.0',
|
||||
'body[method]': 'get_asset_detail',
|
||||
'body[params][ID_Publikacji]': video_id,
|
||||
'body[params][Service]': 'www.onet.pl',
|
||||
'content-type': 'application/jsonp',
|
||||
'x-onet-app': 'player.front.onetapi.pl',
|
||||
})
|
||||
info_dict = self._extract_from_id(mvp_id, webpage)
|
||||
info_dict['display_id'] = display_id
|
||||
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error['message']), expected=True)
|
||||
|
||||
video = response['result'].get('0')
|
||||
|
||||
formats = []
|
||||
for _, formats_dict in video['formats'].items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
for format_id, format_list in formats_dict.items():
|
||||
if not isinstance(format_list, list):
|
||||
continue
|
||||
for f in format_list:
|
||||
if not f.get('url'):
|
||||
continue
|
||||
formats.append({
|
||||
'url': f['url'],
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(f.get('vertical_resolution')),
|
||||
'width': int_or_none(f.get('horizontal_resolution')),
|
||||
'abr': float_or_none(f.get('audio_bitrate')),
|
||||
'vbr': float_or_none(f.get('video_bitrate')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
meta = video.get('meta', {})
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or meta['title']
|
||||
description = self._og_search_description(webpage, default=None) or meta.get('description')
|
||||
duration = meta.get('length') or meta.get('lenght')
|
||||
timestamp = parse_iso8601(meta.get('addDate'), ' ')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
||||
return info_dict
|
||||
|
@ -44,6 +44,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
@ -54,6 +55,8 @@ from ..utils import (
|
||||
update_Request,
|
||||
update_url_query,
|
||||
parse_m3u8_attributes,
|
||||
extract_attributes,
|
||||
parse_codecs,
|
||||
)
|
||||
|
||||
|
||||
@ -161,6 +164,7 @@ class InfoExtractor(object):
|
||||
* "height" (optional, int)
|
||||
* "resolution" (optional, string "{width}x{height"},
|
||||
deprecated)
|
||||
* "filesize" (optional, int)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@ -749,10 +753,12 @@ class InfoExtractor(object):
|
||||
return self._og_search_property('url', html, **kargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
|
||||
if not isinstance(name, (list, tuple)):
|
||||
name = [name]
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
display_name = name[0]
|
||||
return self._html_search_regex(
|
||||
self._meta_regex(name),
|
||||
[self._meta_regex(n) for n in name],
|
||||
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||
|
||||
def _dc_search_uploader(self, html):
|
||||
@ -801,15 +807,17 @@ class InfoExtractor(object):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
|
||||
def _search_json_ld(self, html, video_id, **kwargs):
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_ld = self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
if not json_ld:
|
||||
return {}
|
||||
return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
|
||||
return self._json_ld(
|
||||
json_ld, video_id, fatal=kwargs.get('fatal', True),
|
||||
expected_type=expected_type)
|
||||
|
||||
def _json_ld(self, json_ld, video_id, fatal=True):
|
||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
if isinstance(json_ld, compat_str):
|
||||
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
|
||||
if not json_ld:
|
||||
@ -817,6 +825,8 @@ class InfoExtractor(object):
|
||||
info = {}
|
||||
if json_ld.get('@context') == 'http://schema.org':
|
||||
item_type = json_ld.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
if item_type == 'TVEpisode':
|
||||
info.update({
|
||||
'episode': unescapeHTML(json_ld.get('name')),
|
||||
@ -835,6 +845,19 @@ class InfoExtractor(object):
|
||||
'title': unescapeHTML(json_ld.get('headline')),
|
||||
'description': unescapeHTML(json_ld.get('articleBody')),
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
info.update({
|
||||
'url': json_ld.get('contentUrl'),
|
||||
'title': unescapeHTML(json_ld.get('name')),
|
||||
'description': unescapeHTML(json_ld.get('description')),
|
||||
'thumbnail': json_ld.get('thumbnailUrl'),
|
||||
'duration': parse_duration(json_ld.get('duration')),
|
||||
'timestamp': unified_timestamp(json_ld.get('uploadDate')),
|
||||
'filesize': float_or_none(json_ld.get('contentSize')),
|
||||
'tbr': int_or_none(json_ld.get('bitrate')),
|
||||
'width': int_or_none(json_ld.get('width')),
|
||||
'height': int_or_none(json_ld.get('height')),
|
||||
})
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
@staticmethod
|
||||
@ -876,7 +899,11 @@ class InfoExtractor(object):
|
||||
f['ext'] = determine_ext(f['url'])
|
||||
|
||||
if isinstance(field_preference, (list, tuple)):
|
||||
return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
|
||||
return tuple(
|
||||
f.get(field)
|
||||
if f.get(field) is not None
|
||||
else ('' if field == 'format_id' else -1)
|
||||
for field in field_preference)
|
||||
|
||||
preference = f.get('preference')
|
||||
if preference is None:
|
||||
@ -1180,6 +1207,7 @@ class InfoExtractor(object):
|
||||
'url': format_url(line.strip()),
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
@ -1188,24 +1216,17 @@ class InfoExtractor(object):
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
vcodec, acodec = [None] * 2
|
||||
va_codecs = codecs.split(',')
|
||||
if len(va_codecs) == 1:
|
||||
# Audio only entries usually come with single codec and
|
||||
# no resolution. For more robustness we also check it to
|
||||
# be mp4 audio.
|
||||
if not resolution and va_codecs[0].startswith('mp4a'):
|
||||
vcodec, acodec = 'none', va_codecs[0]
|
||||
else:
|
||||
vcodec = va_codecs[0]
|
||||
else:
|
||||
vcodec, acodec = va_codecs[:2]
|
||||
# Unified Streaming Platform
|
||||
mobj = re.search(
|
||||
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
|
||||
if mobj:
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
f.update({
|
||||
'acodec': acodec,
|
||||
'vcodec': vcodec,
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
@ -1610,6 +1631,62 @@ class InfoExtractor(object):
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
|
||||
def parse_content_type(content_type):
|
||||
if not content_type:
|
||||
return {}
|
||||
ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
|
||||
if ctr:
|
||||
mimetype, codecs = ctr.groups()
|
||||
f = parse_codecs(codecs)
|
||||
f['ext'] = mimetype2ext(mimetype)
|
||||
return f
|
||||
return {}
|
||||
|
||||
entries = []
|
||||
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||
media_info = {
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
}
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = media_attributes.get('src')
|
||||
if src:
|
||||
media_info['formats'].append({
|
||||
'url': absolute_url(src),
|
||||
'vcodec': 'none' if media_type == 'audio' else None,
|
||||
})
|
||||
media_info['thumbnail'] = media_attributes.get('poster')
|
||||
if media_content:
|
||||
for source_tag in re.findall(r'<source[^>]+>', media_content):
|
||||
source_attributes = extract_attributes(source_tag)
|
||||
src = source_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
f = parse_content_type(source_attributes.get('type'))
|
||||
f.update({
|
||||
'url': absolute_url(src),
|
||||
'vcodec': 'none' if media_type == 'audio' else None,
|
||||
})
|
||||
media_info['formats'].append(f)
|
||||
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||
track_attributes = extract_attributes(track_tag)
|
||||
kind = track_attributes.get('kind')
|
||||
if not kind or kind == 'subtitles':
|
||||
src = track_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
if media_info['formats']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
@ -1723,6 +1800,13 @@ class InfoExtractor(object):
|
||||
def _mark_watched(self, *args, **kwargs):
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def geo_verification_headers(self):
|
||||
headers = {}
|
||||
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
|
||||
if geo_verification_proxy:
|
||||
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
||||
return headers
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
30
youtube_dl/extractor/ctv.py
Normal file
30
youtube_dl/extractor/ctv.py
Normal file
@ -0,0 +1,30 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
'info_dict': {
|
||||
'id': '706966',
|
||||
'ext': 'mp4',
|
||||
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
|
||||
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
|
||||
'upload_date': '20150919',
|
||||
'timestamp': 1442624700,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:ctv_web:%s' % video_id,
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
65
youtube_dl/extractor/ctvnews.py
Normal file
65
youtube_dl/extractor/ctvnews.py
Normal file
@ -0,0 +1,65 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import orderedSet
|
||||
|
||||
|
||||
class CTVNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
|
||||
'info_dict': {
|
||||
'id': '901995',
|
||||
'ext': 'mp4',
|
||||
'title': 'Extended: \'That person cannot be me\' Johnson says',
|
||||
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
|
||||
'timestamp': 1467286284,
|
||||
'upload_date': '20160630',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '1.2966224',
|
||||
},
|
||||
'playlist_mincount': 19,
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '1.2876780',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/1.810401',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
def ninecninemedia_url_result(clip_id):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': clip_id,
|
||||
'url': '9c9media:ctvnews_web:%s' % clip_id,
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
|
||||
if page_id.isdigit():
|
||||
return ninecninemedia_url_result(page_id)
|
||||
else:
|
||||
webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={
|
||||
'ot': 'example.AjaxPageLayout.ot',
|
||||
'maxItemsPerPage': 1000000,
|
||||
})
|
||||
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
|
||||
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
|
||||
return self.playlist_result(entries, page_id)
|
@ -16,6 +16,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@ -111,6 +112,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
return list(map(lambda m: unescapeHTML(m[1]), matches))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@ -153,18 +161,19 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
type_ = media.get('type')
|
||||
if type_ == 'application/vnd.lumberjack.manifest':
|
||||
continue
|
||||
ext = determine_ext(media_url)
|
||||
if type_ == 'application/x-mpegURL' or ext == 'm3u8':
|
||||
ext = mimetype2ext(type_) or determine_ext(media_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', preference=-1,
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif type_ == 'application/f4m' or ext == 'f4m':
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
f = {
|
||||
'url': media_url,
|
||||
'format_id': 'http-%s' % quality,
|
||||
'ext': ext,
|
||||
}
|
||||
m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
|
||||
if m:
|
||||
|
@ -66,22 +66,32 @@ class DaumIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
# Requires dte_type=WEB (#9972)
|
||||
'url': 'http://tvpot.daum.net/v/s3794Uf1NZeZ1qMpGpeqeRU',
|
||||
'md5': 'a8917742069a4dd442516b86e7d66529',
|
||||
'info_dict': {
|
||||
'id': 's3794Uf1NZeZ1qMpGpeqeRU',
|
||||
'ext': 'mp4',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회',
|
||||
'upload_date': '20160611',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||
query = compat_urllib_parse_urlencode({'vid': video_id})
|
||||
movie_data = self._download_json(
|
||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
|
||||
video_id, 'Downloading video formats info')
|
||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json',
|
||||
video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'})
|
||||
|
||||
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
|
||||
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
|
||||
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
|
||||
|
||||
info = self._download_xml(
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||
'Downloading video info')
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id,
|
||||
'Downloading video info', query={'vid': video_id})
|
||||
|
||||
formats = []
|
||||
for format_el in movie_data['output_list']['output_list']:
|
||||
|
@ -4,78 +4,47 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:(?:lazyplayer|player)/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
|
||||
'md5': '2e24f67936517b143a234b4cadf792ec',
|
||||
'info_dict': {
|
||||
'id': '33100',
|
||||
'id': '3649835190001',
|
||||
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'ext': 'mp4',
|
||||
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
|
||||
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'timestamp': 1404039863.438,
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'timestamp': 1404039863,
|
||||
'upload_date': '20140629',
|
||||
'duration': 69.544,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
}
|
||||
'uploader_id': '1027729757001',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew']
|
||||
}, {
|
||||
'url': 'http://dbtv.no/3649835190001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/vice/5000634109001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/filmtrailer/3359293614001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
data = self._download_json(
|
||||
'http://api.dbtv.no/discovery/%s' % video_id, display_id)
|
||||
|
||||
video = data['playlist'][0]
|
||||
|
||||
formats = [{
|
||||
'url': f['URL'],
|
||||
'vcodec': f.get('container'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'vbr': float_or_none(f.get('rate'), 1000),
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
} for f in video['renditions'] if 'URL' in f]
|
||||
|
||||
if not formats:
|
||||
for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]:
|
||||
if url_key in video:
|
||||
formats.append({
|
||||
'url': video[url_key],
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
return {
|
||||
'id': compat_str(video['id']),
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video['title'],
|
||||
'description': clean_html(video['desc']),
|
||||
'thumbnail': video.get('splash') or video.get('thumb'),
|
||||
'timestamp': float_or_none(video.get('publishedAt'), 1000),
|
||||
'duration': float_or_none(video.get('length'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'categories': video.get('tags'),
|
||||
'formats': formats,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class DCNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
|
||||
@ -55,30 +55,32 @@ class DCNBaseIE(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_video_formats(self, webpage, video_id, entry_protocol):
|
||||
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
|
||||
formats = []
|
||||
m3u8_url = self._html_search_regex(
|
||||
r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False)
|
||||
if m3u8_url:
|
||||
format_url_base = 'http' + self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# format_url_base + '/manifest.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None))
|
||||
|
||||
rtsp_url = self._search_regex(
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
|
||||
if rtsp_url:
|
||||
formats.append({
|
||||
'url': rtsp_url,
|
||||
'format_id': 'rtsp',
|
||||
})
|
||||
|
||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url_base + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
|
||||
class DCNVideoIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:video'
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
|
||||
'info_dict':
|
||||
{
|
||||
@ -94,7 +96,10 @@ class DCNVideoIE(DCNBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -120,7 +125,7 @@ class DCNVideoIE(DCNBaseIE):
|
||||
|
||||
class DCNLiveIE(DCNBaseIE):
|
||||
IE_NAME = 'dcn:live'
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
@ -147,7 +152,7 @@ class DCNLiveIE(DCNBaseIE):
|
||||
|
||||
class DCNSeasonIE(InfoExtractor):
|
||||
IE_NAME = 'dcn:season'
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
|
||||
_TEST = {
|
||||
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
|
||||
'info_dict':
|
||||
|
@ -50,6 +50,14 @@ class EaglePlatformIE(InfoExtractor):
|
||||
'skip': 'Georestricted',
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
@staticmethod
|
||||
def _handle_error(response):
|
||||
status = int_or_none(response.get('status', 200))
|
||||
|
@ -6,12 +6,13 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
)
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
|
||||
'md5': '4294cf98bc165f218aaa0b89e0fd8042',
|
||||
'info_dict': {
|
||||
@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor):
|
||||
'timestamp': 1428035648,
|
||||
'upload_date': '20150403',
|
||||
'uploader_id': 'batchUser',
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# not available via http://widgets.ellentube.com/
|
||||
'url': 'http://www.ellentv.com/videos/1-szkgu2m2/',
|
||||
'info_dict': {
|
||||
'id': '1_szkgu2m2',
|
||||
'ext': 'flv',
|
||||
'title': "Ellen's Amazingly Talented Audience",
|
||||
'description': 'md5:86ff1e376ff0d717d7171590e273f0a5',
|
||||
'timestamp': 1255140900,
|
||||
'upload_date': '20091010',
|
||||
'uploader_id': 'ellenkaltura@gmail.com',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url)
|
||||
|
||||
for num, url_ in enumerate(URLS, 1):
|
||||
webpage = self._download_webpage(
|
||||
'http://widgets.ellentube.com/videos/%s' % video_id,
|
||||
video_id)
|
||||
url_, video_id, fatal=num == len(URLS))
|
||||
|
||||
default = NO_DEFAULT if num == len(URLS) else None
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id')
|
||||
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id',
|
||||
default=default)
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
[r'id="kaltura_player_([^"]+)"',
|
||||
r"_wb_entry_id\s*:\s*'([^']+)",
|
||||
r'data-kaltura-entry-id="([^"]+)'],
|
||||
webpage, 'kaltura id')
|
||||
webpage, 'kaltura id', default=default)
|
||||
|
||||
if partner_id and kaltura_id:
|
||||
break
|
||||
|
||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
|
||||
|
||||
|
@ -20,7 +20,10 @@ from .adobetv import (
|
||||
AdobeTVVideoIE,
|
||||
)
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aenetworks import AENetworksIE
|
||||
from .aenetworks import (
|
||||
AENetworksIE,
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
@ -136,9 +139,9 @@ from .chirbit import (
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudy import CloudyIE
|
||||
@ -168,6 +171,8 @@ from .crunchyroll import (
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .cwtv import CWTVIE
|
||||
from .dailymail import DailyMailIE
|
||||
@ -251,6 +256,7 @@ from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .flipagram import FlipagramIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
@ -276,6 +282,7 @@ from .freespeech import FreespeechIE
|
||||
from .freevideo import FreeVideoIE
|
||||
from .funimation import FunimationIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gameone import (
|
||||
@ -320,6 +327,10 @@ from .hotnewhiphop import HotNewHipHopIE
|
||||
from .hotstar import HotStarIE
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
from .hrti import (
|
||||
HRTiIE,
|
||||
HRTiPlaylistIE,
|
||||
)
|
||||
from .huffpost import HuffPostIE
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
@ -358,6 +369,7 @@ from .jove import JoveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kamcord import KamcordIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
@ -422,6 +434,7 @@ from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .meta import METAIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
@ -454,6 +467,7 @@ from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
@ -521,6 +535,7 @@ from .nick import (
|
||||
NickDeIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninegag import NineGagIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
@ -568,6 +583,10 @@ from .nytimes import (
|
||||
from .nuvid import NuvidIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .onet import (
|
||||
OnetIE,
|
||||
OnetChannelIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .ooyala import (
|
||||
OoyalaIE,
|
||||
@ -606,6 +625,7 @@ from .pluralsight import (
|
||||
PluralsightCourseIE,
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .polskieradio import PolskieRadioIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
@ -660,6 +680,7 @@ from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .roosterteeth import RoosterTeethIE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
@ -670,6 +691,7 @@ from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
from .ruleporn import RulePornIE
|
||||
from .rutube import (
|
||||
@ -704,10 +726,12 @@ from .shahid import ShahidIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
from .sixplay import SixPlayIE
|
||||
from .skynewsarabia import (
|
||||
SkyNewsArabiaIE,
|
||||
SkyNewsArabiaArticleIE,
|
||||
)
|
||||
from .skysports import SkySportsIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotri import (
|
||||
@ -889,6 +913,7 @@ from .udn import UDNEmbedIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usatoday import USATodayIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .ustudio import (
|
||||
@ -915,6 +940,7 @@ from .vice import (
|
||||
ViceIE,
|
||||
ViceShowIE,
|
||||
)
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
@ -963,6 +989,7 @@ from .viki import (
|
||||
from .vk import (
|
||||
VKIE,
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
)
|
||||
from .vlive import VLiveIE
|
||||
from .vodlocker import VodlockerIE
|
||||
@ -1048,6 +1075,7 @@ from .youtube import (
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeSharedVideoIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
|
@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
# Facebook API embed
|
||||
# see https://developers.facebook.com/docs/plugins/embedded-video-player
|
||||
mobj = re.search(r'''(?x)<div[^>]+
|
||||
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
|
||||
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
def _login(self):
|
||||
(useremail, password) = self._get_login_info()
|
||||
if useremail is None:
|
||||
@ -204,12 +219,25 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
|
||||
if m:
|
||||
swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
|
||||
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
|
||||
|
||||
for m in re.findall(PATTERN, webpage):
|
||||
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
|
||||
data = dict(json.loads(swf_params))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data = json.loads(params_raw)['video_data']
|
||||
video_data_candidate = json.loads(params_raw)['video_data']
|
||||
for _, f in video_data_candidate.items():
|
||||
if not f:
|
||||
continue
|
||||
if isinstance(f, dict):
|
||||
f = [f]
|
||||
if not isinstance(f, list):
|
||||
continue
|
||||
if f[0].get('video_id') == video_id:
|
||||
video_data = video_data_candidate
|
||||
break
|
||||
if video_data:
|
||||
break
|
||||
|
||||
def video_data_list2dict(video_data):
|
||||
ret = {}
|
||||
|
115
youtube_dl/extractor/flipagram.py
Normal file
115
youtube_dl/extractor/flipagram.py
Normal file
@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class FlipagramIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||
'md5': '888dcf08b7ea671381f00fab74692755',
|
||||
'info_dict': {
|
||||
'id': 'nyvTSJMKId',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||
'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
|
||||
'duration': 35.571,
|
||||
'timestamp': 1461244995,
|
||||
'upload_date': '20160421',
|
||||
'uploader': 'kitty juria',
|
||||
'uploader_id': 'sjuria101',
|
||||
'creator': 'kitty juria',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
'formats': 'mincount:2',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
|
||||
video_id)
|
||||
|
||||
flipagram = video_data['flipagram']
|
||||
video = flipagram['video']
|
||||
|
||||
json_ld = self._search_json_ld(webpage, video_id, default=False)
|
||||
title = json_ld.get('title') or flipagram['captionText']
|
||||
description = json_ld.get('description') or flipagram.get('captionText')
|
||||
|
||||
formats = [{
|
||||
'url': video['url'],
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': int_or_none(video_data.get('size')),
|
||||
}]
|
||||
|
||||
preview_url = try_get(
|
||||
flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
|
||||
if preview_url:
|
||||
formats.append({
|
||||
'url': preview_url,
|
||||
'ext': 'm4a',
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
counts = flipagram.get('counts', {})
|
||||
user = flipagram.get('user', {})
|
||||
video_data = flipagram.get('video', {})
|
||||
|
||||
thumbnails = [{
|
||||
'url': self._proto_relative_url(cover['url']),
|
||||
'width': int_or_none(cover.get('width')),
|
||||
'height': int_or_none(cover.get('height')),
|
||||
'filesize': int_or_none(cover.get('size')),
|
||||
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
||||
|
||||
# Note that this only retrieves comments that are initally loaded.
|
||||
# For videos with large amounts of comments, most won't be retrieved.
|
||||
comments = []
|
||||
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
||||
text = comment.get('comment')
|
||||
if not text or not isinstance(text, list):
|
||||
continue
|
||||
comments.append({
|
||||
'author': comment.get('user', {}).get('name'),
|
||||
'author_id': comment.get('user', {}).get('username'),
|
||||
'id': comment.get('id'),
|
||||
'text': text[0],
|
||||
'timestamp': unified_timestamp(comment.get('created')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': float_or_none(flipagram.get('duration'), 1000),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
|
||||
'uploader': user.get('name'),
|
||||
'uploader_id': user.get('username'),
|
||||
'creator': user.get('name'),
|
||||
'view_count': int_or_none(counts.get('plays')),
|
||||
'like_count': int_or_none(counts.get('likes')),
|
||||
'repost_count': int_or_none(counts.get('reflips')),
|
||||
'comment_count': int_or_none(counts.get('comments')),
|
||||
'comments': comments,
|
||||
'formats': formats,
|
||||
}
|
@ -14,7 +14,10 @@ from ..utils import (
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
)
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
@ -188,6 +191,21 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Dailymotion embed
|
||||
'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
|
||||
'md5': 'ee7f1828f25a648addc90cb2687b1f12',
|
||||
'info_dict': {
|
||||
'id': 'x4iiko0',
|
||||
'ext': 'mp4',
|
||||
'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
|
||||
'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
|
||||
'timestamp': 1467011958,
|
||||
'upload_date': '20160627',
|
||||
'uploader': 'France Inter',
|
||||
'uploader_id': 'x2q2ez',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -197,7 +215,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
|
||||
if dmcloud_url:
|
||||
return self.url_result(dmcloud_url, 'DailymotionCloud')
|
||||
return self.url_result(dmcloud_url, DailymotionCloudIE.ie_key())
|
||||
|
||||
dailymotion_urls = DailymotionIE._extract_urls(webpage)
|
||||
if dailymotion_urls:
|
||||
return self.playlist_result([
|
||||
self.url_result(dailymotion_url, DailymotionIE.ie_key())
|
||||
for dailymotion_url in dailymotion_urls])
|
||||
|
||||
video_id, catalogue = self._search_regex(
|
||||
(r'id-video=([^@]+@[^"]+)',
|
||||
|
35
youtube_dl/extractor/fusion.py
Normal file
35
youtube_dl/extractor/fusion.py
Normal file
@ -0,0 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class FusionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
|
||||
'info_dict': {
|
||||
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
|
||||
'ext': 'mp4',
|
||||
'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
|
||||
'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
|
||||
'duration': 140.0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://fusion.net/video/201781',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
ooyala_code = self._search_regex(
|
||||
r'data-video-id=(["\'])(?P<code>.+?)\1',
|
||||
webpage, 'ooyala code', group='code')
|
||||
|
||||
return OoyalaIE._build_url_result(ooyala_code)
|
@ -49,7 +49,10 @@ from .pornhub import PornHubIE
|
||||
from .xhamster import XHamsterEmbedIE
|
||||
from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionCloudIE,
|
||||
)
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .viewlift import ViewLiftEmbedIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
@ -64,6 +67,9 @@ from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .vessel import VesselIE
|
||||
from .kaltura import KalturaIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .facebook import FacebookIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -920,6 +926,24 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
{
|
||||
# Kaltura embedded via quoted entry_id
|
||||
'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
|
||||
'info_dict': {
|
||||
'id': '0_utuok90b',
|
||||
'ext': 'mp4',
|
||||
'title': '06_matthew_brender_raj_dutt',
|
||||
'timestamp': 1466638791,
|
||||
'upload_date': '20160622',
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
'expected_warnings': [
|
||||
'Could not send HEAD request'
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Eagle.Platform embed (generic URL)
|
||||
{
|
||||
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
|
||||
@ -1091,12 +1115,17 @@ class GenericIE(InfoExtractor):
|
||||
# Dailymotion Cloud video
|
||||
{
|
||||
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
|
||||
'md5': '49444254273501a64675a7e68c502681',
|
||||
'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
|
||||
'info_dict': {
|
||||
'id': '5585de919473990de4bee11b',
|
||||
'id': 'x2uy8t3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le débat',
|
||||
'title': 'Sauvons les abeilles ! - Le débat',
|
||||
'description': 'md5:d9082128b1c5277987825d684939ca26',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
'timestamp': 1434970506,
|
||||
'upload_date': '20150622',
|
||||
'uploader': 'Public Sénat',
|
||||
'uploader_id': 'xa9gza',
|
||||
}
|
||||
},
|
||||
# OnionStudios embed
|
||||
@ -1220,6 +1249,102 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'www.hudl.com',
|
||||
},
|
||||
},
|
||||
# twitter:player embed
|
||||
{
|
||||
'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
|
||||
'md5': 'a3e0df96369831de324f0778e126653c',
|
||||
'info_dict': {
|
||||
'id': '4909620399001',
|
||||
'ext': 'mp4',
|
||||
'title': 'What Do Black Holes Sound Like?',
|
||||
'description': 'what do black holes sound like',
|
||||
'upload_date': '20160524',
|
||||
'uploader_id': '29913724001',
|
||||
'timestamp': 1464107587,
|
||||
'uploader': 'TheAtlantic',
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
},
|
||||
# Facebook <iframe> embed
|
||||
{
|
||||
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
|
||||
'md5': 'fbcde74f534176ecb015849146dd3aee',
|
||||
'info_dict': {
|
||||
'id': '599637780109885',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #599637780109885',
|
||||
},
|
||||
},
|
||||
# Facebook API embed
|
||||
{
|
||||
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
|
||||
'md5': 'a47372ee61b39a7b90287094d447d94e',
|
||||
'info_dict': {
|
||||
'id': '10153467542406923',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #10153467542406923',
|
||||
},
|
||||
},
|
||||
# Wordpress "YouTube Video Importer" plugin
|
||||
{
|
||||
'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
|
||||
'md5': 'd16797741b560b485194eddda8121b48',
|
||||
'info_dict': {
|
||||
'id': 'HNTXWDXV9Is',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blue Devils Drumline Stanford lot 2016',
|
||||
'upload_date': '20160627',
|
||||
'uploader_id': 'GENOCIDE8GENERAL10',
|
||||
'uploader': 'cylus cyrus',
|
||||
},
|
||||
},
|
||||
{
|
||||
# video stored on custom kaltura server
|
||||
'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
|
||||
'md5': '537617d06e64dfed891fa1593c4b30cc',
|
||||
'info_dict': {
|
||||
'id': '0_1iotm5bh',
|
||||
'ext': 'mp4',
|
||||
'title': 'Elecciones británicas: 5 lecciones para Rajoy',
|
||||
'description': 'md5:435a89d68b9760b92ce67ed227055f16',
|
||||
'uploader_id': 'videos.expansion@el-mundo.net',
|
||||
'upload_date': '20150429',
|
||||
'timestamp': 1430303472,
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
{
|
||||
# Non-standard Vimeo embed
|
||||
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
||||
'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
|
||||
'info_dict': {
|
||||
'id': '148867247',
|
||||
'ext': 'mp4',
|
||||
'title': 'Understanding the web - Teaser',
|
||||
'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
|
||||
'upload_date': '20151214',
|
||||
'uploader': 'OpenClassrooms',
|
||||
'uploader_id': 'openclassrooms',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
# 'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||
# 'md5': '888dcf08b7ea671381f00fab74692755',
|
||||
# 'info_dict': {
|
||||
# 'id': 'nyvTSJMKId',
|
||||
# 'ext': 'mp4',
|
||||
# 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||
# 'description': '#love for cats.',
|
||||
# 'timestamp': 1461244995,
|
||||
# 'upload_date': '20160421',
|
||||
# },
|
||||
# 'params': {
|
||||
# 'force_generic_extractor': True,
|
||||
# },
|
||||
# }
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@ -1576,12 +1701,16 @@ class GenericIE(InfoExtractor):
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
# Look for Wordpress "YouTube Video Importer" plugin
|
||||
matches = re.findall(r'''(?x)<div[^>]+
|
||||
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
||||
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
return _playlist_from_matches(matches, lambda m: m[-1])
|
||||
|
||||
matches = DailymotionIE._extract_urls(webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches)
|
||||
|
||||
# Look for embedded Dailymotion playlist player (#3822)
|
||||
m = re.search(
|
||||
@ -1718,10 +1847,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded Facebook player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Facebook')
|
||||
facebook_url = FacebookIE._extract_url(webpage)
|
||||
if facebook_url is not None:
|
||||
return self.url_result(facebook_url, 'Facebook')
|
||||
|
||||
# Look for embedded VK player
|
||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
||||
@ -1903,18 +2031,14 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
|
||||
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
|
||||
if mobj is not None:
|
||||
return self.url_result(smuggle_url(
|
||||
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
|
||||
{'source_url': url}), 'Kaltura')
|
||||
kaltura_url = KalturaIE._extract_url(webpage)
|
||||
if kaltura_url:
|
||||
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
|
||||
|
||||
# Look for Eagle.Platform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'EaglePlatform')
|
||||
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
||||
if eagleplatform_url:
|
||||
return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
|
||||
|
||||
# Look for ClipYou (uses Eagle.Platform) embeds
|
||||
mobj = re.search(
|
||||
@ -2060,6 +2184,24 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
|
||||
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
|
||||
if embed_url:
|
||||
return self.url_result(embed_url)
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default=None, expected_type='VideoObject')
|
||||
if json_ld and json_ld.get('url'):
|
||||
info_dict.update({
|
||||
'title': video_title or info_dict['title'],
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': age_limit
|
||||
})
|
||||
info_dict.update(json_ld)
|
||||
return info_dict
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
|
202
youtube_dl/extractor/hrti.py
Normal file
202
youtube_dl/extractor/hrti.py
Normal file
@ -0,0 +1,202 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class HRTiBaseIE(InfoExtractor):
|
||||
"""
|
||||
Base Information Extractor for Croatian Radiotelevision
|
||||
video on demand site https://hrti.hrt.hr
|
||||
Reverse engineered from the JavaScript app in app.min.js
|
||||
"""
|
||||
_NETRC_MACHINE = 'hrti'
|
||||
|
||||
_APP_LANGUAGE = 'hr'
|
||||
_APP_VERSION = '1.1'
|
||||
_APP_PUBLICATION_ID = 'all_in_one'
|
||||
_API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json'
|
||||
|
||||
def _initialize_api(self):
|
||||
init_data = {
|
||||
'application_publication_id': self._APP_PUBLICATION_ID
|
||||
}
|
||||
|
||||
uuid = self._download_json(
|
||||
self._API_URL, None, note='Downloading uuid',
|
||||
errnote='Unable to download uuid',
|
||||
data=json.dumps(init_data).encode('utf-8'))['uuid']
|
||||
|
||||
app_data = {
|
||||
'uuid': uuid,
|
||||
'application_publication_id': self._APP_PUBLICATION_ID,
|
||||
'application_version': self._APP_VERSION
|
||||
}
|
||||
|
||||
req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
|
||||
req.get_method = lambda: 'PUT'
|
||||
|
||||
resources = self._download_json(
|
||||
req, None, note='Downloading session information',
|
||||
errnote='Unable to download session information')
|
||||
|
||||
self._session_id = resources['session_id']
|
||||
|
||||
modules = resources['modules']
|
||||
|
||||
self._search_url = modules['vod_catalog']['resources']['search']['uri'].format(
|
||||
language=self._APP_LANGUAGE,
|
||||
application_id=self._APP_PUBLICATION_ID)
|
||||
|
||||
self._login_url = (modules['user']['resources']['login']['uri'] +
|
||||
'/format/json').format(session_id=self._session_id)
|
||||
|
||||
self._logout_url = modules['user']['resources']['logout']['uri']
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
# TODO: figure out authentication with cookies
|
||||
if username is None or password is None:
|
||||
self.raise_login_required()
|
||||
|
||||
auth_data = {
|
||||
'username': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
try:
|
||||
auth_info = self._download_json(
|
||||
self._login_url, None, note='Logging in', errnote='Unable to log in',
|
||||
data=json.dumps(auth_data).encode('utf-8'))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406:
|
||||
auth_info = self._parse_json(e.cause.read().encode('utf-8'), None)
|
||||
else:
|
||||
raise
|
||||
|
||||
error_message = auth_info.get('error', {}).get('message')
|
||||
if error_message:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error_message),
|
||||
expected=True)
|
||||
|
||||
self._token = auth_info['secure_streaming_token']
|
||||
|
||||
def _real_initialize(self):
|
||||
self._initialize_api()
|
||||
self._login()
|
||||
|
||||
|
||||
class HRTiIE(HRTiBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
hrti:(?P<short_id>[0-9]+)|
|
||||
https?://
|
||||
hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd',
|
||||
'info_dict': {
|
||||
'id': '2181385',
|
||||
'display_id': 'republika-dokumentarna-serija-16-hd',
|
||||
'ext': 'mp4',
|
||||
'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)',
|
||||
'description': 'md5:48af85f620e8e0e1df4096270568544f',
|
||||
'duration': 2922,
|
||||
'view_count': int,
|
||||
'average_rating': int,
|
||||
'episode_number': int,
|
||||
'season_number': int,
|
||||
'age_limit': 12,
|
||||
},
|
||||
'skip': 'Requires account credentials',
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/#/video/show/2181385/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'hrti:2181385',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('short_id') or mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
video = self._download_json(
|
||||
'%s/video_id/%s/format/json' % (self._search_url, video_id),
|
||||
display_id, 'Downloading video metadata JSON')['video'][0]
|
||||
|
||||
title_info = video['title']
|
||||
title = title_info['title_long']
|
||||
|
||||
movie = video['video_assets']['movie'][0]
|
||||
m3u8_url = movie['url'].format(TOKEN=self._token)
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = clean_html(title_info.get('summary_long'))
|
||||
age_limit = parse_age_limit(video.get('parental_control', {}).get('rating'))
|
||||
view_count = int_or_none(video.get('views'))
|
||||
average_rating = int_or_none(video.get('user_rating'))
|
||||
duration = int_or_none(movie.get('duration'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'average_rating': average_rating,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class HRTiPlaylistIE(HRTiBaseIE):
|
||||
_VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
|
||||
_TESTS = [{
|
||||
'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
|
||||
'info_dict': {
|
||||
'id': '212',
|
||||
'title': 'ekumena',
|
||||
},
|
||||
'playlist_mincount': 8,
|
||||
'skip': 'Requires account credentials',
|
||||
}, {
|
||||
'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
category_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or category_id
|
||||
|
||||
response = self._download_json(
|
||||
'%s/category_id/%s/format/json' % (self._search_url, category_id),
|
||||
display_id, 'Downloading video metadata JSON')
|
||||
|
||||
video_ids = try_get(
|
||||
response, lambda x: x['video_listings'][0]['alternatives'][0]['list'],
|
||||
list) or [video['id'] for video in response.get('videos', []) if video.get('id')]
|
||||
|
||||
entries = [self.url_result('hrti:%s' % video_id) for video_id in video_ids]
|
||||
|
||||
return self.playlist_result(entries, category_id, display_id)
|
@ -3,28 +3,22 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import itertools
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
decode_packed_codes,
|
||||
get_element_by_id,
|
||||
get_element_by_attribute,
|
||||
ExtractorError,
|
||||
ohdave_rsa_encrypt,
|
||||
remove_start,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@ -171,70 +165,21 @@ class IqiyiIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
|
||||
'md5': '2cb594dc2781e6c941a110d8f358118b',
|
||||
# MD5 checksum differs on my machine and Travis CI
|
||||
'info_dict': {
|
||||
'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
|
||||
'ext': 'mp4',
|
||||
'title': '美国德州空中惊现奇异云团 酷似UFO',
|
||||
'ext': 'f4v',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
|
||||
'md5': '667171934041350c5de3f5015f7f1152',
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
|
||||
'ext': 'f4v',
|
||||
'title': '名侦探柯南第752集',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ext': 'mp4',
|
||||
'title': '名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}, {
|
||||
'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
|
||||
'only_matching': True,
|
||||
@ -250,22 +195,10 @@ class IqiyiIE(InfoExtractor):
|
||||
'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
|
||||
'info_dict': {
|
||||
'id': 'f3cf468b39dddb30d676f89a91200dc1',
|
||||
'ext': 'mp4',
|
||||
'title': '泰坦尼克号',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
|
||||
'ext': 'f4v',
|
||||
'title': '泰坦尼克号',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
|
||||
'ext': 'f4v',
|
||||
'title': '泰坦尼克号',
|
||||
},
|
||||
}],
|
||||
'expected_warnings': ['Needs a VIP account for full video'],
|
||||
'skip': 'Geo-restricted to China',
|
||||
}, {
|
||||
'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
|
||||
'info_dict': {
|
||||
@ -278,20 +211,15 @@ class IqiyiIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS_MAP = [
|
||||
('1', 'h6'),
|
||||
('2', 'h5'),
|
||||
('3', 'h4'),
|
||||
('4', 'h3'),
|
||||
('5', 'h2'),
|
||||
('10', 'h1'),
|
||||
]
|
||||
|
||||
AUTH_API_ERRORS = {
|
||||
# No preview available (不允许试看鉴权失败)
|
||||
'Q00505': 'This video requires a VIP account',
|
||||
# End of preview time (试看结束鉴权失败)
|
||||
'Q00506': 'Needs a VIP account for full video',
|
||||
_FORMATS_MAP = {
|
||||
'96': 1, # 216p, 240p
|
||||
'1': 2, # 336p, 360p
|
||||
'2': 3, # 480p, 504p
|
||||
'21': 4, # 504p
|
||||
'4': 5, # 720p
|
||||
'17': 5, # 720p
|
||||
'5': 6, # 1072p, 1080p
|
||||
'18': 7, # 1080p
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
@ -352,177 +280,23 @@ class IqiyiIE(InfoExtractor):
|
||||
|
||||
return True
|
||||
|
||||
def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning):
|
||||
auth_params = {
|
||||
# version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
|
||||
'version': '2.0',
|
||||
'platform': 'b6c13e26323c537d',
|
||||
'aid': tvid,
|
||||
def get_raw_data(self, tvid, video_id):
|
||||
tm = int(time.time() * 1000)
|
||||
|
||||
key = 'd5fb4bd9d50c4be6948c97edd7254b0e'
|
||||
sc = md5_text(compat_str(tm) + key + tvid)
|
||||
params = {
|
||||
'tvid': tvid,
|
||||
'uid': '',
|
||||
'deviceId': _uuid,
|
||||
'playType': 'main', # XXX: always main?
|
||||
'filename': os.path.splitext(url_basename(api_video_url))[0],
|
||||
}
|
||||
|
||||
qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query)
|
||||
for key, val in qd_items.items():
|
||||
auth_params[key] = val[0]
|
||||
|
||||
auth_req = sanitized_Request(
|
||||
'http://api.vip.iqiyi.com/services/ckn.action',
|
||||
urlencode_postdata(auth_params))
|
||||
# iQiyi server throws HTTP 405 error without the following header
|
||||
auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
auth_result = self._download_json(
|
||||
auth_req, video_id,
|
||||
note='Downloading video authentication JSON',
|
||||
errnote='Unable to download video authentication JSON')
|
||||
|
||||
code = auth_result.get('code')
|
||||
msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code
|
||||
if code == 'Q00506':
|
||||
if do_report_warning:
|
||||
self.report_warning(msg)
|
||||
return False
|
||||
if 'data' not in auth_result:
|
||||
if msg is not None:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True)
|
||||
raise ExtractorError('Unexpected error from Iqiyi auth API')
|
||||
|
||||
return auth_result['data']
|
||||
|
||||
def construct_video_urls(self, data, video_id, _uuid, tvid):
|
||||
def do_xor(x, y):
|
||||
a = y % 3
|
||||
if a == 1:
|
||||
return x ^ 121
|
||||
if a == 2:
|
||||
return x ^ 72
|
||||
return x ^ 103
|
||||
|
||||
def get_encode_code(l):
|
||||
a = 0
|
||||
b = l.split('-')
|
||||
c = len(b)
|
||||
s = ''
|
||||
for i in range(c - 1, -1, -1):
|
||||
a = do_xor(int(b[c - i - 1], 16), i)
|
||||
s += chr(a)
|
||||
return s[::-1]
|
||||
|
||||
def get_path_key(x, format_id, segment_index):
|
||||
mg = ')(*&^flash@#$%a'
|
||||
tm = self._download_json(
|
||||
'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
|
||||
note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
|
||||
)['t']
|
||||
t = str(int(math.floor(int(tm) / (600.0))))
|
||||
return md5_text(t + mg + x)
|
||||
|
||||
video_urls_dict = {}
|
||||
need_vip_warning_report = True
|
||||
for format_item in data['vp']['tkl'][0]['vs']:
|
||||
if 0 < int(format_item['bid']) <= 10:
|
||||
format_id = self.get_format(format_item['bid'])
|
||||
else:
|
||||
continue
|
||||
|
||||
video_urls = []
|
||||
|
||||
video_urls_info = format_item['fs']
|
||||
if not format_item['fs'][0]['l'].startswith('/'):
|
||||
t = get_encode_code(format_item['fs'][0]['l'])
|
||||
if t.endswith('mp4'):
|
||||
video_urls_info = format_item['flvs']
|
||||
|
||||
for segment_index, segment in enumerate(video_urls_info):
|
||||
vl = segment['l']
|
||||
if not vl.startswith('/'):
|
||||
vl = get_encode_code(vl)
|
||||
is_vip_video = '/vip/' in vl
|
||||
filesize = segment['b']
|
||||
base_url = data['vp']['du'].split('/')
|
||||
if not is_vip_video:
|
||||
key = get_path_key(
|
||||
vl.split('/')[-1].split('.')[0], format_id, segment_index)
|
||||
base_url.insert(-1, key)
|
||||
base_url = '/'.join(base_url)
|
||||
param = {
|
||||
'su': _uuid,
|
||||
'qyid': uuid.uuid4().hex,
|
||||
'client': '',
|
||||
'z': '',
|
||||
'bt': '',
|
||||
'ct': '',
|
||||
'tn': str(int(time.time()))
|
||||
}
|
||||
api_video_url = base_url + vl
|
||||
if is_vip_video:
|
||||
api_video_url = api_video_url.replace('.f4v', '.hml')
|
||||
auth_result = self._authenticate_vip_video(
|
||||
api_video_url, video_id, tvid, _uuid, need_vip_warning_report)
|
||||
if auth_result is False:
|
||||
need_vip_warning_report = False
|
||||
break
|
||||
param.update({
|
||||
't': auth_result['t'],
|
||||
# cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
|
||||
'cid': 'afbe8fd3d73448c9',
|
||||
'vid': video_id,
|
||||
'QY00001': auth_result['u'],
|
||||
})
|
||||
api_video_url += '?' if '?' not in api_video_url else '&'
|
||||
api_video_url += compat_urllib_parse_urlencode(param)
|
||||
js = self._download_json(
|
||||
api_video_url, video_id,
|
||||
note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
|
||||
video_url = js['l']
|
||||
video_urls.append(
|
||||
(video_url, filesize))
|
||||
|
||||
video_urls_dict[format_id] = video_urls
|
||||
return video_urls_dict
|
||||
|
||||
def get_format(self, bid):
|
||||
matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)]
|
||||
return matched_format_ids[0] if len(matched_format_ids) else None
|
||||
|
||||
def get_bid(self, format_id):
|
||||
matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id]
|
||||
return matched_bids[0] if len(matched_bids) else None
|
||||
|
||||
def get_raw_data(self, tvid, video_id, enc_key, _uuid):
|
||||
tm = str(int(time.time()))
|
||||
tail = tm + tvid
|
||||
param = {
|
||||
'key': 'fvip',
|
||||
'src': md5_text('youtube-dl'),
|
||||
'tvId': tvid,
|
||||
'vid': video_id,
|
||||
'vinfo': 1,
|
||||
'tm': tm,
|
||||
'enc': md5_text(enc_key + tail),
|
||||
'qyid': _uuid,
|
||||
'tn': random.random(),
|
||||
# In iQiyi's flash player, um is set to 1 if there's a logged user
|
||||
# Some 1080P formats are only available with a logged user.
|
||||
# Here force um=1 to trick the iQiyi server
|
||||
'um': 1,
|
||||
'authkey': md5_text(md5_text('') + tail),
|
||||
'k_tag': 1,
|
||||
'src': '76f90cbd92f94a2e925d83e8ccd22cb7',
|
||||
'sc': sc,
|
||||
't': tm,
|
||||
}
|
||||
|
||||
api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
|
||||
compat_urllib_parse_urlencode(param)
|
||||
raw_data = self._download_json(api_url, video_id)
|
||||
return raw_data
|
||||
|
||||
def get_enc_key(self, video_id):
|
||||
# TODO: automatic key extraction
|
||||
# last update at 2016-01-22 for Zombie::bite
|
||||
enc_key = '4a1caba4b4465345366f28da7c117d20'
|
||||
return enc_key
|
||||
return self._download_json(
|
||||
'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
|
||||
video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
|
||||
query=params, headers=self.geo_verification_headers())
|
||||
|
||||
def _extract_playlist(self, webpage):
|
||||
PAGE_SIZE = 50
|
||||
@ -571,58 +345,41 @@ class IqiyiIE(InfoExtractor):
|
||||
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
|
||||
video_id = self._search_regex(
|
||||
r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
|
||||
_uuid = uuid.uuid4().hex
|
||||
|
||||
enc_key = self.get_enc_key(video_id)
|
||||
formats = []
|
||||
for _ in range(5):
|
||||
raw_data = self.get_raw_data(tvid, video_id)
|
||||
|
||||
raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
|
||||
|
||||
if raw_data['code'] != 'A000000':
|
||||
if raw_data['code'] != 'A00000':
|
||||
if raw_data['code'] == 'A00111':
|
||||
self.raise_geo_restricted()
|
||||
raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
|
||||
|
||||
data = raw_data['data']
|
||||
|
||||
title = data['vi']['vn']
|
||||
for stream in data['vidl']:
|
||||
if 'm3utx' not in stream:
|
||||
continue
|
||||
vd = compat_str(stream['vd'])
|
||||
formats.append({
|
||||
'url': stream['m3utx'],
|
||||
'format_id': vd,
|
||||
'ext': 'mp4',
|
||||
'preference': self._FORMATS_MAP.get(vd, -1),
|
||||
'protocol': 'm3u8_native',
|
||||
})
|
||||
|
||||
# generate video_urls_dict
|
||||
video_urls_dict = self.construct_video_urls(
|
||||
data, video_id, _uuid, tvid)
|
||||
if formats:
|
||||
break
|
||||
|
||||
# construct info
|
||||
entries = []
|
||||
for format_id in video_urls_dict:
|
||||
video_urls = video_urls_dict[format_id]
|
||||
for i, video_url_info in enumerate(video_urls):
|
||||
if len(entries) < i + 1:
|
||||
entries.append({'formats': []})
|
||||
entries[i]['formats'].append(
|
||||
{
|
||||
'url': video_url_info[0],
|
||||
'filesize': video_url_info[-1],
|
||||
'format_id': format_id,
|
||||
'preference': int(self.get_bid(format_id))
|
||||
}
|
||||
)
|
||||
self._sleep(5, video_id)
|
||||
|
||||
for i in range(len(entries)):
|
||||
self._sort_formats(entries[i]['formats'])
|
||||
entries[i].update(
|
||||
{
|
||||
'id': '%s_part%d' % (video_id, i + 1),
|
||||
'title': title,
|
||||
}
|
||||
)
|
||||
self._sort_formats(formats)
|
||||
title = (get_element_by_id('widget-videotitle', webpage) or
|
||||
clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)))
|
||||
|
||||
if len(entries) > 1:
|
||||
info = {
|
||||
'_type': 'multi_video',
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
'formats': formats,
|
||||
}
|
||||
else:
|
||||
info = entries[0]
|
||||
info['id'] = video_id
|
||||
info['title'] = title
|
||||
|
||||
return info
|
||||
|
@ -6,7 +6,6 @@ import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
)
|
||||
@ -15,6 +14,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@ -34,7 +34,8 @@ class KalturaIE(InfoExtractor):
|
||||
)(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
|
||||
)
|
||||
'''
|
||||
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
|
||||
_SERVICE_URL = 'http://cdnapi.kaltura.com'
|
||||
_SERVICE_BASE = '/api_v3/index.php'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'kaltura:269692:1_1jc2y3e4',
|
||||
@ -64,16 +65,50 @@ class KalturaIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = (
|
||||
re.search(
|
||||
r"""(?xs)
|
||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||
\{.*?
|
||||
(?P<q1>['\"])wid(?P=q1)\s*:\s*
|
||||
(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?
|
||||
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||
(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),
|
||||
""", webpage) or
|
||||
re.search(
|
||||
r'''(?xs)
|
||||
(?P<q1>["\'])
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?
|
||||
(?P=q1).*?
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
|
||||
)\s*:\s*
|
||||
(?P<q3>["\'])(?P<id>.+?)(?P=q3)
|
||||
''', webpage))
|
||||
if mobj:
|
||||
embed_info = mobj.groupdict()
|
||||
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
||||
escaped_pid = re.escape(embed_info['partner_id'])
|
||||
service_url = re.search(
|
||||
r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
|
||||
webpage)
|
||||
if service_url:
|
||||
url = smuggle_url(url, {'service_url': service_url.group(1)})
|
||||
return url
|
||||
|
||||
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
|
||||
params = actions[0]
|
||||
if len(actions) > 1:
|
||||
for i, a in enumerate(actions[1:], start=1):
|
||||
for k, v in a.items():
|
||||
params['%d:%s' % (i, k)] = v
|
||||
|
||||
query = compat_urllib_parse_urlencode(params)
|
||||
url = self._API_BASE + query
|
||||
data = self._download_json(url, video_id, *args, **kwargs)
|
||||
data = self._download_json(
|
||||
(service_url or self._SERVICE_URL) + self._SERVICE_BASE,
|
||||
video_id, query=params, *args, **kwargs)
|
||||
|
||||
status = data if len(actions) == 1 else data[0]
|
||||
if status.get('objectType') == 'KalturaAPIException':
|
||||
@ -82,7 +117,7 @@ class KalturaIE(InfoExtractor):
|
||||
|
||||
return data
|
||||
|
||||
def _get_kaltura_signature(self, video_id, partner_id):
|
||||
def _get_kaltura_signature(self, video_id, partner_id, service_url=None):
|
||||
actions = [{
|
||||
'apiVersion': '3.1',
|
||||
'expiry': 86400,
|
||||
@ -92,10 +127,10 @@ class KalturaIE(InfoExtractor):
|
||||
'widgetId': '_%s' % partner_id,
|
||||
}]
|
||||
return self._kaltura_api_call(
|
||||
video_id, actions, note='Downloading Kaltura signature')['ks']
|
||||
video_id, actions, service_url, note='Downloading Kaltura signature')['ks']
|
||||
|
||||
def _get_video_info(self, video_id, partner_id):
|
||||
signature = self._get_kaltura_signature(video_id, partner_id)
|
||||
def _get_video_info(self, video_id, partner_id, service_url=None):
|
||||
signature = self._get_kaltura_signature(video_id, partner_id, service_url)
|
||||
actions = [
|
||||
{
|
||||
'action': 'null',
|
||||
@ -118,7 +153,7 @@ class KalturaIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
return self._kaltura_api_call(
|
||||
video_id, actions, note='Downloading video info JSON')
|
||||
video_id, actions, service_url, note='Downloading video info JSON')
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
@ -127,7 +162,7 @@ class KalturaIE(InfoExtractor):
|
||||
partner_id, entry_id = mobj.group('partner_id', 'id')
|
||||
ks = None
|
||||
if partner_id and entry_id:
|
||||
info, flavor_assets = self._get_video_info(entry_id, partner_id)
|
||||
info, flavor_assets = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
|
||||
else:
|
||||
path, query = mobj.group('path', 'query')
|
||||
if not path and not query:
|
||||
@ -175,12 +210,17 @@ class KalturaIE(InfoExtractor):
|
||||
unsigned_url += '?referrer=%s' % referrer
|
||||
return unsigned_url
|
||||
|
||||
data_url = info['dataUrl']
|
||||
if '/flvclipper/' in data_url:
|
||||
data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
|
||||
|
||||
formats = []
|
||||
for f in flavor_assets:
|
||||
# Continue if asset is not ready
|
||||
if f['status'] != 2:
|
||||
continue
|
||||
video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id']))
|
||||
video_url = sign_url(
|
||||
'%s/flavorId/%s' % (data_url, f['id']))
|
||||
formats.append({
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'ext': f.get('fileExt'),
|
||||
@ -193,9 +233,12 @@ class KalturaIE(InfoExtractor):
|
||||
'width': int_or_none(f.get('width')),
|
||||
'url': video_url,
|
||||
})
|
||||
m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp'))
|
||||
if '/playManifest/' in data_url:
|
||||
m3u8_url = sign_url(data_url.replace(
|
||||
'format/url', 'format/applehttp'))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
m3u8_url, entry_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
self._check_formats(formats, entry_id)
|
||||
self._sort_formats(formats)
|
||||
|
71
youtube_dl/extractor/kamcord.py
Normal file
71
youtube_dl/extractor/kamcord.py
Normal file
@ -0,0 +1,71 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class KamcordIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?kamcord\.com/v/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.kamcord.com/v/hNYRduDgWb4',
|
||||
'md5': 'c3180e8a9cfac2e86e1b88cb8751b54c',
|
||||
'info_dict': {
|
||||
'id': 'hNYRduDgWb4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drinking Madness',
|
||||
'uploader': 'jacksfilms',
|
||||
'uploader_id': '3044562',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__props\s*=\s*({.+?});?(?:\n|\s*</script)',
|
||||
webpage, 'video'),
|
||||
video_id)['video']
|
||||
|
||||
title = video['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video['play']['hls'], video_id, 'mp4', entry_protocol='m3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
uploader = video.get('user', {}).get('username')
|
||||
uploader_id = video.get('user', {}).get('id')
|
||||
|
||||
view_count = int_or_none(video.get('viewCount'))
|
||||
like_count = int_or_none(video.get('heartCount'))
|
||||
comment_count = int_or_none(video.get('messageCount'))
|
||||
|
||||
preference_key = qualities(('small', 'medium', 'large'))
|
||||
|
||||
thumbnails = [{
|
||||
'url': thumbnail_url,
|
||||
'id': thumbnail_id,
|
||||
'preference': preference_key(thumbnail_id),
|
||||
} for thumbnail_id, thumbnail_url in (video.get('thumbnail') or {}).items()
|
||||
if isinstance(thumbnail_id, compat_str) and isinstance(thumbnail_url, compat_str)]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
@ -26,11 +26,6 @@ class KuwoBaseIE(InfoExtractor):
|
||||
def _get_formats(self, song_id, tolerate_ip_deny=False):
|
||||
formats = []
|
||||
for file_format in self._FORMATS:
|
||||
headers = {}
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
headers['Ytdl-request-proxy'] = cn_verification_proxy
|
||||
|
||||
query = {
|
||||
'format': file_format['ext'],
|
||||
'br': file_format.get('br', ''),
|
||||
@ -42,7 +37,7 @@ class KuwoBaseIE(InfoExtractor):
|
||||
song_url = self._download_webpage(
|
||||
'http://antiserver.kuwo.cn/anti.s',
|
||||
song_id, note='Download %s url info' % file_format['format'],
|
||||
query=query, headers=headers,
|
||||
query=query, headers=self.geo_verification_headers(),
|
||||
)
|
||||
|
||||
if song_url == 'IPDeny' and not tolerate_ip_deny:
|
||||
|
@ -1,60 +1,65 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class LA7IE(InfoExtractor):
|
||||
IE_NAME = 'la7.tv'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?la7\.tv/
|
||||
(?:
|
||||
richplayer/\?assetid=|
|
||||
\?contentId=
|
||||
)
|
||||
(?P<id>[0-9]+)'''
|
||||
IE_NAME = 'la7.it'
|
||||
_VALID_URL = r'''(?x)(https?://)?(?:
|
||||
(?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
|
||||
tg\.la7\.it/repliche-tgla7\?id=
|
||||
)(?P<id>.+)'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
|
||||
'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
|
||||
_TESTS = [{
|
||||
# 'src' is a plain URL
|
||||
'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
|
||||
'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
|
||||
'info_dict': {
|
||||
'id': '50355319',
|
||||
'id': 'inccool8-02-10-2015-163722',
|
||||
'ext': 'mp4',
|
||||
'title': 'IL DIVO',
|
||||
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
|
||||
'duration': 6254,
|
||||
'title': 'Inc.Cool8',
|
||||
'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'uploader_id': 'kdla7pillole@iltrovatore.it',
|
||||
'timestamp': 1443814869,
|
||||
'upload_date': '20151002',
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
}
|
||||
}, {
|
||||
# 'src' is a dictionary
|
||||
'url': 'http://tg.la7.it/repliche-tgla7?id=189080',
|
||||
'md5': '6b0d8888d286e39870208dfeceaf456b',
|
||||
'info_dict': {
|
||||
'id': '189080',
|
||||
'ext': 'mp4',
|
||||
'title': 'TG LA7',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
|
||||
doc = self._download_xml(xml_url, video_id)
|
||||
|
||||
video_title = doc.find('title').text
|
||||
description = doc.find('description').text
|
||||
duration = parse_duration(doc.find('duration').text)
|
||||
thumbnail = doc.find('img').text
|
||||
view_count = int(doc.find('views').text)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:')
|
||||
|
||||
formats = [{
|
||||
'format': vnode.find('quality').text,
|
||||
'tbr': int(vnode.find('quality').text),
|
||||
'url': vnode.find('fms').text.strip().replace('mp4:', prefix),
|
||||
} for vnode in doc.findall('.//videos/video')]
|
||||
self._sort_formats(formats)
|
||||
player_data = self._parse_json(
|
||||
self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
|
||||
'service_url': 'http://kdam.iltrovatore.it',
|
||||
}),
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'view_count': view_count,
|
||||
'title': player_data['title'],
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': player_data.get('poster'),
|
||||
'ie_key': 'Kaltura',
|
||||
}
|
||||
|
@ -20,9 +20,10 @@ from ..utils import (
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_or_none,
|
||||
url_basename,
|
||||
urshift,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@ -74,15 +75,11 @@ class LeIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def urshift(val, n):
|
||||
return val >> n if val >= 0 else (val + 0x100000000) >> n
|
||||
|
||||
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
|
||||
def ror(self, param1, param2):
|
||||
_loc3_ = 0
|
||||
while _loc3_ < param2:
|
||||
param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
|
||||
param1 = urshift(param1, 1) + ((param1 & 1) << 31)
|
||||
_loc3_ += 1
|
||||
return param1
|
||||
|
||||
@ -93,6 +90,10 @@ class LeIE(InfoExtractor):
|
||||
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
||||
return _loc3_
|
||||
|
||||
# reversed from http://jstatic.letvcdn.com/sdk/player.js
|
||||
def get_mms_key(self, time):
|
||||
return self.ror(time, 8) ^ 185025305
|
||||
|
||||
# see M3U8Encryption class in KLetvPlayer.swf
|
||||
@staticmethod
|
||||
def decrypt_m3u8(encrypted_data):
|
||||
@ -113,28 +114,7 @@ class LeIE(InfoExtractor):
|
||||
|
||||
return bytes(_loc7_)
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
page = self._download_webpage(url, media_id)
|
||||
params = {
|
||||
'id': media_id,
|
||||
'platid': 1,
|
||||
'splatid': 101,
|
||||
'format': 1,
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.le.com'
|
||||
}
|
||||
play_json_req = sanitized_Request(
|
||||
'http://api.le.com/mms/out/video/playJson?' + compat_urllib_parse_urlencode(params)
|
||||
)
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
|
||||
|
||||
play_json = self._download_json(
|
||||
play_json_req,
|
||||
media_id, 'Downloading playJson data')
|
||||
|
||||
def _check_errors(self, play_json):
|
||||
# Check for errors
|
||||
playstatus = play_json['playstatus']
|
||||
if playstatus['status'] == 0:
|
||||
@ -145,15 +125,52 @@ class LeIE(InfoExtractor):
|
||||
msg = 'Generic error. flag = %d' % flag
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
playurl = play_json['playurl']
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
page = self._download_webpage(url, media_id)
|
||||
|
||||
formats = ['350', '1000', '1300', '720p', '1080p']
|
||||
dispatch = playurl['dispatch']
|
||||
play_json_h5 = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJsonH5',
|
||||
media_id, 'Downloading html5 playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 3,
|
||||
'splatid': 304,
|
||||
'format': 1,
|
||||
'tkey': self.get_mms_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
'tss': 'no',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_h5)
|
||||
|
||||
urls = []
|
||||
for format_id in formats:
|
||||
if format_id in dispatch:
|
||||
media_url = playurl['domain'][0] + dispatch[format_id][0]
|
||||
play_json_flash = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJson',
|
||||
media_id, 'Downloading flash playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 1,
|
||||
'splatid': 101,
|
||||
'format': 1,
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_flash)
|
||||
|
||||
def get_h5_urls(media_url, format_id):
|
||||
location = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id, query={
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'tss': 'no',
|
||||
})['location']
|
||||
|
||||
return {
|
||||
'http': update_url_query(location, {'tss': 'no'}),
|
||||
'hls': update_url_query(location, {'tss': 'ios'}),
|
||||
}
|
||||
|
||||
def get_flash_urls(media_url, format_id):
|
||||
media_url += '&' + compat_urllib_parse_urlencode({
|
||||
'm3v': 1,
|
||||
'format': 1,
|
||||
@ -171,17 +188,36 @@ class LeIE(InfoExtractor):
|
||||
|
||||
m3u8_data = self.decrypt_m3u8(req.read())
|
||||
|
||||
url_info_dict = {
|
||||
'url': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
||||
'ext': determine_ext(dispatch[format_id][1]),
|
||||
'format_id': format_id,
|
||||
'protocol': 'm3u8',
|
||||
return {
|
||||
'hls': encode_data_uri(m3u8_data, 'application/vnd.apple.mpegurl'),
|
||||
}
|
||||
|
||||
extracted_formats = []
|
||||
formats = []
|
||||
for play_json, get_urls in ((play_json_h5, get_h5_urls), (play_json_flash, get_flash_urls)):
|
||||
playurl = play_json['playurl']
|
||||
play_domain = playurl['domain'][0]
|
||||
|
||||
for format_id, format_data in playurl.get('dispatch', []).items():
|
||||
if format_id in extracted_formats:
|
||||
continue
|
||||
extracted_formats.append(format_id)
|
||||
|
||||
media_url = play_domain + format_data[0]
|
||||
for protocol, format_url in get_urls(media_url, format_id).items():
|
||||
f = {
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_data[1]),
|
||||
'format_id': '%s-%s' % (protocol, format_id),
|
||||
'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
|
||||
'quality': int_or_none(format_id),
|
||||
}
|
||||
|
||||
if format_id[-1:] == 'p':
|
||||
url_info_dict['height'] = int_or_none(format_id[:-1])
|
||||
f['height'] = int_or_none(format_id[:-1])
|
||||
|
||||
urls.append(url_info_dict)
|
||||
formats.append(f)
|
||||
self._sort_formats(formats, ('height', 'quality', 'format_id'))
|
||||
|
||||
publish_time = parse_iso8601(self._html_search_regex(
|
||||
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
|
||||
@ -190,7 +226,7 @@ class LeIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'formats': urls,
|
||||
'formats': formats,
|
||||
'title': playurl['title'],
|
||||
'thumbnail': playurl['pic'],
|
||||
'description': description,
|
||||
|
@ -100,7 +100,7 @@ class LyndaIE(LyndaBaseIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': '114408',
|
||||
'ext': 'mp4',
|
||||
|
@ -1,8 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@ -23,34 +21,5 @@ class M6IE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
|
||||
'Downloading video RSS')
|
||||
|
||||
title = rss.find('./channel/item/title').text
|
||||
description = rss.find('./channel/item/description').text
|
||||
thumbnail = rss.find('./channel/item/visuel_clip_big').text
|
||||
duration = int(rss.find('./channel/item/duration').text)
|
||||
view_count = int(rss.find('./channel/item/nombre_vues').text)
|
||||
|
||||
formats = []
|
||||
for format_id in ['lq', 'sd', 'hq', 'hd']:
|
||||
video_url = rss.find('./channel/item/url_video_%s' % format_id)
|
||||
if video_url is None:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url.text,
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result('6play:%s' % video_id, 'SixPlay', video_id)
|
||||
|
73
youtube_dl/extractor/meta.py
Normal file
73
youtube_dl/extractor/meta.py
Normal file
@ -0,0 +1,73 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .pladform import PladformIE
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class METAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://video.meta.ua/5502115.video',
|
||||
'md5': '71b6f3ee274bef16f1ab410f7f56b476',
|
||||
'info_dict': {
|
||||
'id': '5502115',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sony Xperia Z camera test [HQ]',
|
||||
'description': 'Xperia Z shoots video in FullHD HDR.',
|
||||
'uploader_id': 'nomobile',
|
||||
'uploader': 'CHЁZA.TV',
|
||||
'upload_date': '20130211',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://video.meta.ua/iframe/5502115',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# pladform embed
|
||||
'url': 'http://video.meta.ua/7121015.video',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
st_html5 = self._search_regex(
|
||||
r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None)
|
||||
|
||||
if st_html5:
|
||||
# uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js
|
||||
json_str = ''
|
||||
for i in range(0, len(st_html5), 3):
|
||||
json_str += '�%s;' % st_html5[i:i + 3]
|
||||
uppod_data = self._parse_json(unescapeHTML(json_str), video_id)
|
||||
error = uppod_data.get('customnotfound')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
video_url = uppod_data['file']
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': uppod_data.get('comment') or self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage),
|
||||
'duration': int_or_none(self._og_search_property(
|
||||
'video:duration', webpage, default=None)),
|
||||
}
|
||||
if 'youtube.com/' in video_url:
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Youtube',
|
||||
})
|
||||
return info
|
||||
|
||||
pladform_url = PladformIE._extract_url(webpage)
|
||||
if pladform_url:
|
||||
return self.url_result(pladform_url)
|
@ -11,13 +11,14 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
get_element_by_attribute,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
class MetacafeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
|
||||
_VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/(?P<video_id>[^/]+)/(?P<display_id>[^/?#]+)'
|
||||
_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
|
||||
_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
|
||||
IE_NAME = 'metacafe'
|
||||
@ -47,6 +48,7 @@ class MetacafeIE(InfoExtractor):
|
||||
'uploader': 'ign',
|
||||
'description': 'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.',
|
||||
},
|
||||
'skip': 'Page is temporarily unavailable.',
|
||||
},
|
||||
# AnyClip video
|
||||
{
|
||||
@ -55,8 +57,8 @@ class MetacafeIE(InfoExtractor):
|
||||
'id': 'an-dVVXnuY7Jh77J',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Andromeda Strain (1971): Stop the Bomb Part 3',
|
||||
'uploader': 'anyclip',
|
||||
'description': 'md5:38c711dd98f5bb87acf973d573442e67',
|
||||
'uploader': 'AnyClip',
|
||||
'description': 'md5:cbef0460d31e3807f6feb4e7a5952e5b',
|
||||
},
|
||||
},
|
||||
# age-restricted video
|
||||
@ -110,28 +112,25 @@ class MetacafeIE(InfoExtractor):
|
||||
def report_disclaimer(self):
|
||||
self.to_screen('Retrieving disclaimer')
|
||||
|
||||
def _real_initialize(self):
|
||||
def _confirm_age(self):
|
||||
# Retrieve disclaimer
|
||||
self.report_disclaimer()
|
||||
self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
|
||||
|
||||
# Confirm age
|
||||
disclaimer_form = {
|
||||
self.report_age_confirmation()
|
||||
self._download_webpage(
|
||||
self._FILTER_POST, None, False, 'Unable to confirm age',
|
||||
data=urlencode_postdata({
|
||||
'filters': '0',
|
||||
'submit': "Continue - I'm over 18",
|
||||
}
|
||||
request = sanitized_Request(self._FILTER_POST, urlencode_postdata(disclaimer_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self.report_age_confirmation()
|
||||
self._download_webpage(request, None, False, 'Unable to confirm age')
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract id and simplified title from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
|
||||
video_id = mobj.group(1)
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
# the video may come from an external site
|
||||
m_external = re.match('^(\w{2})-(.*)$', video_id)
|
||||
@ -144,15 +143,24 @@ class MetacafeIE(InfoExtractor):
|
||||
if prefix == 'cb':
|
||||
return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
req = sanitized_Request('http://www.metacafe.com/watch/%s/' % video_id)
|
||||
# self._confirm_age()
|
||||
|
||||
# AnyClip videos require the flashversion cookie so that we get the link
|
||||
# to the mp4 file
|
||||
mobj_an = re.match(r'^an-(.*?)$', video_id)
|
||||
if mobj_an:
|
||||
req.headers['Cookie'] = 'flashVersion=0;'
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
headers = {}
|
||||
if video_id.startswith('an-'):
|
||||
headers['Cookie'] = 'flashVersion=0;'
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
webpage = self._download_webpage(url, video_id, headers=headers)
|
||||
|
||||
error = get_element_by_attribute(
|
||||
'class', 'notfound-page-title', webpage)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
|
||||
video_title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, 'title', default=None) or self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
# Extract URL, uploader and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
@ -216,20 +224,40 @@ class MetacafeIE(InfoExtractor):
|
||||
'player_url': player_url,
|
||||
'ext': play_path.partition(':')[0],
|
||||
})
|
||||
if video_url is None:
|
||||
flashvars = self._parse_json(self._search_regex(
|
||||
r'flashvars\s*=\s*({.*});', webpage, 'flashvars',
|
||||
default=None), video_id, fatal=False)
|
||||
if flashvars:
|
||||
video_url = []
|
||||
for source in flashvars.get('sources'):
|
||||
source_url = source.get('src')
|
||||
if not source_url:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
video_url.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
video_url.append({
|
||||
'url': source_url,
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
if video_url is None:
|
||||
raise ExtractorError('Unsupported video type')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?im)<title>(.*) - Video</title>', webpage, 'title')
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'twitter:description', 'description'],
|
||||
webpage, 'title', fatal=False)
|
||||
thumbnail = self._html_search_meta(
|
||||
['og:image', 'twitter:image'], webpage, 'title', fatal=False)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||
webpage, 'uploader nickname', fatal=False)
|
||||
duration = int_or_none(
|
||||
self._html_search_meta('video:duration', webpage))
|
||||
|
||||
self._html_search_meta('video:duration', webpage, default=None))
|
||||
age_limit = (
|
||||
18
|
||||
if re.search(r'(?:"contentRating":|"rating",)"restricted"', webpage)
|
||||
@ -242,10 +270,11 @@ class MetacafeIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'ext': video_ext,
|
||||
}]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
|
@ -26,7 +26,8 @@ class MGTVIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
api_data = self._download_json(
|
||||
'http://v.api.mgtv.com/player/video', video_id,
|
||||
query={'video_id': video_id})['data']
|
||||
query={'video_id': video_id},
|
||||
headers=self.geo_verification_headers())['data']
|
||||
info = api_data['info']
|
||||
|
||||
formats = []
|
||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
@ -18,13 +19,16 @@ class MioMioIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# "type=video" in flashvars
|
||||
'url': 'http://www.miomio.tv/watch/cc88912/',
|
||||
'md5': '317a5f7f6b544ce8419b784ca8edae65',
|
||||
'info_dict': {
|
||||
'id': '88912',
|
||||
'ext': 'flv',
|
||||
'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
|
||||
'duration': 5923,
|
||||
},
|
||||
'params': {
|
||||
# The server provides broken file
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.miomio.tv/watch/cc184024/',
|
||||
'info_dict': {
|
||||
@ -32,7 +36,7 @@ class MioMioIE(InfoExtractor):
|
||||
'title': '《动漫同人插画绘制》',
|
||||
},
|
||||
'playlist_mincount': 86,
|
||||
'skip': 'This video takes time too long for retrieving the URL',
|
||||
'skip': 'Unable to load videos',
|
||||
}, {
|
||||
'url': 'http://www.miomio.tv/watch/cc173113/',
|
||||
'info_dict': {
|
||||
@ -40,20 +44,23 @@ class MioMioIE(InfoExtractor):
|
||||
'title': 'The New Macbook 2015 上手试玩与简评'
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'skip': 'Unable to load videos',
|
||||
}, {
|
||||
# new 'h5' player
|
||||
'url': 'http://www.miomio.tv/watch/cc273295/',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '273295',
|
||||
'ext': 'mp4',
|
||||
'title': 'アウト×デラックス 20160526',
|
||||
},
|
||||
'params': {
|
||||
# intermittent HTTP 500
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
mioplayer_path = self._search_regex(
|
||||
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
|
||||
|
||||
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
||||
|
||||
def _extract_mioplayer(self, webpage, video_id, title, http_headers):
|
||||
xml_config = self._search_regex(
|
||||
r'flashvars="type=(?:sina|video)&(.+?)&',
|
||||
webpage, 'xml config')
|
||||
@ -92,10 +99,34 @@ class MioMioIE(InfoExtractor):
|
||||
'http_headers': http_headers,
|
||||
})
|
||||
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
mioplayer_path = self._search_regex(
|
||||
r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path')
|
||||
|
||||
if '_h5' in mioplayer_path:
|
||||
player_url = compat_urlparse.urljoin(url, mioplayer_path)
|
||||
player_webpage = self._download_webpage(
|
||||
player_url, video_id,
|
||||
note='Downloading player webpage', headers={'Referer': url})
|
||||
entries = self._parse_html5_media_entries(player_url, player_webpage)
|
||||
http_headers = {'Referer': player_url}
|
||||
else:
|
||||
http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
|
||||
entries = self._extract_mioplayer(webpage, video_id, title, http_headers)
|
||||
|
||||
if len(entries) == 1:
|
||||
segment = entries[0]
|
||||
segment['id'] = video_id
|
||||
segment['title'] = title
|
||||
segment['http_headers'] = http_headers
|
||||
return segment
|
||||
|
||||
return {
|
||||
|
@ -12,12 +12,69 @@ from ..utils import (
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
extract_attributes,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
class MiTeleBaseIE(InfoExtractor):
|
||||
def _get_player_info(self, url, webpage):
|
||||
player_data = extract_attributes(self._search_regex(
|
||||
r'(?s)(<ms-video-player.+?</ms-video-player>)',
|
||||
webpage, 'ms video player'))
|
||||
video_id = player_data['data-media-id']
|
||||
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
mmc_url = config['services']['mmc']
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')):
|
||||
mmc = self._download_json(
|
||||
m_url, video_id, 'Downloading mmc JSON')
|
||||
if not duration:
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
bas = location.get('bas')
|
||||
loc = location.get('loc')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, bas, loc, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'bas': bas,
|
||||
'icd': loc,
|
||||
'ogn': ogn,
|
||||
'sta': '0',
|
||||
}
|
||||
media = self._download_json(
|
||||
'%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)),
|
||||
video_id, 'Downloading %s JSON' % location['loc'])
|
||||
file_ = media.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
ext = determine_ext(file_)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
|
||||
class MiTeleIE(MiTeleBaseIE):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
|
||||
_VALID_URL = r'https?://www\.mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
|
||||
@ -25,7 +82,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
|
||||
'display_id': 'programa-144',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tor, la web invisible',
|
||||
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
|
||||
'series': 'Diario de',
|
||||
@ -40,7 +97,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'eLZSwoEd1S3pVyUm8lc6F',
|
||||
'display_id': 'programa-226',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
|
||||
'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
|
||||
'series': 'Cuarto Milenio',
|
||||
@ -59,40 +116,7 @@ class MiTeleIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
config_url = self._search_regex(
|
||||
r'data-config\s*=\s*"([^"]+)"', webpage, 'data config url')
|
||||
config_url = compat_urlparse.urljoin(url, config_url)
|
||||
|
||||
config = self._download_json(
|
||||
config_url, display_id, 'Downloading config JSON')
|
||||
|
||||
mmc = self._download_json(
|
||||
config['services']['mmc'], display_id, 'Downloading mmc JSON')
|
||||
|
||||
formats = []
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
bas = location.get('bas')
|
||||
loc = location.get('loc')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, bas, loc, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'bas': bas,
|
||||
'icd': loc,
|
||||
'ogn': ogn,
|
||||
'sta': '0',
|
||||
}
|
||||
media = self._download_json(
|
||||
'%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)),
|
||||
display_id, 'Downloading %s JSON' % location['loc'])
|
||||
file_ = media.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
display_id, f4m_id=loc))
|
||||
self._sort_formats(formats)
|
||||
info = self._get_player_info(url, webpage)
|
||||
|
||||
title = self._search_regex(
|
||||
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
|
||||
@ -112,21 +136,12 @@ class MiTeleIE(InfoExtractor):
|
||||
title = remove_start(self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-media-id\s*=\s*"([^"]+)"', webpage,
|
||||
'data media id', default=None) or display_id
|
||||
thumbnail = config.get('poster', {}).get('imageUrl')
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': get_element_by_attribute('class', 'text', webpage),
|
||||
'series': series,
|
||||
'season': season,
|
||||
'episode': episode,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
@ -102,11 +102,11 @@ class MixcloudIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage)
|
||||
like_count = parse_count(self._search_regex(
|
||||
r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
|
||||
webpage, 'like count', fatal=False))
|
||||
webpage, 'like count', default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>'],
|
||||
webpage, 'play count', fatal=False))
|
||||
webpage, 'play count', default=None))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
|
122
youtube_dl/extractor/msn.py
Normal file
122
youtube_dl/extractor/msn.py
Normal file
@ -0,0 +1,122 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class MSNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE',
|
||||
'md5': '8442f66c116cbab1ff7098f986983458',
|
||||
'info_dict': {
|
||||
'id': 'BBqQYNE',
|
||||
'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message',
|
||||
'ext': 'mp4',
|
||||
'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message',
|
||||
'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25',
|
||||
'duration': 104,
|
||||
'uploader': 'CBS Entertainment',
|
||||
'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# geo restricted
|
||||
'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1',
|
||||
webpage, 'video data', default='{}', group='data'),
|
||||
display_id, transform_source=unescapeHTML)
|
||||
|
||||
if not video:
|
||||
error = unescapeHTML(self._search_regex(
|
||||
r'data-error=(["\'])(?P<error>.+?)\1',
|
||||
webpage, 'error', group='error'))
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
title = video['title']
|
||||
|
||||
formats = []
|
||||
for file_ in video.get('videoFiles', []):
|
||||
format_url = file_.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
# .ism is not yet supported (see
|
||||
# https://github.com/rg3/youtube-dl/issues/8118)
|
||||
if ext == 'ism':
|
||||
continue
|
||||
if 'm3u8' in format_url:
|
||||
# m3u8_native should not be used here until
|
||||
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False)
|
||||
# Despite metadata in m3u8 all video+audio formats are
|
||||
# actually video-only (no audio)
|
||||
for f in m3u8_formats:
|
||||
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = 'none'
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': 'http',
|
||||
'width': int_or_none(file_.get('width')),
|
||||
'height': int_or_none(file_.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for file_ in video.get('files', []):
|
||||
format_url = file_.get('url')
|
||||
format_code = file_.get('formatCode')
|
||||
if not format_url or not format_code:
|
||||
continue
|
||||
if compat_str(format_code) == '3100':
|
||||
subtitles.setdefault(file_.get('culture', 'en'), []).append({
|
||||
'ext': determine_ext(format_url, 'ttml'),
|
||||
'url': format_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'thumbnail': video.get('headlineImage', {}).get('url'),
|
||||
'duration': int_or_none(video.get('durationSecs')),
|
||||
'uploader': video.get('sourceFriendly'),
|
||||
'uploader_id': video.get('providerId'),
|
||||
'creator': video.get('creator'),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
url_basename,
|
||||
@ -61,7 +62,7 @@ class NationalGeographicIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class NationalGeographicChannelIE(InfoExtractor):
|
||||
class NationalGeographicChannelIE(ThePlatformIE):
|
||||
IE_NAME = 'natgeo:channel'
|
||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)'
|
||||
|
||||
@ -102,12 +103,22 @@ class NationalGeographicChannelIE(InfoExtractor):
|
||||
release_url = self._search_regex(
|
||||
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
|
||||
webpage, 'release url')
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'switch': 'http',
|
||||
}
|
||||
is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
|
||||
if is_auth == 'auth':
|
||||
auth_resource_id = self._search_regex(
|
||||
r"video_auth_resourceId\s*=\s*'([^']+)'",
|
||||
webpage, 'auth resource id')
|
||||
query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) or ''
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
update_url_query(release_url, {'mbr': 'true', 'switch': 'http'}),
|
||||
update_url_query(release_url, query),
|
||||
{'force_smil_url': True}),
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ from ..utils import update_url_query
|
||||
|
||||
class NickIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'nick.com'
|
||||
_VALID_URL = r'https?://(?:www\.)?nick\.com/videos/clip/(?P<id>[^/?#.]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nick(?:jr)?\.com/(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
|
||||
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
|
||||
@ -52,6 +52,9 @@ class NickIE(MTVServicesInfoExtractor):
|
||||
}
|
||||
},
|
||||
],
|
||||
}, {
|
||||
'url': 'http://www.nickjr.com/paw-patrol/videos/pups-save-a-goldrush-s3-ep302-full-episode/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
|
55
youtube_dl/extractor/ninecninemedia.py
Normal file
55
youtube_dl/extractor/ninecninemedia.py
Normal file
@ -0,0 +1,55 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class NineCNineMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
destination_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id)
|
||||
content = self._download_json(api_base_url, video_id, query={
|
||||
'$include': '[contentpackages]',
|
||||
})
|
||||
title = content['Name']
|
||||
if len(content['ContentPackages']) > 1:
|
||||
raise ExtractorError('multiple content packages')
|
||||
content_package = content['ContentPackages'][0]
|
||||
stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id']
|
||||
stacks = self._download_json(stacks_base_url, video_id)['Items']
|
||||
if len(stacks) > 1:
|
||||
raise ExtractorError('multiple stacks')
|
||||
stack = stacks[0]
|
||||
stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id'])
|
||||
formats = []
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stack_base_url + 'm3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stack_base_url + 'f4m', video_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False)
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
'format_id': 'mp4',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': content.get('Desc') or content.get('ShortDesc'),
|
||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||
'duration': parse_duration(content.get('BroadcastTime')),
|
||||
'formats': formats,
|
||||
}
|
172
youtube_dl/extractor/onet.py
Normal file
172
youtube_dl/extractor/onet.py
Normal file
@ -0,0 +1,172 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class OnetBaseIE(InfoExtractor):
|
||||
def _search_mvp_id(self, webpage):
|
||||
return self._search_regex(
|
||||
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
|
||||
|
||||
def _extract_from_id(self, video_id, webpage):
|
||||
response = self._download_json(
|
||||
'http://qi.ckm.onetapi.pl/', video_id,
|
||||
query={
|
||||
'body[id]': video_id,
|
||||
'body[jsonrpc]': '2.0',
|
||||
'body[method]': 'get_asset_detail',
|
||||
'body[params][ID_Publikacji]': video_id,
|
||||
'body[params][Service]': 'www.onet.pl',
|
||||
'content-type': 'application/jsonp',
|
||||
'x-onet-app': 'player.front.onetapi.pl',
|
||||
})
|
||||
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error['message']), expected=True)
|
||||
|
||||
video = response['result'].get('0')
|
||||
|
||||
formats = []
|
||||
for _, formats_dict in video['formats'].items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
for format_id, format_list in formats_dict.items():
|
||||
if not isinstance(format_list, list):
|
||||
continue
|
||||
for f in format_list:
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if format_id == 'ism':
|
||||
# TODO: Support Microsoft Smooth Streaming
|
||||
continue
|
||||
elif ext == 'mpd':
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url, video_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(f.get('vertical_resolution')),
|
||||
'width': int_or_none(f.get('horizontal_resolution')),
|
||||
'abr': float_or_none(f.get('audio_bitrate')),
|
||||
'vbr': float_or_none(f.get('video_bitrate')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
meta = video.get('meta', {})
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or meta['title']
|
||||
description = self._og_search_description(webpage, default=None) or meta.get('description')
|
||||
duration = meta.get('length') or meta.get('lenght')
|
||||
timestamp = parse_iso8601(meta.get('addDate'), ' ')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class OnetIE(OnetBaseIE):
|
||||
_VALID_URL = 'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
|
||||
IE_NAME = 'onet.tv'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
|
||||
'md5': 'e3ffbf47590032ac3f27249204173d50',
|
||||
'info_dict': {
|
||||
'id': 'qbpyqc',
|
||||
'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
|
||||
'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
|
||||
'upload_date': '20160705',
|
||||
'timestamp': 1467721580,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id, video_id = mobj.group('display_id', 'id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
mvp_id = self._search_mvp_id(webpage)
|
||||
|
||||
info_dict = self._extract_from_id(mvp_id, webpage)
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
class OnetChannelIE(OnetBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P<id>[a-z]+)(?:[?#]|$)'
|
||||
IE_NAME = 'onet.tv:channel'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://onet.tv/k/openerfestival',
|
||||
'info_dict': {
|
||||
'id': 'openerfestival',
|
||||
'title': 'Open\'er Festival Live',
|
||||
'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.',
|
||||
},
|
||||
'playlist_mincount': 46,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, channel_id)
|
||||
|
||||
current_clip_info = self._parse_json(self._search_regex(
|
||||
r'var\s+currentClip\s*=\s*({[^}]+})', webpage, 'video info'), channel_id,
|
||||
transform_source=lambda s: js_to_json(re.sub(r'\'\s*\+\s*\'', '', s)))
|
||||
video_id = remove_start(current_clip_info['ckmId'], 'mvp:')
|
||||
video_name = url_basename(current_clip_info['url'])
|
||||
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen(
|
||||
'Downloading just video %s because of --no-playlist' % video_name)
|
||||
return self._extract_from_id(video_id, webpage)
|
||||
|
||||
self.to_screen(
|
||||
'Downloading channel %s - add --no-playlist to just download video %s' % (
|
||||
channel_id, video_name))
|
||||
matches = re.findall(
|
||||
r'<a[^>]+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)',
|
||||
webpage)
|
||||
entries = [
|
||||
self.url_result(video_link, OnetIE.ie_key())
|
||||
for video_link in matches]
|
||||
|
||||
channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
|
||||
channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
|
||||
return self.playlist_result(entries, channel_id, channel_title, channel_description)
|
@ -7,6 +7,8 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@ -15,15 +17,14 @@ class OnionStudiosIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
|
||||
'md5': 'd4851405d31adfadf71cd7a487b765bb',
|
||||
'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
|
||||
'info_dict': {
|
||||
'id': '2937',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hannibal charges forward, stops for a cocktail',
|
||||
'description': 'md5:e786add7f280b7f0fe237b64cc73df76',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'The A.V. Club',
|
||||
'uploader_id': 'TheAVClub',
|
||||
'uploader_id': 'the-av-club',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
|
||||
@ -40,50 +41,38 @@ class OnionStudiosIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.onionstudios.com/embed?id=%s' % video_id, video_id)
|
||||
video_data = self._download_json(
|
||||
'http://www.onionstudios.com/video/%s.json' % video_id, video_id)
|
||||
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
|
||||
ext = determine_ext(src)
|
||||
for source in video_data.get('sources', []):
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'/(\d+)\.%s' % ext, src, 'height', default=None))
|
||||
tbr = int_or_none(source.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': ext + ('-%sp' % height if height else ''),
|
||||
'url': src,
|
||||
'height': height,
|
||||
'format_id': ext + ('-%d' % tbr if tbr else ''),
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
r'share_title\s*=\s*(["\'])(?P<title>[^\1]+?)\1',
|
||||
webpage, 'title', group='title')
|
||||
description = self._search_regex(
|
||||
r'share_description\s*=\s*(["\'])(?P<description>[^\'"]+?)\1',
|
||||
webpage, 'description', default=None, group='description')
|
||||
thumbnail = self._search_regex(
|
||||
r'poster\s*=\s*(["\'])(?P<thumbnail>[^\1]+?)\1',
|
||||
webpage, 'thumbnail', default=False, group='thumbnail')
|
||||
|
||||
uploader_id = self._search_regex(
|
||||
r'twitter_handle\s*=\s*(["\'])(?P<uploader_id>[^\1]+?)\1',
|
||||
webpage, 'uploader id', fatal=False, group='uploader_id')
|
||||
uploader = self._search_regex(
|
||||
r'window\.channelName\s*=\s*(["\'])Embedded:(?P<uploader>[^\1]+?)\1',
|
||||
webpage, 'uploader', default=False, group='uploader')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': video_data.get('poster_url'),
|
||||
'uploader': video_data.get('channel_name'),
|
||||
'uploader_id': video_data.get('channel_slug'),
|
||||
'duration': float_or_none(video_data.get('duration', 1000)),
|
||||
'tags': video_data.get('tags'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -516,9 +516,14 @@ class PBSIE(InfoExtractor):
|
||||
# https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
|
||||
if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
|
||||
continue
|
||||
f_url = re.sub(r'\d+k|baseline', bitrate, http_url)
|
||||
# This may produce invalid links sometimes (e.g.
|
||||
# http://www.pbs.org/wgbh/frontline/film/suicide-plan)
|
||||
if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate):
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'\d+k|baseline', bitrate, http_url),
|
||||
'url': f_url,
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
|
@ -120,9 +120,12 @@ class PeriscopeUserIE(InfoExtractor):
|
||||
title = user.get('display_name') or user.get('username')
|
||||
description = user.get('description')
|
||||
|
||||
broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or
|
||||
data_store.get('BroadcastCache', {}).get('broadcastIds', []))
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
|
||||
for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
|
||||
'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id))
|
||||
for broadcast_id in broadcast_ids]
|
||||
|
||||
return self.playlist_result(entries, user_id, title, description)
|
||||
|
@ -49,7 +49,7 @@ class PladformIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage)
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
|
@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
class PlayvidIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
|
||||
'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
|
||||
'info_dict': {
|
||||
@ -24,8 +24,19 @@ class PlayvidIE(InfoExtractor):
|
||||
'title': 'md5:9256d01c6317e3f703848b5906880dc8',
|
||||
'duration': 82,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'Video removed due to ToS',
|
||||
}, {
|
||||
'url': 'http://www.playvid.com/watch/hwb0GpNkzgH',
|
||||
'md5': '39d49df503ad7b8f23a4432cbf046477',
|
||||
'info_dict': {
|
||||
'id': 'hwb0GpNkzgH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
99
youtube_dl/extractor/polskieradio.py
Normal file
99
youtube_dl/extractor/polskieradio.py
Normal file
@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class PolskieRadioIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
|
||||
'info_dict': {
|
||||
'id': '1587943',
|
||||
'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
|
||||
'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '2984ee6ce9046d91fc233bc1a864a09a',
|
||||
'info_dict': {
|
||||
'id': '1540576',
|
||||
'ext': 'mp3',
|
||||
'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
|
||||
'timestamp': 1456594200,
|
||||
'upload_date': '20160227',
|
||||
'duration': 2364,
|
||||
'thumbnail': 're:^https?://static\.prsa\.pl/images/.*\.jpg$'
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal',
|
||||
'info_dict': {
|
||||
'id': '1635803',
|
||||
'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał',
|
||||
'description': 'md5:01cb7d0cad58664095d72b51a1ebada2',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with mp4 video
|
||||
'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
content = self._search_regex(
|
||||
r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
|
||||
webpage, 'content')
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
||||
webpage, 'timestamp', fatal=False))
|
||||
|
||||
thumbnail_url = self._og_search_thumbnail(webpage)
|
||||
|
||||
entries = []
|
||||
|
||||
media_urls = set()
|
||||
|
||||
for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content):
|
||||
media = self._parse_json(data_media, playlist_id, fatal=False)
|
||||
if not media.get('file') or not media.get('desc'):
|
||||
continue
|
||||
media_url = self._proto_relative_url(media['file'], 'http:')
|
||||
if media_url in media_urls:
|
||||
continue
|
||||
media_urls.add(media_url)
|
||||
entries.append({
|
||||
'id': compat_str(media['id']),
|
||||
'url': media_url,
|
||||
'title': compat_urllib_parse_unquote(media['desc']),
|
||||
'duration': int_or_none(media.get('length')),
|
||||
'vcodec': 'none' if media.get('provider') == 'audio' else None,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail_url
|
||||
})
|
||||
|
||||
title = self._og_search_title(webpage).strip()
|
||||
description = strip_or_none(self._og_search_description(webpage))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
@ -25,7 +25,15 @@ from ..aes import (
|
||||
|
||||
|
||||
class PornHubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
|
||||
IE_DESC = 'PornHub and Thumbzilla'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[0-9a-z]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||
'md5': '1e19b41231a02eba417839222ac9d58e',
|
||||
@ -63,8 +71,24 @@ class PornHubIE(InfoExtractor):
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# removed at the request of cam4.com
|
||||
'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# removed at the request of the copyright owner
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# removed by uploader
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private video
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@ -87,8 +111,8 @@ class PornHubIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
|
||||
webpage, 'error message', default=None)
|
||||
r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>',
|
||||
webpage, 'error message', default=None, group='error')
|
||||
if error_msg:
|
||||
error_msg = re.sub(r'\s+', ' ', error_msg)
|
||||
raise ExtractorError(
|
||||
|
@ -5,7 +5,7 @@ import re
|
||||
|
||||
from hashlib import sha1
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
@ -71,6 +71,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is unavailable',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
|
||||
@ -86,6 +87,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is unavailable',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
|
||||
@ -101,6 +103,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is unavailable',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
|
||||
@ -116,6 +119,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is unavailable',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
|
||||
@ -131,6 +135,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is unavailable',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
|
||||
@ -227,70 +232,42 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _extract_clip(self, url, webpage):
|
||||
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
|
||||
clip_id = self._html_search_regex(
|
||||
self._CLIPID_REGEXES, webpage, 'clip id')
|
||||
|
||||
access_token = 'prosieben'
|
||||
client_name = 'kolibri-2.0.19-splec4'
|
||||
client_location = url
|
||||
|
||||
videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse_urlencode({
|
||||
video = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos',
|
||||
clip_id, 'Downloading videos JSON', query={
|
||||
'access_token': access_token,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
'ids': clip_id,
|
||||
})
|
||||
|
||||
video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0]
|
||||
})[0]
|
||||
|
||||
if video.get('is_protected') is True:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
duration = float_or_none(video.get('duration'))
|
||||
source_ids = [source['id'] for source in video['sources']]
|
||||
source_ids_str = ','.join(map(str, source_ids))
|
||||
source_ids = [compat_str(source['id']) for source in video['sources']]
|
||||
|
||||
g = '01!8d8F_)r9]4s[qeuXfP%'
|
||||
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest()
|
||||
|
||||
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name])
|
||||
.encode('utf-8')).hexdigest()
|
||||
|
||||
sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse_urlencode({
|
||||
sources = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
|
||||
clip_id, 'Downloading sources JSON', query={
|
||||
'access_token': access_token,
|
||||
'client_id': client_id,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
}))
|
||||
|
||||
sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON')
|
||||
})
|
||||
server_id = sources['server_id']
|
||||
|
||||
client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id,
|
||||
client_location, source_ids_str, g, client_name])
|
||||
.encode('utf-8')).hexdigest()
|
||||
|
||||
url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse_urlencode({
|
||||
'access_token': access_token,
|
||||
'client_id': client_id,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
'server_id': server_id,
|
||||
'source_ids': source_ids_str,
|
||||
}))
|
||||
|
||||
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
|
||||
|
||||
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
|
||||
|
||||
formats = []
|
||||
|
||||
urls_sources = urls['sources']
|
||||
if isinstance(urls_sources, dict):
|
||||
urls_sources = urls_sources.values()
|
||||
|
||||
def fix_bitrate(bitrate):
|
||||
bitrate = int_or_none(bitrate)
|
||||
@ -298,10 +275,42 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
return None
|
||||
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
|
||||
|
||||
formats = []
|
||||
for source_id in source_ids:
|
||||
client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest()
|
||||
urls = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
|
||||
clip_id, 'Downloading urls JSON', fatal=False, query={
|
||||
'access_token': access_token,
|
||||
'client_id': client_id,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
'server_id': server_id,
|
||||
'source_ids': source_id,
|
||||
})
|
||||
if not urls:
|
||||
continue
|
||||
if urls.get('status_code') != 0:
|
||||
raise ExtractorError('This video is unavailable', expected=True)
|
||||
urls_sources = urls['sources']
|
||||
if isinstance(urls_sources, dict):
|
||||
urls_sources = urls_sources.values()
|
||||
for source in urls_sources:
|
||||
protocol = source['protocol']
|
||||
source_url = source['url']
|
||||
if protocol == 'rtmp' or protocol == 'rtmpe':
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
protocol = source.get('protocol')
|
||||
mimetype = source.get('mimetype')
|
||||
if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
source_url, clip_id, f4m_id='hds', fatal=False))
|
||||
elif mimetype == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, clip_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
tbr = fix_bitrate(source['bitrate'])
|
||||
if protocol in ('rtmp', 'rtmpe'):
|
||||
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
|
||||
if not mobj:
|
||||
continue
|
||||
@ -315,20 +324,24 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'play_path': play_path,
|
||||
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
|
||||
'page_url': 'http://www.prosieben.de',
|
||||
'vbr': fix_bitrate(source['bitrate']),
|
||||
'ext': 'mp4',
|
||||
'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
|
||||
})
|
||||
elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(source_url, clip_id))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vbr': fix_bitrate(source['bitrate']),
|
||||
'tbr': tbr,
|
||||
'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(
|
||||
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
|
||||
|
||||
return {
|
||||
'id': clip_id,
|
||||
'title': title,
|
||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_element,
|
||||
ExtractorError,
|
||||
determine_protocol,
|
||||
)
|
||||
|
||||
|
||||
@ -22,13 +23,13 @@ class RadioCanadaIE(InfoExtractor):
|
||||
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
|
||||
'info_dict': {
|
||||
'id': '7184272',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le parcours du tireur capté sur vidéo',
|
||||
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
|
||||
'upload_date': '20141023',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
@ -36,11 +37,14 @@ class RadioCanadaIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
app_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
device_types = ['ipad', 'android']
|
||||
if app_code != 'toutv':
|
||||
device_types.append('flash')
|
||||
|
||||
formats = []
|
||||
# TODO: extract m3u8 and f4m formats
|
||||
# m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements
|
||||
# TODO: extract f4m formats
|
||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
||||
for device_type in ('flash',):
|
||||
for device_type in device_types:
|
||||
v_data = self._download_xml(
|
||||
'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
|
||||
video_id, note='Downloading %s XML' % device_type, query={
|
||||
@ -52,7 +56,7 @@ class RadioCanadaIE(InfoExtractor):
|
||||
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
|
||||
'paysJ391wsHjbOJwvCs26toz': 'CA',
|
||||
'bypasslock': 'NZt5K62gRqfc',
|
||||
})
|
||||
}, fatal=False)
|
||||
v_url = xpath_text(v_data, 'url')
|
||||
if not v_url:
|
||||
continue
|
||||
@ -64,7 +68,8 @@ class RadioCanadaIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
v_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
ext = determine_ext(v_url)
|
||||
bitrates = xpath_element(v_data, 'bitrates')
|
||||
@ -72,15 +77,28 @@ class RadioCanadaIE(InfoExtractor):
|
||||
tbr = int_or_none(url_e.get('bitrate'))
|
||||
if not tbr:
|
||||
continue
|
||||
f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
|
||||
protocol = determine_protocol({'url': f_url})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%d' % tbr,
|
||||
'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
|
||||
'ext': 'flv',
|
||||
'protocol': 'rtmp',
|
||||
'format_id': '%s-%d' % (protocol, tbr),
|
||||
'url': f_url,
|
||||
'ext': 'flv' if protocol == 'rtmp' else ext,
|
||||
'protocol': protocol,
|
||||
'width': int_or_none(url_e.get('width')),
|
||||
'height': int_or_none(url_e.get('height')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
if protocol == 'rtsp':
|
||||
base_url = self._search_regex(
|
||||
r'rtsp://([^?]+)', f_url, 'base url', default=None)
|
||||
if base_url:
|
||||
base_url = 'http://' + base_url
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
base_url + '/playlist.m3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
base_url + '/manifest.f4m', video_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._download_xml(
|
||||
@ -115,13 +133,13 @@ class RadioCanadaAudioVideoIE(InfoExtractor):
|
||||
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
|
||||
'info_dict': {
|
||||
'id': '7527184',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Barack Obama au Vietnam',
|
||||
'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
|
||||
'upload_date': '20160523',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
@ -1,47 +1,141 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class RaiTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
|
||||
class RaiBaseIE(InfoExtractor):
|
||||
def _extract_relinker_formats(self, relinker_url, video_id):
|
||||
formats = []
|
||||
|
||||
for platform in ('mon', 'flash', 'native'):
|
||||
relinker = self._download_xml(
|
||||
relinker_url, video_id,
|
||||
note='Downloading XML metadata for platform %s' % platform,
|
||||
transform_source=fix_xml_ampersands,
|
||||
query={'output': 45, 'pl': platform},
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
media_url = find_xpath_attr(relinker, './url', 'type', 'content').text
|
||||
if media_url == 'http://download.rai.it/video_no_available.mp4':
|
||||
self.raise_geo_restricted()
|
||||
|
||||
ext = determine_ext(media_url)
|
||||
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
|
||||
continue
|
||||
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
manifest_url = update_url_query(
|
||||
media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
|
||||
{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
bitrate = int_or_none(xpath_text(relinker, 'bitrate'))
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'tbr': bitrate if bitrate > 0 else None,
|
||||
'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
|
||||
})
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_from_content_id(self, content_id, base_url):
|
||||
media = self._download_json(
|
||||
'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
|
||||
content_id, 'Downloading video JSON')
|
||||
|
||||
thumbnails = []
|
||||
for image_type in ('image', 'image_medium', 'image_300'):
|
||||
thumbnail_url = media.get(image_type)
|
||||
if thumbnail_url:
|
||||
thumbnails.append({
|
||||
'url': compat_urlparse.urljoin(base_url, thumbnail_url),
|
||||
})
|
||||
|
||||
formats = []
|
||||
media_type = media['type']
|
||||
if 'Audio' in media_type:
|
||||
formats.append({
|
||||
'format_id': media.get('formatoAudio'),
|
||||
'url': media['audioUrl'],
|
||||
'ext': media.get('formatoAudio'),
|
||||
})
|
||||
elif 'Video' in media_type:
|
||||
formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id))
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
raise ExtractorError('not a media file')
|
||||
|
||||
subtitles = {}
|
||||
captions = media.get('subtitlesUrl')
|
||||
if captions:
|
||||
STL_EXT = '.stl'
|
||||
SRT_EXT = '.srt'
|
||||
if captions.endswith(STL_EXT):
|
||||
captions = captions[:-len(STL_EXT)] + SRT_EXT
|
||||
subtitles['it'] = [{
|
||||
'ext': 'srt',
|
||||
'url': captions,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
'title': media['name'],
|
||||
'description': media.get('desc'),
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': media.get('author'),
|
||||
'upload_date': unified_strdate(media.get('date')),
|
||||
'duration': parse_duration(media.get('length')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class RaiTVIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
||||
'md5': '96382709b61dd64a6b88e0f791e6df4c',
|
||||
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
||||
'info_dict': {
|
||||
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Report del 07/04/2014',
|
||||
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
|
||||
'upload_date': '20140407',
|
||||
'duration': 6160,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
{
|
||||
# no m3u8 stream
|
||||
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
|
||||
'md5': 'd9751b78eac9710d62c2447b224dea39',
|
||||
# HDS download, MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
|
||||
'ext': 'flv',
|
||||
'title': 'TG PRIMO TEMPO',
|
||||
'upload_date': '20140612',
|
||||
'duration': 1758,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'Geo-restricted to Italy',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
|
||||
@ -67,127 +161,70 @@ class RaiTVIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
|
||||
'md5': '496ab63e420574447f70d02578333437',
|
||||
'md5': 'e57493e1cb8bc7c564663f363b171847',
|
||||
'info_dict': {
|
||||
'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il Candidato - Primo episodio: "Le Primarie"',
|
||||
'description': 'md5:364b604f7db50594678f483353164fb8',
|
||||
'upload_date': '20140923',
|
||||
'duration': 386,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
media = self._download_json(
|
||||
'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
thumbnails = []
|
||||
for image_type in ('image', 'image_medium', 'image_300'):
|
||||
thumbnail_url = media.get(image_type)
|
||||
if thumbnail_url:
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
})
|
||||
|
||||
subtitles = []
|
||||
formats = []
|
||||
media_type = media['type']
|
||||
if 'Audio' in media_type:
|
||||
formats.append({
|
||||
'format_id': media.get('formatoAudio'),
|
||||
'url': media['audioUrl'],
|
||||
'ext': media.get('formatoAudio'),
|
||||
})
|
||||
elif 'Video' in media_type:
|
||||
def fix_xml(xml):
|
||||
return xml.replace(' tag elementi', '').replace('>/', '</')
|
||||
|
||||
relinker = self._download_xml(
|
||||
media['mediaUri'] + '&output=43',
|
||||
video_id, transform_source=fix_xml)
|
||||
|
||||
has_subtitle = False
|
||||
|
||||
for element in relinker.findall('element'):
|
||||
media_url = xpath_text(element, 'url')
|
||||
ext = determine_ext(media_url)
|
||||
content_type = xpath_text(element, 'content-type')
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'stl':
|
||||
has_subtitle = True
|
||||
elif content_type.startswith('video/'):
|
||||
bitrate = int_or_none(xpath_text(element, 'bitrate'))
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'tbr': bitrate if bitrate > 0 else None,
|
||||
'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
|
||||
})
|
||||
elif content_type.startswith('image/'):
|
||||
thumbnails.append({
|
||||
'url': media_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if has_subtitle:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
subtitles = self._get_subtitles(video_id, webpage)
|
||||
else:
|
||||
raise ExtractorError('not a media file')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': media['name'],
|
||||
'description': media.get('desc'),
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': media.get('author'),
|
||||
'upload_date': unified_strdate(media.get('date')),
|
||||
'duration': parse_duration(media.get('length')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
subtitles = {}
|
||||
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
|
||||
if m:
|
||||
captions = m.group('captions')
|
||||
STL_EXT = '.stl'
|
||||
SRT_EXT = '.srt'
|
||||
if captions.endswith(STL_EXT):
|
||||
captions = captions[:-len(STL_EXT)] + SRT_EXT
|
||||
subtitles['it'] = [{
|
||||
'ext': 'srt',
|
||||
'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
|
||||
}]
|
||||
return subtitles
|
||||
return self._extract_from_content_id(video_id, url)
|
||||
|
||||
|
||||
class RaiIE(InfoExtractor):
|
||||
class RaiIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
|
||||
'md5': 'e0e7a8a131e249d1aa0ebf270d1d8db7',
|
||||
'md5': '2dd727e61114e1ee9c47f0da6914e178',
|
||||
'info_dict': {
|
||||
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il pacco',
|
||||
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
# Direct relinker URL
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
|
||||
# HDS live stream, MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
|
||||
'ext': 'flv',
|
||||
'title': 'EuroNews',
|
||||
},
|
||||
'skip': 'Geo-restricted to Italy',
|
||||
},
|
||||
{
|
||||
# Embedded content item ID
|
||||
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
|
||||
'md5': '84c1135ce960e8822ae63cec34441d63',
|
||||
'info_dict': {
|
||||
'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8',
|
||||
'ext': 'mp4',
|
||||
'title': 'TG1 ore 20:00 del 02/07/2016',
|
||||
'upload_date': '20160702',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
|
||||
# HDS live stream, MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
|
||||
'ext': 'flv',
|
||||
'title': 'La diretta di Rainews24',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
@ -201,7 +238,30 @@ class RaiIE(InfoExtractor):
|
||||
iframe_url = self._search_regex(
|
||||
[r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
|
||||
r'drawMediaRaiTV\(["\'](.+?)["\']'],
|
||||
webpage, 'iframe')
|
||||
webpage, 'iframe', default=None)
|
||||
if iframe_url:
|
||||
if not iframe_url.startswith('http'):
|
||||
iframe_url = compat_urlparse.urljoin(url, iframe_url)
|
||||
return self.url_result(iframe_url)
|
||||
|
||||
content_item_id = self._search_regex(
|
||||
r'initEdizione\((?P<q1>[\'"])ContentItem-(?P<content_id>[^\'"]+)(?P=q1)',
|
||||
webpage, 'content item ID', group='content_id', default=None)
|
||||
if content_item_id:
|
||||
return self._extract_from_content_id(content_item_id, url)
|
||||
|
||||
relinker_url = compat_urlparse.urljoin(url, self._search_regex(
|
||||
r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P<q1>[\'"])(?P<url>(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)',
|
||||
webpage, 'relinker URL', group='url'))
|
||||
formats = self._extract_relinker_formats(relinker_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
|
||||
webpage, 'title', group='title', default=None) or self._og_search_title(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -1,23 +1,23 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
js_to_json,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class RDSIE(InfoExtractor):
|
||||
IE_DESC = 'RDS.ca'
|
||||
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
|
||||
'info_dict': {
|
||||
'id': '3.1132799',
|
||||
'id': '604333',
|
||||
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fowler Jr. prend la direction de Jacksonville',
|
||||
@ -33,22 +33,17 @@ class RDSIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
# TODO: extract f4m from 9c9media.com
|
||||
video_url = self._search_regex(
|
||||
r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
|
||||
webpage, 'video url')
|
||||
|
||||
title = self._og_search_title(webpage) or self._html_search_meta(
|
||||
item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
|
||||
video_id = compat_str(item['id'])
|
||||
title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
|
||||
'title', webpage, 'title', fatal=True)
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
|
||||
thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
|
||||
[r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
|
||||
r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
@ -61,13 +56,15 @@ class RDSIE(InfoExtractor):
|
||||
age_limit = self._family_friendly_search(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'url': '9c9media:rds_web:%s' % video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'age_limit': age_limit,
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
|
148
youtube_dl/extractor/roosterteeth.py
Normal file
148
youtube_dl/extractor/roosterteeth.py
Normal file
@ -0,0 +1,148 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class RoosterTeethIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
|
||||
_LOGIN_URL = 'https://roosterteeth.com/login'
|
||||
_NETRC_MACHINE = 'roosterteeth'
|
||||
_TESTS = [{
|
||||
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||
'md5': 'e2bd7764732d785ef797700a2489f212',
|
||||
'info_dict': {
|
||||
'id': '26576',
|
||||
'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
|
||||
'ext': 'mp4',
|
||||
'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
|
||||
'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
|
||||
'thumbnail': 're:^https?://.*\.png$',
|
||||
'series': 'Million Dollars, But...',
|
||||
'episode': 'Million Dollars, But... The Game Announcement',
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# only available for FIRST members
|
||||
'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None,
|
||||
note='Downloading login page',
|
||||
errnote='Unable to download login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password,
|
||||
})
|
||||
|
||||
login_request = self._download_webpage(
|
||||
self._LOGIN_URL, None,
|
||||
note='Logging in as %s' % username,
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={
|
||||
'Referer': self._LOGIN_URL,
|
||||
})
|
||||
|
||||
if not any(re.search(p, login_request) for p in (
|
||||
r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
|
||||
r'>Sign Out<')):
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
|
||||
login_request, 'alert', default=None, group='error')
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
episode = strip_or_none(unescapeHTML(self._search_regex(
|
||||
(r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
|
||||
default=None, group='title')))
|
||||
|
||||
title = strip_or_none(self._og_search_title(
|
||||
webpage, default=None)) or episode
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
|
||||
webpage, 'm3u8 url', default=None, group='url')
|
||||
|
||||
if not m3u8_url:
|
||||
if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
|
||||
self.raise_login_required(
|
||||
'%s is only available for FIRST members' % display_id)
|
||||
|
||||
if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
|
||||
self.raise_login_required('%s is not available yet' % display_id)
|
||||
|
||||
raise ExtractorError('Unable to extract m3u8 URL')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, display_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = strip_or_none(self._og_search_description(webpage))
|
||||
thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))
|
||||
|
||||
series = self._search_regex(
|
||||
(r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
|
||||
webpage, 'series', fatal=False)
|
||||
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'>Comments \((\d+)\)<', webpage,
|
||||
'comment count', fatal=False))
|
||||
|
||||
video_id = self._search_regex(
|
||||
(r'containerId\s*=\s*["\']episode-(\d+)\1',
|
||||
r'<div[^<]+id=["\']episode-(\d+)'), webpage,
|
||||
'video id', default=display_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
}
|
@ -9,7 +9,7 @@ class RTVNHIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.rtvnh.nl/video/131946',
|
||||
'md5': '6e1d0ab079e2a00b6161442d3ceacfc1',
|
||||
'md5': 'cdbec9f44550763c8afc96050fa747dc',
|
||||
'info_dict': {
|
||||
'id': '131946',
|
||||
'ext': 'mp4',
|
||||
@ -29,15 +29,29 @@ class RTVNHIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'%s returned error code %d' % (self.IE_NAME, status), expected=True)
|
||||
|
||||
formats = self._extract_smil_formats(
|
||||
'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id, fatal=False)
|
||||
formats = []
|
||||
rtmp_formats = self._extract_smil_formats(
|
||||
'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
|
||||
formats.extend(rtmp_formats)
|
||||
|
||||
for item in meta['source']['fb']:
|
||||
if item.get('type') == 'hls':
|
||||
for rtmp_format in rtmp_formats:
|
||||
rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
|
||||
rtsp_format = rtmp_format.copy()
|
||||
del rtsp_format['play_path']
|
||||
del rtsp_format['ext']
|
||||
rtsp_format.update({
|
||||
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
|
||||
'url': rtmp_url.replace('rtmp://', 'rtsp://'),
|
||||
'protocol': 'rtsp',
|
||||
})
|
||||
formats.append(rtsp_format)
|
||||
http_base_url = rtmp_url.replace('rtmp://', 'http://')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
|
||||
elif item.get('type') == '':
|
||||
formats.append({'url': item['file']})
|
||||
http_base_url + '/playlist.m3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
http_base_url + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
53
youtube_dl/extractor/rudo.py
Normal file
53
youtube_dl/extractor/rudo.py
Normal file
@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
get_element_by_class,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class RudoIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://rudo.video/vod/oTzw0MGnyG',
|
||||
'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
|
||||
'info_dict': {
|
||||
'id': 'oTzw0MGnyG',
|
||||
'ext': 'mp4',
|
||||
'title': 'Comentario Tomás Mosciatti',
|
||||
'upload_date': '20160617',
|
||||
},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _extract_url(self, webpage):
|
||||
mobj = re.search(
|
||||
'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')
|
||||
|
||||
jwplayer_data = self._parse_json(self._search_regex(
|
||||
r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id,
|
||||
transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
|
||||
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, m3u8_id='hls')
|
||||
|
||||
info_dict.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'upload_date': unified_strdate(get_element_by_class('date', webpage)),
|
||||
})
|
||||
|
||||
return info_dict
|
@ -1,18 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@ -27,7 +21,8 @@ class SandiaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Xyce Software Training - Section 1',
|
||||
'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
|
||||
'upload_date': '20120904',
|
||||
'upload_date': '20120409',
|
||||
'timestamp': 1333983600,
|
||||
'duration': 7794,
|
||||
}
|
||||
}
|
||||
@ -35,37 +30,24 @@ class SandiaIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
presentation_data = self._download_json(
|
||||
'http://digitalops.sandia.gov/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
|
||||
video_id, data=json.dumps({
|
||||
'getPlayerOptionsRequest': {
|
||||
'ResourceId': video_id,
|
||||
'QueryString': '',
|
||||
}
|
||||
}), headers={
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
})['d']['Presentation']
|
||||
|
||||
js_path = self._search_regex(
|
||||
r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
|
||||
webpage, 'JS code URL')
|
||||
js_url = compat_urlparse.urljoin(url, js_path)
|
||||
|
||||
js_code = self._download_webpage(
|
||||
js_url, video_id, note='Downloading player')
|
||||
|
||||
def extract_str(key, **args):
|
||||
return self._search_regex(
|
||||
r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
|
||||
js_code, key, **args)
|
||||
|
||||
def extract_data(key, **args):
|
||||
data_json = extract_str(key, **args)
|
||||
if data_json is None:
|
||||
return data_json
|
||||
return self._parse_json(
|
||||
data_json, video_id, transform_source=js_to_json)
|
||||
title = presentation_data['Title']
|
||||
|
||||
formats = []
|
||||
for i in itertools.count():
|
||||
fd = extract_data('VideoUrls[%d]' % i, default=None)
|
||||
if fd is None:
|
||||
break
|
||||
for stream in presentation_data.get('Streams', []):
|
||||
for fd in stream.get('VideoUrls', []):
|
||||
formats.append({
|
||||
'format_id': '%s' % i,
|
||||
'format_id': fd['MediaType'],
|
||||
'format_note': fd['MimeType'].partition('/')[2],
|
||||
'ext': mimetype2ext(fd['MimeType']),
|
||||
'url': fd['Location'],
|
||||
@ -73,43 +55,11 @@ class SandiaIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
slide_baseurl = compat_urlparse.urljoin(
|
||||
url, extract_data('SlideBaseUrl'))
|
||||
slide_template = slide_baseurl + re.sub(
|
||||
r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
|
||||
slides = []
|
||||
last_slide_time = 0
|
||||
for i in itertools.count(1):
|
||||
sd = extract_str('Slides[%d]' % i, default=None)
|
||||
if sd is None:
|
||||
break
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
|
||||
sd, 'slide %s timestamp' % i, fatal=False))
|
||||
slides.append({
|
||||
'url': slide_template % i,
|
||||
'duration': timestamp - last_slide_time,
|
||||
})
|
||||
last_slide_time = timestamp
|
||||
formats.append({
|
||||
'format_id': 'slides',
|
||||
'protocol': 'slideshow',
|
||||
'url': json.dumps(slides),
|
||||
'preference': -10000, # Downloader not yet written
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = extract_data('Title')
|
||||
description = extract_data('Description', fatal=False)
|
||||
duration = int_or_none(extract_data(
|
||||
'Duration', fatal=False), scale=1000)
|
||||
upload_date = unified_strdate(extract_data('AirDate', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'description': presentation_data.get('Description'),
|
||||
'formats': formats,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'timestamp': int_or_none(presentation_data.get('UnixTime'), 1000),
|
||||
'duration': int_or_none(presentation_data.get('Duration'), 1000),
|
||||
}
|
||||
|
@ -2,11 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -33,45 +33,27 @@ class ShahidIE(InfoExtractor):
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _handle_error(self, response):
|
||||
if not isinstance(response, dict):
|
||||
return
|
||||
error = response.get('error')
|
||||
def _call_api(self, path, video_id, note):
|
||||
data = self._download_json(
|
||||
'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
|
||||
'apiKey': 'sh@hid0nlin3',
|
||||
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
|
||||
}).get('data', {})
|
||||
|
||||
error = data.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
|
||||
expected=True)
|
||||
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata'):
|
||||
response = super(ShahidIE, self)._download_json(url, video_id, note)['data']
|
||||
self._handle_error(response)
|
||||
return response
|
||||
return data
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
api_vars = {
|
||||
'id': video_id,
|
||||
'type': 'player',
|
||||
'url': 'http://api.shahid.net/api/v1_1',
|
||||
'playerType': 'episode',
|
||||
}
|
||||
|
||||
flashvars = self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None)
|
||||
if flashvars:
|
||||
for key in api_vars.keys():
|
||||
value = self._search_regex(
|
||||
r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key,
|
||||
flashvars, 'type', default=None, group='value')
|
||||
if value:
|
||||
api_vars[key] = value
|
||||
|
||||
player = self._download_json(
|
||||
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html'
|
||||
% (video_id, api_vars['type']), video_id, 'Downloading player JSON')
|
||||
player = self._call_api(
|
||||
'Content/Episode/%s' % video_id,
|
||||
video_id, 'Downloading player JSON')
|
||||
|
||||
if player.get('drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
@ -79,22 +61,11 @@ class ShahidIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
video = self._download_json(
|
||||
'%s/%s/%s?%s' % (
|
||||
api_vars['url'], api_vars['playerType'], api_vars['id'],
|
||||
compat_urllib_parse_urlencode({
|
||||
'apiKey': 'sh@hid0nlin3',
|
||||
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
|
||||
})),
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
video = video[api_vars['playerType']]
|
||||
video = self._call_api(
|
||||
'episode/%s' % video_id, video_id,
|
||||
'Downloading video JSON')['episode']
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
thumbnail = video.get('thumbnailUrl')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
timestamp = parse_iso8601(video.get('referenceDate'))
|
||||
categories = [
|
||||
category['name']
|
||||
for category in video.get('genres', []) if 'name' in category]
|
||||
@ -102,10 +73,16 @@ class ShahidIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'description': video.get('description'),
|
||||
'thumbnail': video.get('thumbnailUrl'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('referenceDate')),
|
||||
'categories': categories,
|
||||
'series': video.get('showTitle') or video.get('showName'),
|
||||
'season': video.get('seasonTitle'),
|
||||
'season_number': int_or_none(video.get('seasonNumber')),
|
||||
'season_id': str_or_none(video.get('seasonId')),
|
||||
'episode_number': int_or_none(video.get('number')),
|
||||
'episode_id': video_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
64
youtube_dl/extractor/sixplay.py
Normal file
64
youtube_dl/extractor/sixplay.py
Normal file
@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
qualities,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class SixPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
|
||||
'md5': '42310bffe4ba3982db112b9cd3467328',
|
||||
'info_dict': {
|
||||
'id': '11495320',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jamel et ses amis au Marrakech du rire 2015',
|
||||
'description': 'md5:ba2149d5c321d5201b78070ee839d872',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
clip_data = self._download_json(
|
||||
'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
|
||||
video_id)
|
||||
video_data = clip_data['videoInfo']
|
||||
|
||||
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
||||
formats = []
|
||||
for source in clip_data['sources']:
|
||||
source_type, source_url = source.get('type'), source.get('src')
|
||||
if not source_url or source_type == 'hls/primetime':
|
||||
continue
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
source_url.replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'mp4':
|
||||
quality = source.get('quality')
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': quality,
|
||||
'quality': quality_key(quality),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['title'].strip(),
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'series': video_data.get('titlePgm'),
|
||||
'formats': formats,
|
||||
}
|
@ -67,7 +67,7 @@ class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
|
||||
|
||||
|
||||
class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
|
||||
IE_NAME = 'skynewsarabia:video'
|
||||
IE_NAME = 'skynewsarabia:article'
|
||||
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9',
|
||||
|
33
youtube_dl/extractor/skysports.py
Normal file
33
youtube_dl/extractor/skysports.py
Normal file
@ -0,0 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SkySportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
|
||||
'md5': 'c44a1db29f27daf9a0003e010af82100',
|
||||
'info_dict': {
|
||||
'id': '10328419',
|
||||
'ext': 'flv',
|
||||
'title': 'Bale: Its our time to shine',
|
||||
'description': 'md5:9fd1de3614d525f5addda32ac3c482c9',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': 'ooyala:%s' % self._search_regex(
|
||||
r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'ie_key': 'Ooyala',
|
||||
}
|
@ -9,6 +9,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_id,
|
||||
)
|
||||
|
||||
|
||||
@ -40,7 +41,7 @@ class SlideshareIE(InfoExtractor):
|
||||
bucket = info['jsplayer']['video_bucket']
|
||||
ext = info['jsplayer']['video_extension']
|
||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||
description = self._html_search_regex(
|
||||
description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex(
|
||||
r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
|
||||
'description', fatal=False)
|
||||
|
||||
@ -51,5 +52,5 @@ class SlideshareIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
'thumbnail': info['slideshow']['pin_image_url'],
|
||||
'description': description,
|
||||
'description': description.strip() if description else None,
|
||||
}
|
||||
|
@ -8,10 +8,7 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class SohuIE(InfoExtractor):
|
||||
@ -96,15 +93,10 @@ class SohuIE(InfoExtractor):
|
||||
else:
|
||||
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
||||
|
||||
req = sanitized_Request(base_data_url + vid_id)
|
||||
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
|
||||
|
||||
return self._download_json(
|
||||
req, video_id,
|
||||
'Downloading JSON data for %s' % vid_id)
|
||||
base_data_url + vid_id, video_id,
|
||||
'Downloading JSON data for %s' % vid_id,
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
@ -4,8 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
unified_strdate,
|
||||
get_element_by_attribute,
|
||||
)
|
||||
|
||||
|
||||
class SpiegelIE(InfoExtractor):
|
||||
@ -19,6 +24,7 @@ class SpiegelIE(InfoExtractor):
|
||||
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
|
||||
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
|
||||
'duration': 49,
|
||||
'upload_date': '20130311',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||
@ -29,6 +35,7 @@ class SpiegelIE(InfoExtractor):
|
||||
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
|
||||
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
|
||||
'duration': 983,
|
||||
'upload_date': '20131115',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
|
||||
@ -38,6 +45,7 @@ class SpiegelIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
|
||||
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
|
||||
'upload_date': '20140904',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
|
||||
@ -52,10 +60,10 @@ class SpiegelIE(InfoExtractor):
|
||||
if SpiegeltvIE.suitable(handle.geturl()):
|
||||
return self.url_result(handle.geturl(), 'Spiegeltv')
|
||||
|
||||
title = re.sub(r'\s+', ' ', self._html_search_regex(
|
||||
r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
|
||||
webpage, 'title'))
|
||||
description = self._html_search_meta('description', webpage, 'description')
|
||||
video_data = extract_attributes(self._search_regex(r'(<div[^>]+id="spVideoElements"[^>]+>)', webpage, 'video element', default=''))
|
||||
|
||||
title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage)
|
||||
description = video_data.get('data-video-teaser') or self._html_search_meta('description', webpage, 'description')
|
||||
|
||||
base_url = self._search_regex(
|
||||
[r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'],
|
||||
@ -87,8 +95,9 @@ class SpiegelIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'description': description.strip() if description else None,
|
||||
'duration': duration,
|
||||
'upload_date': unified_strdate(video_data.get('data-video-date')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@ -104,6 +113,7 @@ class SpiegelArticleIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
|
||||
'description': 're:^Patrick Kämnitz gehört.{100,}',
|
||||
'upload_date': '20140825',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
|
||||
|
@ -9,8 +9,9 @@ from ..utils import (
|
||||
|
||||
|
||||
class SRMediathekIE(ARDMediathekIE):
|
||||
IE_NAME = 'sr:mediathek'
|
||||
IE_DESC = 'Saarländischer Rundfunk'
|
||||
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
|
||||
@ -34,7 +35,9 @@ class SRMediathekIE(ARDMediathekIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest']
|
||||
}, {
|
||||
'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -56,7 +56,7 @@ class StitcherIE(InfoExtractor):
|
||||
|
||||
episode = self._parse_json(
|
||||
js_to_json(self._search_regex(
|
||||
r'(?s)var\s+stitcher\s*=\s*({.+?});\n', webpage, 'episode config')),
|
||||
r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
|
||||
display_id)['config']['episode']
|
||||
|
||||
title = unescapeHTML(episode['title'])
|
||||
|
@ -120,7 +120,7 @@ class SVTIE(SVTBaseIE):
|
||||
|
||||
class SVTPlayIE(SVTBaseIE):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
|
||||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
|
||||
@ -141,6 +141,9 @@ class SVTPlayIE(SVTBaseIE):
|
||||
# geo restricted to Sweden
|
||||
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,50 +1,41 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
get_element_by_attribute,
|
||||
parse_duration,
|
||||
strip_jsonp,
|
||||
)
|
||||
from .mitele import MiTeleBaseIE
|
||||
|
||||
|
||||
class TelecincoIE(InfoExtractor):
|
||||
class TelecincoIE(MiTeleBaseIE):
|
||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||
_VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||
'md5': '5cbef3ad5ef17bf0d21570332d140729',
|
||||
'md5': '8d7b2d5f699ee2709d992a63d5cd1712',
|
||||
'info_dict': {
|
||||
'id': 'MDSVID20141015_0058',
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
|
||||
'title': 'Bacalao con kokotxas al pil-pil',
|
||||
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
|
||||
'duration': 662,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': '0a5b9f3cc8b074f50a0578f823a12694',
|
||||
'md5': '284393e5387b3b947b77c613ef04749a',
|
||||
'info_dict': {
|
||||
'id': 'MDSVID20150916_0128',
|
||||
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
||||
'ext': 'mp4',
|
||||
'title': '¿Quién es este ex futbolista con el que hablan ...',
|
||||
'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?',
|
||||
'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
|
||||
'duration': 79,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': 'ad1bfaaba922dd4a295724b05b68f86a',
|
||||
'md5': '749afab6ea5a136a8806855166ae46a2',
|
||||
'info_dict': {
|
||||
'id': 'MDSVID20150513_0220',
|
||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||
'ext': 'mp4',
|
||||
'title': '#DOYLACARA. Con la trata no hay trato',
|
||||
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
|
||||
'duration': 50,
|
||||
},
|
||||
}, {
|
||||
@ -56,40 +47,16 @@ class TelecincoIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode)
|
||||
embed_data_json = self._search_regex(
|
||||
r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
|
||||
).replace('\'', '"')
|
||||
embed_data = json.loads(embed_data_json)
|
||||
|
||||
domain = embed_data['mediaUrl']
|
||||
if not domain.startswith('http'):
|
||||
# only happens in telecinco.es videos
|
||||
domain = 'http://' + domain
|
||||
info_url = compat_urlparse.urljoin(
|
||||
domain,
|
||||
compat_urllib_parse_unquote(embed_data['flashvars']['host'])
|
||||
)
|
||||
info_el = self._download_xml(info_url, episode).find('./video/info')
|
||||
|
||||
video_link = info_el.find('videoUrl/link').text
|
||||
token_query = compat_urllib_parse_urlencode({'id': video_link})
|
||||
token_info = self._download_json(
|
||||
embed_data['flashvars']['ov_tk'] + '?' + token_query,
|
||||
episode,
|
||||
transform_source=strip_jsonp
|
||||
)
|
||||
formats = self._extract_m3u8_formats(
|
||||
token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': embed_data['videoId'],
|
||||
'display_id': episode,
|
||||
'title': info_el.find('title').text,
|
||||
'formats': formats,
|
||||
'description': get_element_by_attribute('class', 'text', webpage),
|
||||
'thumbnail': info_el.find('thumb').text,
|
||||
'duration': parse_duration(info_el.find('duration').text),
|
||||
}
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, 'title')
|
||||
info = self._get_player_info(url, webpage)
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta(
|
||||
['og:description', 'twitter:description'],
|
||||
webpage, 'title', fatal=False),
|
||||
})
|
||||
return info
|
||||
|
@ -6,6 +6,7 @@ import time
|
||||
import hmac
|
||||
import binascii
|
||||
import hashlib
|
||||
import netrc
|
||||
|
||||
|
||||
from .once import OnceIE
|
||||
@ -24,6 +25,9 @@ from ..utils import (
|
||||
xpath_with_ns,
|
||||
mimetype2ext,
|
||||
find_xpath_attr,
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
|
||||
@ -62,10 +66,11 @@ class ThePlatformBaseIE(OnceIE):
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def get_metadata(self, path, video_id):
|
||||
def _download_theplatform_metadata(self, path, video_id):
|
||||
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
|
||||
info = self._download_json(info_url, video_id)
|
||||
return self._download_json(info_url, video_id)
|
||||
|
||||
def _parse_theplatform_metadata(self, info):
|
||||
subtitles = {}
|
||||
captions = info.get('captions')
|
||||
if isinstance(captions, list):
|
||||
@ -86,6 +91,10 @@ class ThePlatformBaseIE(OnceIE):
|
||||
'uploader': info.get('billingCode'),
|
||||
}
|
||||
|
||||
def _extract_theplatform_metadata(self, path, video_id):
|
||||
info = self._download_theplatform_metadata(path, video_id)
|
||||
return self._parse_theplatform_metadata(info)
|
||||
|
||||
|
||||
class ThePlatformIE(ThePlatformBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
@ -158,6 +167,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage):
|
||||
@ -192,6 +202,96 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
|
||||
return '%s&sig=%s' % (url, sig)
|
||||
|
||||
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
|
||||
def xml_text(xml_str, tag):
|
||||
return self._search_regex(
|
||||
'<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
|
||||
|
||||
mvpd_headers = {
|
||||
'ap_42': 'anonymous',
|
||||
'ap_11': 'Linux i686',
|
||||
'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
|
||||
}
|
||||
|
||||
guid = xml_text(resource, 'guid')
|
||||
requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
|
||||
authn_token = requestor_info.get('authn_token')
|
||||
if authn_token:
|
||||
token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', ''))
|
||||
if token_expires and token_expires >= time.time():
|
||||
authn_token = None
|
||||
if not authn_token:
|
||||
# TODO add support for other TV Providers
|
||||
mso_id = 'DTV'
|
||||
login_info = netrc.netrc().authenticators(mso_id)
|
||||
if not login_info:
|
||||
return None
|
||||
|
||||
def post_form(form_page, note, data={}):
|
||||
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
|
||||
return self._download_webpage(
|
||||
post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
provider_redirect_page = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
|
||||
'Downloading Provider Redirect Page', query={
|
||||
'noflash': 'true',
|
||||
'mso_id': mso_id,
|
||||
'requestor_id': requestor_id,
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
provider_login_page = post_form(
|
||||
provider_redirect_page, 'Downloading Provider Login Page')
|
||||
mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
|
||||
'username': login_info[0],
|
||||
'password': login_info[2],
|
||||
})
|
||||
post_form(mvpd_confirm_page, 'Confirming Login')
|
||||
|
||||
session = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
|
||||
'Retrieving Session', data=urlencode_postdata({
|
||||
'_method': 'GET',
|
||||
'requestor_id': requestor_id,
|
||||
}), headers=mvpd_headers)
|
||||
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
|
||||
requestor_info['authn_token'] = authn_token
|
||||
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
|
||||
|
||||
authz_token = requestor_info.get(guid)
|
||||
if not authz_token:
|
||||
authorize = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
|
||||
'Retrieving Authorization Token', data=urlencode_postdata({
|
||||
'resource_id': resource,
|
||||
'requestor_id': requestor_id,
|
||||
'authentication_token': authn_token,
|
||||
'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
|
||||
'userMeta': '1',
|
||||
}), headers=mvpd_headers)
|
||||
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
|
||||
requestor_info[guid] = authz_token
|
||||
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
|
||||
|
||||
mvpd_headers.update({
|
||||
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
|
||||
'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
|
||||
})
|
||||
|
||||
return self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
|
||||
video_id, 'Retrieving Media Token', data=urlencode_postdata({
|
||||
'authz_token': authz_token,
|
||||
'requestor_id': requestor_id,
|
||||
'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
|
||||
'hashed_guid': 'false',
|
||||
}), headers=mvpd_headers)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
@ -265,7 +365,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
ret = self.get_metadata(path, video_id)
|
||||
ret = self._extract_theplatform_metadata(path, video_id)
|
||||
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
|
||||
ret.update({
|
||||
'id': video_id,
|
||||
@ -339,7 +439,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
|
||||
categories = [item['media$name'] for item in entry.get('media$categories', [])]
|
||||
|
||||
ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
|
||||
ret = self._extract_theplatform_metadata('%s/%s' % (provider_id, first_video_id), video_id)
|
||||
subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
|
||||
ret.update({
|
||||
'id': video_id,
|
||||
|
@ -92,12 +92,11 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
if not item_url or item_url in urls:
|
||||
return
|
||||
urls.add(item_url)
|
||||
type_ = item.get('type')
|
||||
ext = determine_ext(item_url, default_ext=None)
|
||||
if type_ == 'application/dash+xml' or ext == 'mpd':
|
||||
ext = mimetype2ext(item.get('type')) or determine_ext(item_url, default_ext=None)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
item_url, video_id, mpd_id='mpd', fatal=False))
|
||||
elif type_ in ('application/vnd.apple.mpegURL', 'application/x-mpegurl') or ext == 'm3u8':
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
item_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if live else 'm3u8_native',
|
||||
@ -111,7 +110,7 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': item_url,
|
||||
'format_id': item.get('quality'),
|
||||
'ext': 'mp4' if item_url.startswith('rtsp') else mimetype2ext(type_) or ext,
|
||||
'ext': 'mp4' if item_url.startswith('rtsp') else ext,
|
||||
'vcodec': 'none' if stream_type == 'audio' else None,
|
||||
})
|
||||
|
||||
|
@ -5,31 +5,27 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class TMZIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/]+)/?'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tmz.com/videos/0_okj015ty/',
|
||||
'md5': '791204e3bf790b1426cb2db0706184c0',
|
||||
'md5': '4d22a51ef205b6c06395d8394f72d560',
|
||||
'info_dict': {
|
||||
'id': '0_okj015ty',
|
||||
'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
|
||||
'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
|
||||
'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*',
|
||||
}
|
||||
'timestamp': 1394747163,
|
||||
'uploader_id': 'batchUser',
|
||||
'upload_date': '20140313',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.tmz.com/videos/0-cegprt2p/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': self._html_search_meta('VideoURL', webpage, fatal=True),
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._html_search_meta('ThumbURL', webpage),
|
||||
}
|
||||
video_id = self._match_id(url).replace('-', '_')
|
||||
return self.url_result('kaltura:591531:%s' % video_id, 'Kaltura', video_id)
|
||||
|
||||
|
||||
class TMZArticleIE(InfoExtractor):
|
||||
|
@ -1,74 +1,41 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class TouTvIE(InfoExtractor):
|
||||
IE_NAME = 'tou.tv'
|
||||
_VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
|
||||
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.tou.tv/30-vies/S04E41',
|
||||
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
|
||||
'info_dict': {
|
||||
'id': '30-vies_S04E41',
|
||||
'id': '122017',
|
||||
'ext': 'mp4',
|
||||
'title': '30 vies Saison 4 / Épisode 41',
|
||||
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||
'age_limit': 8,
|
||||
'uploader': 'Groupe des Nouveaux Médias',
|
||||
'duration': 1296,
|
||||
'upload_date': '20131118',
|
||||
'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
|
||||
'title': 'Saison 2015 Épisode 17',
|
||||
'description': 'La photo de famille 2',
|
||||
'upload_date': '20100717',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only available in Canada'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mediaId = self._search_regex(
|
||||
r'"idMedia":\s*"([^"]+)"', webpage, 'media ID')
|
||||
|
||||
streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId
|
||||
streams_doc = self._download_xml(
|
||||
streams_url, video_id, note='Downloading stream list')
|
||||
|
||||
video_url = next(n.text
|
||||
for n in streams_doc.findall('.//choice/url')
|
||||
if '//ad.doubleclick' not in n.text)
|
||||
if video_url.endswith('/Unavailable.flv'):
|
||||
raise ExtractorError(
|
||||
'Access to this video is blocked from outside of Canada',
|
||||
expected=True)
|
||||
|
||||
duration_str = self._html_search_meta(
|
||||
'video:duration', webpage, 'duration')
|
||||
duration = int(duration_str) if duration_str else None
|
||||
upload_date_str = self._html_search_meta(
|
||||
'video:release_date', webpage, 'upload date')
|
||||
upload_date = unified_strdate(upload_date_str) if upload_date_str else None
|
||||
path = self._match_id(url)
|
||||
metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
|
||||
video_id = metadata['IdMedia']
|
||||
details = metadata['Details']
|
||||
title = details['OriginalTitle']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id),
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'url': video_url,
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': self._dc_search_uploader(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'age_limit': self._media_rating_search(webpage),
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'thumbnail': details.get('ImageUrl'),
|
||||
'duration': int_or_none(details.get('LengthInSeconds')),
|
||||
}
|
||||
|
@ -4,6 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
get_element_by_attribute,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class TVPIE(InfoExtractor):
|
||||
@ -21,7 +27,7 @@ class TVPIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
|
||||
'md5': 'c3b15ed1af288131115ff17a17c19dda',
|
||||
'md5': 'b0005b542e5b4de643a9690326ab1257',
|
||||
'info_dict': {
|
||||
'id': '17916176',
|
||||
'ext': 'mp4',
|
||||
@ -53,6 +59,11 @@ class TVPIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
|
||||
|
||||
error_massage = get_element_by_attribute('class', 'msg error', webpage)
|
||||
if error_massage:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, clean_html(error_massage)), expected=True)
|
||||
|
||||
title = self._search_regex(
|
||||
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
|
||||
webpage, 'title', group='title')
|
||||
@ -66,24 +77,50 @@ class TVPIE(InfoExtractor):
|
||||
r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
|
||||
if not video_url:
|
||||
r'0:{src:([\'"])(?P<url>.*?)\1', webpage,
|
||||
'formats', group='url', default=None)
|
||||
if not video_url or 'material_niedostepny.mp4' in video_url:
|
||||
video_url = self._download_json(
|
||||
'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
|
||||
video_id)['video_url']
|
||||
|
||||
ext = video_url.rsplit('.', 1)[-1]
|
||||
if ext != 'ism/manifest':
|
||||
if '/' in ext:
|
||||
ext = 'mp4'
|
||||
formats = []
|
||||
video_url_base = self._search_regex(
|
||||
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
|
||||
video_url, 'video base url', default=None)
|
||||
if video_url_base:
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url_base + '.ism/video.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url_base + '.ism/video.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url_base + '.ism/video.m3u8', video_id,
|
||||
'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
formats.extend(m3u8_formats)
|
||||
for i, m3u8_format in enumerate(m3u8_formats, 2):
|
||||
http_url = '%s-%d.mp4' % (video_url_base, i)
|
||||
if self._is_valid_url(http_url, video_id):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': http_url,
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
formats = [{
|
||||
'format_id': 'direct',
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'ext': determine_ext(video_url, 'mp4'),
|
||||
}]
|
||||
else:
|
||||
m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -8,43 +8,36 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
determine_ext,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayIE(InfoExtractor):
|
||||
IE_DESC = 'TV3Play and related services'
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?:tvplay\.lv/parraides|
|
||||
tv3play\.lt/programos|
|
||||
play\.tv3\.lt/programos|
|
||||
tv3play\.ee/sisu|
|
||||
tv3play\.se/program|
|
||||
tv6play\.se/program|
|
||||
tv8play\.se/program|
|
||||
tv10play\.se/program|
|
||||
tv3play\.no/programmer|
|
||||
viasat4play\.no/programmer|
|
||||
tv6play\.no/programmer|
|
||||
tv3play\.dk/programmer|
|
||||
(?:tvplay(?:\.skaties)?\.lv/parraides|
|
||||
(?:tv3play|play\.tv3)\.lt/programos|
|
||||
tv3play(?:\.tv3)?\.ee/sisu|
|
||||
tv(?:3|6|8|10)play\.se/program|
|
||||
(?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer|
|
||||
play\.novatv\.bg/programi
|
||||
)/[^/]+/(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
'md5': 'a1612fe0849455423ad8718fe049be21',
|
||||
'info_dict': {
|
||||
'id': '418113',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kādi ir īri? - Viņas melo labāk',
|
||||
'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
|
||||
'duration': 25,
|
||||
'timestamp': 1406097056,
|
||||
'upload_date': '20140723',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
|
||||
@ -82,7 +75,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '395385',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Husräddarna S02E07',
|
||||
'description': 'md5:f210c6c89f42d4fc39faa551be813777',
|
||||
'duration': 2574,
|
||||
@ -90,7 +83,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20140520',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@ -98,7 +90,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '266636',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Den sista dokusåpan S01E08',
|
||||
'description': 'md5:295be39c872520221b933830f660b110',
|
||||
'duration': 1492,
|
||||
@ -107,7 +99,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@ -115,7 +106,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '282756',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Antikjakten S01E10',
|
||||
'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
|
||||
'duration': 2646,
|
||||
@ -123,7 +114,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20120925',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@ -131,7 +121,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '230898',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anna Anka søker assistent - Ep. 8',
|
||||
'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
|
||||
'duration': 2656,
|
||||
@ -139,7 +129,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20100628',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@ -147,7 +136,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '21873',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Budbringerne program 10',
|
||||
'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
|
||||
'duration': 1297,
|
||||
@ -155,7 +144,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20090929',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@ -163,7 +151,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
|
||||
'info_dict': {
|
||||
'id': '361883',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
|
||||
'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
|
||||
'duration': 2594,
|
||||
@ -171,7 +159,6 @@ class TVPlayIE(InfoExtractor):
|
||||
'upload_date': '20140224',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@ -191,6 +178,14 @@ class TVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -199,7 +194,9 @@ class TVPlayIE(InfoExtractor):
|
||||
video = self._download_json(
|
||||
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
if video['is_geo_blocked']:
|
||||
title = video['title']
|
||||
|
||||
if video.get('is_geo_blocked'):
|
||||
self.report_warning(
|
||||
'This content might not be available in your country due to copyright reasons')
|
||||
|
||||
@ -208,12 +205,25 @@ class TVPlayIE(InfoExtractor):
|
||||
|
||||
quality = qualities(['hls', 'medium', 'high'])
|
||||
formats = []
|
||||
for format_id, video_url in streams['streams'].items():
|
||||
for format_id, video_url in streams.get('streams', {}).items():
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(video_url, {
|
||||
'hdcore': '3.5.0',
|
||||
'plugin': 'aasp-3.5.0.151.81'
|
||||
}), video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'preference': quality(format_id),
|
||||
'quality': quality(format_id),
|
||||
'ext': ext,
|
||||
}
|
||||
if video_url.startswith('rtmp'):
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
|
||||
@ -225,25 +235,20 @@ class TVPlayIE(InfoExtractor):
|
||||
'app': m.group('app'),
|
||||
'play_path': m.group('playpath'),
|
||||
})
|
||||
elif video_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id))
|
||||
continue
|
||||
else:
|
||||
fmt.update({
|
||||
'url': video_url,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video['title'],
|
||||
'description': video['description'],
|
||||
'duration': video['duration'],
|
||||
'timestamp': parse_iso8601(video['created_at']),
|
||||
'view_count': video['views']['total'],
|
||||
'age_limit': video.get('age_limit', 0),
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('created_at')),
|
||||
'view_count': int_or_none(video.get('views', {}).get('total')),
|
||||
'age_limit': int_or_none(video.get('age_limit', 0)),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -1,25 +1,62 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
class TweakersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
|
||||
'md5': '3147e4ddad366f97476a93863e4557c8',
|
||||
'md5': 'fe73e417c093a788e0160c4025f88b15',
|
||||
'info_dict': {
|
||||
'id': '9926',
|
||||
'ext': 'mp4',
|
||||
'title': 'New Nintendo 3DS XL - Op alle fronten beter',
|
||||
'description': 'md5:f97324cc71e86e11c853f0763820e3ba',
|
||||
'description': 'md5:3789b21fed9c0219e9bcaacd43fab280',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
'duration': 386,
|
||||
'uploader_id': 's7JeEm',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
entries = self._extract_xspf_playlist(
|
||||
'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % playlist_id, playlist_id)
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://tweakers.net/video/s1playlist/%s/1920/1080/playlist.json' % video_id,
|
||||
video_id)['items'][0]
|
||||
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
for location in video_data.get('locations', {}).get('progressive', []):
|
||||
format_id = location.get('label')
|
||||
width = int_or_none(location.get('width'))
|
||||
height = int_or_none(location.get('height'))
|
||||
for source in location.get('sources', []):
|
||||
source_url = source.get('src')
|
||||
if not source_url:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type')) or determine_ext(source_url)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': source_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('poster'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'uploader_id': video_data.get('account'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user