Compare commits

..

143 Commits

Author SHA1 Message Date
0e94b4713d release 2016.07.06 2016-07-06 00:54:23 +07:00
a6d3b89feb [prosiebensat1] Make downloading urls JSON non fatal 2016-07-06 00:52:48 +07:00
6c26815d63 [onionstudios] fix info extraction 2016-07-05 18:05:07 +01:00
73c4ac2c95 [youtube:channel] Improve channel id extraction and detect unavailable channels (Closes #10009) 2016-07-05 23:30:44 +07:00
84f214d840 [prosiebensat1] extract all formats 2016-07-05 17:11:45 +01:00
e3f88be7a9 [rtvnh] extract all formats 2016-07-05 14:45:39 +01:00
31af3e35e0 [sandia] remove unused imports 2016-07-05 13:39:24 +01:00
94a5cff91d [sendia] fix info extraction 2016-07-05 13:37:46 +01:00
77082c7b9e [slideshare] fix description extraction 2016-07-05 12:01:04 +01:00
252a1f75d2 [spiegel] improve info extraction 2016-07-05 11:46:25 +01:00
5abf513cf8 [stitcher] fix episode config extraction 2016-07-05 10:44:16 +01:00
c6054e3201 [xuite] Support videos with already encoded media id 2016-07-05 14:26:42 +08:00
4080530624 [youtube:shared] Recognize the new 'shared' URLs
Closes #10007
2016-07-05 13:15:05 +08:00
c25f1a9b63 release 2016.07.05 2016-07-05 06:32:46 +07:00
dfaa86b75e [test_utils] add test for smuggling a smuggled url 2016-07-04 21:36:32 +01:00
d9163ae3b6 [kaltura] fix extraction error for videos from multiple kaltura servers 2016-07-04 21:34:27 +01:00
dafafe7cf1 [la7] extract more info from a kaltura custom server 2016-07-04 17:59:58 +01:00
81953d1ae5 [kaltura] add support videos stored on custom kaltura servers(closes #5557) 2016-07-04 17:59:58 +01:00
3a212ed62e [iqiyi] Skip an unstable MD5 checksum 2016-07-04 11:25:46 +08:00
195f084542 [pornhub] Detect private videos (Closes #9987) 2016-07-04 03:27:00 +07:00
aa7a455b2e [README.md] Clarify configuration file may not exist by default 2016-07-04 01:24:33 +07:00
6a4e659c93 [yahoo] Recognize brightcove embed (Closes #9995) 2016-07-03 23:00:36 +07:00
40f3666f6b [test/test_http] Update tests for 38cce791c7 2016-07-03 23:50:55 +08:00
dd801bbe18 [brightcove] improve error detection 2016-07-03 16:37:22 +01:00
38cce791c7 Rename --cn-verfication-proxy to --geo-verification-proxy
And deprecate the former one

Since commit f138873900, this option is
not limited to China websites, so rename it.
2016-07-03 23:29:56 +08:00
bf3ae6a543 [devscripts/show-downloads-statictics] Add script for displaying downloads statistics 2016-07-03 22:20:14 +07:00
bff98341d5 release 2016.07.03.1 2016-07-03 21:28:55 +07:00
2644e911be [iqiyi] Fix extraction
See https://github.com/soimort/you-get/issues/1211#issuecomment-229011559
2016-07-03 22:19:56 +08:00
a5f67895d3 [nationalgeographic] restore http formats
there was a misunderstanding about the reason of 403 response
the problem happen only when the user use aria2c as a downloader
a1f6f5c768 (commitcomment-18107559)
2016-07-03 14:10:25 +01:00
15e4b6b758 [rai] Support an alternative form of embedded relinker URL
Closes #8551
2016-07-03 19:52:11 +08:00
2b28b892d8 [rai] Support videos with embedded content item ID (#8551) 2016-07-03 19:52:11 +08:00
7507fc98cb [README.md] Fix somes typo in coding conventions section 2016-07-03 18:35:28 +07:00
477b7a8474 [downloader/f4m] Fix for Rai live streams 2016-07-03 19:26:39 +08:00
034a884957 [rai] Support direct relinker URLs (closes #8552) 2016-07-03 19:26:39 +08:00
64436cb1a4 [nationalgeographic] skip download for national geographic channel tests(closes #9991) 2016-07-03 10:43:36 +01:00
f138873900 [rai] Fix extraction and update _TESTS
Closes #8617
Closes #9157
Closes #9232
2016-07-03 15:49:35 +08:00
e793338c88 [buzzfeed] Detect Facebook embed and update _TESTS
Closes #5701
2016-07-03 14:12:02 +08:00
369bb06206 [facebook] Improve embed detection (#5701) 2016-07-03 14:11:29 +08:00
2cb31d288e [history:topic] Relax _VALID_URL 2016-07-03 13:01:04 +07:00
c723d1cd8d [README.md] Update some codebase links 2016-07-03 11:35:13 +07:00
1f55234057 Add PULL_REQUEST_TEMPLATE.md 2016-07-03 11:31:49 +07:00
04006fae8d [README.md] Start writing youtube-dl coding conventions 2016-07-03 11:31:07 +07:00
4cb13d0d6a [hrti] Don't redefine variable in list comprehension 2016-07-02 23:02:14 +02:00
a1f6f5c768 [nationalgeographic] add support Adobe Pass auth 2016-07-02 21:24:22 +01:00
05c7feec77 [aenetworks] add support Adobe Pass auth 2016-07-02 21:24:22 +01:00
bf83024826 [theplatform] add basic support for Adobe Pass 2016-07-02 21:24:22 +01:00
a0cfd82dda release 2016.07.03 2016-07-03 03:19:22 +07:00
1b734adb2d [xtube] Fix extraction (Closes #9953, closes #9961) 2016-07-03 03:17:35 +07:00
9b724d7277 [extractors] Add hrti:playlist import 2016-07-03 02:25:39 +07:00
c3a5dd3b5d Credit @atopuzov for hrti (#9482) 2016-07-03 02:22:59 +07:00
e3755a624b [hrti] Improve and add support for playlists (Closes #9482) 2016-07-03 02:22:14 +07:00
95cf60e826 [utils] Add PUTRequest 2016-07-03 02:21:32 +07:00
6b03e1e25d [HRTi] Implement extractor for Croatian Radiotelevision 2016-07-03 02:20:41 +07:00
712b0b5b70 [la7.it] Fix the extractor 2016-07-02 23:49:03 +08:00
6a424391d9 [facebook] Make embed detection stricter to prevent false-positives 2016-07-02 23:15:55 +08:00
dbf0157a26 [generic] Add MD5 checksums 2016-07-02 21:58:07 +08:00
7deef1ba67 [generic] Support Wordpress "YouTube Video Importer" plugin
Closes #9938
2016-07-02 21:58:07 +08:00
fd6ca38262 [facebook] Improve Facebook embedded detection
Related to #9938.

Another example comes from 9834872bf6.
2016-07-02 21:58:07 +08:00
bdafd88da0 [vk] Extend _VALID_URLs to support new domain (Closes #9981) 2016-07-02 16:43:19 +07:00
7a1e71575e release 2016.07.02 2016-07-02 02:47:42 +07:00
ac2d8f54d1 [vine] Remove superfluous whitespace 2016-07-02 02:45:00 +07:00
14ff6baa0e [fusion] Improve 2016-07-02 02:44:37 +07:00
bb08101ec4 [Fusion] Add new extractor 2016-07-02 02:37:28 +07:00
bc4b2d75ba [pornhub] Add support for thumbzilla (Closes #8696) 2016-07-02 02:11:07 +07:00
35fc3021ba [periscope] Add another fallback source 2016-07-02 01:35:57 +07:00
347227237b [periscope] fix playlist extraction (#9967)
The JSON response changed and the extractor needed to be updated in order to gather the video IDs.
2016-07-02 01:29:11 +07:00
564dc3c6e8 [vine] Fix extraction (Closes #9970) 2016-07-02 01:24:57 +07:00
9f4576a7eb [twitch] Update usher URL (Closes #9975) 2016-07-01 23:16:43 +07:00
f11315e8d4 release 2016.07.01 2016-07-01 03:59:57 +07:00
0c2ac64bb8 [sixplay] Rename preference key to quality in format dict 2016-07-01 03:57:59 +07:00
a9eede3913 [test/compat] compat_shlex_split: test with newlines 2016-07-01 03:30:35 +07:00
9e29ef13a3 [options] Accept quoted string across multiple lines (#9940)
Like:

    -f "
    bestvideo+bestaudio/
    best
    "
2016-07-01 03:30:31 +07:00
eaaaaec042 [pornhub] Add more tests with removed videos 2016-07-01 03:18:27 +07:00
3cb3b60064 [pornhub] Relax removed message regex (Closes #9964) 2016-07-01 03:14:23 +07:00
044e3d91b5 [Pornhub] Fix error detection 2016-07-01 02:59:50 +07:00
c9e538a3b1 [ctvnews] use orderedSet, increase the number of items for playlists and use smaller bin list for test 2016-06-30 19:52:32 +01:00
76dad392f5 [meta] Clarify the source of uppod st decryption algorithm 2016-06-30 18:27:57 +01:00
9617b557aa [ctv] Add new extractor(closes #4077) 2016-06-30 18:22:35 +01:00
bf4fa24414 [ctvnews] Add new extractor(closes #2156) 2016-06-30 18:22:35 +01:00
20361b4f25 [rds] extract 9c9media formats 2016-06-30 18:22:35 +01:00
05a0068a76 [9c9media] Add new extractor 2016-06-30 18:22:35 +01:00
66a42309fa release 2016.06.30 2016-06-30 23:56:55 +07:00
fd94e2671a [meta] Add support for pladform embeds 2016-06-30 23:20:44 +07:00
8ff6697861 [pladform] Improve embed detection 2016-06-30 23:19:29 +07:00
eafa643715 [meta] Make duration and description optional
For iframe URLs
2016-06-30 23:06:13 +07:00
049da7cb6c [meta] Extend _VALID_URL 2016-06-30 23:04:18 +07:00
7dbeee7e22 [generic] make twitter:player extraction non fatal 2016-06-30 14:11:55 +01:00
93ad6c6bfa [sixplay] Add new extractor(closes #2183) 2016-06-30 13:50:49 +01:00
329179073b [generic] add generic support for twitter:player embeds 2016-06-30 12:01:30 +01:00
4d86d2008e [urplay] fix typo and check with flake8 2016-06-30 11:30:42 +01:00
ab47b6e881 [theatlantic] Add new extractor(closes #6611) 2016-06-30 04:08:56 +01:00
df43389ade [skysports] Add new extractor(closes #7066) 2016-06-30 02:54:21 +01:00
397b305cfe [meta] Add new extractor(closes #8789) 2016-06-30 00:21:03 +01:00
e496fa50cd [urplay] Add new extractor(closes #9332) 2016-06-29 20:19:31 +01:00
06a96da15b [eagleplatform] Improve embed detection and extract in separate routine (Closes #9926) 2016-06-29 23:01:34 +07:00
70157c2c43 [aenetworks] add support for movie pages 2016-06-29 16:55:17 +01:00
c58ed8563d [aenetworks] extract history topic playlist title 2016-06-29 16:18:16 +01:00
4c7821227c [aenetworks:historytopic] fix topic video url 2016-06-29 16:03:32 +01:00
42362fdb5e [aenetworks] add support for show and season for A&E Network sites and History topics(closes #9816) 2016-06-29 15:49:17 +01:00
97124e572d [arte:playlist] Fix test 2016-06-28 22:39:53 +07:00
32616c14cc [vrt] extract all formats 2016-06-28 14:02:03 +01:00
8174d0fe95 release 2016.06.27 2016-06-27 23:09:39 +07:00
8704778d95 [pbs] Check manually constructed http links (Closes #9921) 2016-06-27 23:06:42 +07:00
c287f2bc60 [extractor/generic] Use _extract_url for kaltura embeds (Closes #9922) 2016-06-27 22:45:26 +07:00
9ea5c04c0d [kaltura] Add _extract_url with fixed regex 2016-06-27 22:44:17 +07:00
fd7a7498a4 [test_all_urls] PEP 8 and change wording 2016-06-27 22:11:45 +07:00
e3a6747d8f New test-case: extractor names are supposed to be unique
@dstftw explained in
https://github.com/rg3/youtube-dl/pull/9918#issuecomment-228625878 that
extractor names are supposed to be unique. @dstftw has fixed the two
offending extractors, and here I add a test to ensure this does not
happen in the future.
2016-06-27 22:09:29 +07:00
f41ffc00d1 [skynewsarabia:article] Clarify IE_NAME 2016-06-27 05:08:09 +07:00
81fda15369 [sr:mediathek] Clarify IE_NAME 2016-06-27 05:07:12 +07:00
427cd050a3 [extractor/generic] Improve kaltura embed detection (Closes #9911) 2016-06-27 04:11:53 +07:00
b0c200f1ec [msn] Add test URL with non-alphanumeric characters 2016-06-26 22:03:36 +07:00
92747e664a release 2016.06.26 2016-06-26 21:15:24 +07:00
f1f336322d [msn] Fix extraction (Closes #8960, closes #9542) 2016-06-26 21:10:05 +07:00
bf8dd79045 [extractor/common] Fix sorting with custom field preference 2016-06-26 21:09:07 +07:00
c6781156aa [MSN] add new extractor 2016-06-26 21:07:59 +07:00
f484c5fa25 [vidbit] Improve (Closes #9759) 2016-06-26 16:59:28 +07:00
88d9f6c0c4 [utils] Add support for name list in _html_search_meta 2016-06-26 16:57:14 +07:00
3c9c088f9c [Vidbit] Add new extractor 2016-06-26 16:52:52 +07:00
fc3996bfe1 [iqiyi] Remove codes for debugging 2016-06-26 15:45:41 +08:00
5b6ad8630c [iqiyi] Partially fix IqiyiIE
Use the HTML5 API. Only low-resolution formats available

Related: #9839

Thanks @zhangn1985 for the overall algorithm (soimort/you-get#1224)
2016-06-26 15:18:32 +08:00
30105f4ac0 [le] Move urshift() to utils.py 2016-06-26 15:17:26 +08:00
1143535d76 [utils] Add urshift()
Used in IqiyiIE and LeIE
2016-06-26 15:16:49 +08:00
7d52c052ef [generic] Fix test_Generic_76
Broken: https://travis-ci.org/rg3/youtube-dl/jobs/140251658
2016-06-26 11:56:27 +08:00
a2406fce3c Fix misspelling 2016-06-26 01:28:55 +07:00
3b34ab538c [svtplay] Extend _VALID_URL (#9900) 2016-06-26 00:29:53 +07:00
ac782306f1 [iqiyi] Mark broken 2016-06-26 00:25:41 +07:00
0c00e889f3 Credit @JakubAdamWieczorek for #9813 2016-06-25 23:35:57 +07:00
ce96ed05f4 [polskieradio] Add test with video 2016-06-25 23:31:21 +07:00
0463b77a1f [polskieradio] Improve extraction (Closes #9813) 2016-06-25 23:19:18 +07:00
2d185706ea [polskieradio] Add support for Polskie Radio.
Polskie Radio is the main Polish state-funded radio broadcasting service.
2016-06-25 23:19:18 +07:00
b72b44318c [utils] Add strip_or_none 2016-06-25 23:19:18 +07:00
46f59e89ea [utils] Add unified_timestamp 2016-06-25 23:19:18 +07:00
b4241e308e release 2016.06.25 2016-06-25 03:03:20 +07:00
3d4b08dfc7 [setup.py] Add file version information and quotes consistency (Closes #9878) 2016-06-25 02:50:12 +07:00
be49068d65 [youtube] Fix and skip some tests 2016-06-24 22:47:19 +07:00
525cedb971 [youtube] Relax URL expansion in description 2016-06-24 22:37:13 +07:00
de3c7fe0d4 [youtube] Fix 141 format tests 2016-06-24 22:27:55 +07:00
896cc72750 [mixcloud] View count and like count may be absent
Closes #9874
2016-06-24 17:26:12 +08:00
c1ff6e1ad0 [vimeo:review] Fix extraction for password-protected videos
Closes #9853
2016-06-24 16:48:37 +08:00
fee70322d7 [appletrailers] correct thumbnail fallback 2016-06-23 19:03:34 +01:00
8065d6c55f [dcn] extend _VALID_URL for awaan.ae and extract all available formats 2016-06-23 17:22:15 +01:00
494172d2e5 [appletrailers] extract info from an alternative source if available(closes #8422)(closes #8422) 2016-06-23 15:49:42 +01:00
6e3c2047f8 [tvp] extract all formats and detect erros 2016-06-23 04:36:16 +01:00
81 changed files with 3000 additions and 1086 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23.1**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.06**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.06.23.1
[debug] youtube-dl version 2016.07.06
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

22
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file
View File

@ -0,0 +1,22 @@
## Please follow the guide below
- You will be asked some questions, please read them **carefully** and answer honestly
- Put an `x` into all the boxes [ ] relevant to your *pull request* (like that [x])
- Use *Preview* tab to see how your *pull request* will actually look like
---
### Before submitting a *pull request* make sure you have:
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
### What is the purpose of your *pull request*?
- [ ] Bug fix
- [ ] New extractor
- [ ] New feature
---
### Description of your *pull request* and other information
Explanation of your *pull request* in arbitrary form goes here. Please make sure the description explains the purpose and effect of your *pull request* and is worded well enough to be understood. Provide as much context and examples as possible.

View File

@ -175,3 +175,5 @@ Tomáš Čech
Déstin Reed
Roman Tsiupa
Artur Krysiak
Jakub Adam Wieczorek
Aleksandar Topuzović

View File

@ -97,9 +97,17 @@ If you want to add support for a new site, first of all **make sure** this site
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
2. Check out the source code with:
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
3. Start a new git branch with
cd youtube-dl
git checkout -b yourextractor
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
```python
# coding: utf-8
from __future__ import unicode_literals
@ -143,16 +151,148 @@ After you have ensured this site is distributing it's content legally, you can f
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
## youtube-dl coding conventions
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
### Mandatory and optional metafields
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
- `url` (media download URL) or `formats`
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
#### Example
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
```python
meta = self._download_json(url, video_id)
```
Assume at this point `meta`'s layout is:
```python
{
...
"summary": "some fancy summary text",
...
}
```
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
```python
description = meta.get('summary') # correct
```
and not like:
```python
description = meta['summary'] # incorrect
```
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
```python
description = self._search_regex(
r'<span[^>]+id="title"[^>]*>([^<]+)<',
webpage, 'description', fatal=False)
```
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
You can also pass `default=<some fallback value>`, for example:
```python
description = self._search_regex(
r'<span[^>]+id="title"[^>]*>([^<]+)<',
webpage, 'description', default=None)
```
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
### Provide fallbacks
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
#### Example
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
```python
title = meta['title']
```
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
```python
title = meta.get('title') or self._og_search_title(webpage)
```
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
### Make regular expressions flexible
When using regular expressions try to write them fuzzy and flexible.
#### Example
Say you need to extract `title` from the following HTML code:
```html
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
```
The code for that task should look similar to:
```python
title = self._search_regex(
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
```
Or even better:
```python
title = self._search_regex(
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
webpage, 'title', group='title')
```
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
The code definitely should not look like:
```python
title = self._search_regex(
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
webpage, 'title', group='title')
```
### Use safe conversion functions
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.

160
README.md
View File

@ -103,9 +103,9 @@ which means you can modify it, redistribute it or use it however you like.
(experimental)
-6, --force-ipv6 Make all connections via IPv6
(experimental)
--cn-verification-proxy URL Use this proxy to verify the IP address for
some Chinese sites. The default proxy
specified by --proxy (or none, if the
--geo-verification-proxy URL Use this proxy to verify the IP address for
some geo-restricted sites. The default
proxy specified by --proxy (or none, if the
options is not present) is used for the
actual downloading. (experimental)
@ -424,7 +424,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`.
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
```
@ -890,9 +890,17 @@ If you want to add support for a new site, first of all **make sure** this site
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
2. Check out the source code with:
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
3. Start a new git branch with
cd youtube-dl
git checkout -b yourextractor
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
```python
# coding: utf-8
from __future__ import unicode_literals
@ -936,19 +944,151 @@ After you have ensured this site is distributing it's content legally, you can f
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`.
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
## youtube-dl coding conventions
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
### Mandatory and optional metafields
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
- `url` (media download URL) or `formats`
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
#### Example
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
```python
meta = self._download_json(url, video_id)
```
Assume at this point `meta`'s layout is:
```python
{
...
"summary": "some fancy summary text",
...
}
```
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
```python
description = meta.get('summary') # correct
```
and not like:
```python
description = meta['summary'] # incorrect
```
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
```python
description = self._search_regex(
r'<span[^>]+id="title"[^>]*>([^<]+)<',
webpage, 'description', fatal=False)
```
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
You can also pass `default=<some fallback value>`, for example:
```python
description = self._search_regex(
r'<span[^>]+id="title"[^>]*>([^<]+)<',
webpage, 'description', default=None)
```
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
### Provide fallbacks
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
#### Example
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
```python
title = meta['title']
```
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
```python
title = meta.get('title') or self._og_search_title(webpage)
```
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
### Make regular expressions flexible
When using regular expressions try to write them fuzzy and flexible.
#### Example
Say you need to extract `title` from the following HTML code:
```html
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
```
The code for that task should look similar to:
```python
title = self._search_regex(
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
```
Or even better:
```python
title = self._search_regex(
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
webpage, 'title', group='title')
```
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
The code definitely should not look like:
```python
title = self._search_regex(
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
webpage, 'title', group='title')
```
### Use safe conversion functions
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
# EMBEDDING YOUTUBE-DL
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new).

View File

@ -0,0 +1,41 @@
#!/usr/bin/env python
from __future__ import unicode_literals
import json
import os
import re
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.compat import (
compat_print,
compat_urllib_request,
)
from youtube_dl.utils import format_bytes
def format_size(bytes):
return '%s (%d bytes)' % (format_bytes(bytes), bytes)
total_bytes = 0
releases = json.loads(compat_urllib_request.urlopen(
'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8'))
for release in releases:
compat_print(release['name'])
for asset in release['assets']:
asset_name = asset['name']
total_bytes += asset['download_count'] * asset['size']
if all(not re.match(p, asset_name) for p in (
r'^youtube-dl$',
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
r'^youtube-dl\.exe$')):
continue
compat_print(
' %s size: %s downloads: %d'
% (asset_name, format_size(asset['size']), asset['download_count']))
compat_print('total downloads traffic: %s' % format_size(total_bytes))

View File

@ -45,7 +45,6 @@
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**
- **ARD:mediathek**: Saarländischer Rundfunk
- **arte.tv**
- **arte.tv:+7**
- **arte.tv:cinema**
@ -153,6 +152,8 @@
- **CSNNE**
- **CSpan**: C-SPAN
- **CtsNews**: 華視新聞
- **CTV**
- **CTVNews**
- **culturebox.francetvinfo.fr**
- **CultureUnplugged**
- **CWTV**
@ -241,6 +242,7 @@
- **FreeVideo**
- **Funimation**
- **FunnyOrDie**
- **Fusion**
- **GameInformer**
- **Gamekings**
- **GameOne**
@ -273,6 +275,7 @@
- **Helsinki**: helsinki.fi
- **HentaiStigma**
- **HistoricFilms**
- **history:topic**: History.com Topic
- **hitbox**
- **hitbox:live**
- **HornBunny**
@ -280,6 +283,8 @@
- **HotStar**
- **Howcast**
- **HowStuffWorks**
- **HRTi**
- **HRTiPlaylist**
- **HuffPost**: Huffington Post
- **Hypem**
- **Iconosquare**
@ -326,7 +331,7 @@
- **kuwo:mv**: 酷我音乐 - MV
- **kuwo:singer**: 酷我音乐 - 歌手
- **kuwo:song**: 酷我音乐
- **la7.tv**
- **la7.it**
- **Laola1Tv**
- **Le**: 乐视网
- **Learnr**
@ -359,6 +364,7 @@
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **META**
- **metacafe**
- **Metacritic**
- **Mgoon**
@ -385,6 +391,7 @@
- **MovieFap**
- **Moviezine**
- **MPORA**
- **MSN**
- **MTV**
- **mtv.de**
- **mtviggy.com**
@ -438,6 +445,7 @@
- **nick.de**
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **NineCNineMedia**
- **njoy**: N-JOY
- **njoy:embed**
- **Noco**
@ -501,8 +509,9 @@
- **plus.google**: Google Plus
- **pluzz.francetv.fr**
- **podomatic**
- **PolskieRadio**
- **PornHd**
- **PornHub**
- **PornHub**: PornHub and Thumbzilla
- **PornHubPlaylist**
- **PornHubUserVideos**
- **Pornotube**
@ -586,8 +595,10 @@
- **Shared**: shared.sx and vivo.sx
- **ShareSix**
- **Sina**
- **SixPlay**
- **skynewsarabia:article**
- **skynewsarabia:video**
- **skynewsarabia:video**
- **SkySports**
- **Slideshare**
- **Slutload**
- **smotri**: Smotri.com
@ -619,6 +630,7 @@
- **SportBoxEmbed**
- **SportDeutschland**
- **Sportschau**
- **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- **SSA**
@ -719,6 +731,7 @@
- **UDNEmbed**: 聯合影音
- **Unistra**
- **Urort**: NRK P3 Urørt
- **URPlay**
- **USAToday**
- **ustream**
- **ustream:channel**
@ -736,6 +749,7 @@
- **vh1.com**
- **Vice**
- **ViceShow**
- **Vidbit**
- **Viddler**
- **video.google:search**: Google Video search
- **video.mit.edu**
@ -843,6 +857,7 @@
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- **youtube:shared**
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)

View File

@ -21,25 +21,37 @@ try:
import py2exe
except ImportError:
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
print("Cannot import py2exe", file=sys.stderr)
print('Cannot import py2exe', file=sys.stderr)
exit(1)
py2exe_options = {
"bundle_files": 1,
"compressed": 1,
"optimize": 2,
"dist_dir": '.',
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'],
'bundle_files': 1,
'compressed': 1,
'optimize': 2,
'dist_dir': '.',
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
}
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))
DESCRIPTION = 'YouTube video downloader'
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
py2exe_console = [{
"script": "./youtube_dl/__main__.py",
"dest_base": "youtube-dl",
'script': './youtube_dl/__main__.py',
'dest_base': 'youtube-dl',
'version': __version__,
'description': DESCRIPTION,
'comments': LONG_DESCRIPTION,
'product_name': 'youtube-dl',
'product_version': __version__,
}]
py2exe_params = {
'console': py2exe_console,
'options': {"py2exe": py2exe_options},
'options': {'py2exe': py2exe_options},
'zipfile': None
}
@ -72,7 +84,7 @@ else:
params['scripts'] = ['bin/youtube-dl']
class build_lazy_extractors(Command):
description = "Build the extractor lazy loading module"
description = 'Build the extractor lazy loading module'
user_options = []
def initialize_options(self):
@ -87,16 +99,11 @@ class build_lazy_extractors(Command):
dry_run=self.dry_run,
)
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))
setup(
name='youtube_dl',
version=__version__,
description='YouTube video downloader',
long_description='Small command-line program to download videos from'
' YouTube.com and other video sites.',
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
url='https://github.com/rg3/youtube-dl',
author='Ricardo Garcia',
author_email='ytdl@yt-dl.org',
@ -112,17 +119,17 @@ setup(
# test_requires = ['nosetest'],
classifiers=[
"Topic :: Multimedia :: Video",
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
"License :: Public Domain",
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
'Topic :: Multimedia :: Video',
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'License :: Public Domain',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
],
cmdclass={'build_lazy_extractors': build_lazy_extractors},

View File

@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
class TestIE(InfoExtractor):
@ -66,6 +66,11 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(ie._html_search_meta('d', html), '4')
self.assertEqual(ie._html_search_meta('e', html), '5')
self.assertEqual(ie._html_search_meta('f', html), '6')
self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
def test_download_json(self):
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')

View File

@ -6,6 +6,7 @@ from __future__ import unicode_literals
import os
import sys
import unittest
import collections
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@ -130,6 +131,15 @@ class TestAllURLsMatching(unittest.TestCase):
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo'])
def test_no_duplicated_ie_names(self):
name_accu = collections.defaultdict(list)
for ie in self.ies:
name_accu[ie.IE_NAME.lower()].append(type(ie).__name__)
for (ie_name, ie_list) in name_accu.items():
self.assertEqual(
len(ie_list), 1,
'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list)))
if __name__ == '__main__':
unittest.main()

View File

@ -87,6 +87,7 @@ class TestCompat(unittest.TestCase):
def test_compat_shlex_split(self):
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
def test_compat_etree_fromstring(self):
xml = '''

View File

@ -138,27 +138,27 @@ class TestProxy(unittest.TestCase):
self.proxy_thread.daemon = True
self.proxy_thread.start()
self.cn_proxy = compat_http_server.HTTPServer(
('localhost', 0), _build_proxy_handler('cn'))
self.cn_port = http_server_port(self.cn_proxy)
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
self.cn_proxy_thread.daemon = True
self.cn_proxy_thread.start()
self.geo_proxy = compat_http_server.HTTPServer(
('localhost', 0), _build_proxy_handler('geo'))
self.geo_port = http_server_port(self.geo_proxy)
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
self.geo_proxy_thread.daemon = True
self.geo_proxy_thread.start()
def test_proxy(self):
cn_proxy = 'localhost:{0}'.format(self.cn_port)
geo_proxy = 'localhost:{0}'.format(self.geo_port)
ydl = YoutubeDL({
'proxy': 'localhost:{0}'.format(self.port),
'cn_verification_proxy': cn_proxy,
'geo_verification_proxy': geo_proxy,
})
url = 'http://foo.com/bar'
response = ydl.urlopen(url).read().decode('utf-8')
self.assertEqual(response, 'normal: {0}'.format(url))
req = compat_urllib_request.Request(url)
req.add_header('Ytdl-request-proxy', cn_proxy)
req.add_header('Ytdl-request-proxy', geo_proxy)
response = ydl.urlopen(req).read().decode('utf-8')
self.assertEqual(response, 'cn: {0}'.format(url))
self.assertEqual(response, 'geo: {0}'.format(url))
def test_proxy_with_idn(self):
ydl = YoutubeDL({

View File

@ -60,11 +60,13 @@ from youtube_dl.utils import (
timeconvert,
unescapeHTML,
unified_strdate,
unified_timestamp,
unsmuggle_url,
uppercase_escape,
lowercase_escape,
url_basename,
urlencode_postdata,
urshift,
update_url_query,
version_tuple,
xpath_with_ns,
@ -283,8 +285,28 @@ class TestUtil(unittest.TestCase):
'20150202')
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
self.assertEqual(
unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
1417001400)
self.assertEqual(
unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
1422902860)
self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
@ -383,6 +405,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_url, url)
self.assertEqual(res_data, None)
smug_url = smuggle_url(url, {'a': 'b'})
smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
res_url, res_data = unsmuggle_url(smug_smug_url)
self.assertEqual(res_url, url)
self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
@ -959,5 +987,9 @@ The first line
self.assertRaises(ValueError, encode_base_n, 0, 70)
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
def test_urshift(self):
self.assertEqual(urshift(3, 1), 1)
self.assertEqual(urshift(-3, 1), 2147483646)
if __name__ == '__main__':
unittest.main()

View File

@ -196,8 +196,8 @@ class YoutubeDL(object):
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
cn_verification_proxy: URL of the proxy to use for IP address verification
on Chinese sites. (Experimental)
geo_verification_proxy: URL of the proxy to use for IP address verification
on geo-restricted sites. (Experimental)
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi
@ -304,6 +304,11 @@ class YoutubeDL(object):
self.params.update(params)
self.cache = Cache(self)
if self.params.get('cn_verification_proxy') is not None:
self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
if self.params.get('geo_verification_proxy') is None:
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
if params.get('bidi_workaround', False):
try:
import pty

View File

@ -382,6 +382,8 @@ def _real_main(argv=None):
'external_downloader_args': external_downloader_args,
'postprocessor_args': postprocessor_args,
'cn_verification_proxy': opts.cn_verification_proxy,
'geo_verification_proxy': opts.geo_verification_proxy,
}
with YoutubeDL(ydl_opts) as ydl:

View File

@ -196,6 +196,11 @@ def build_fragments_list(boot_info):
first_frag_number = fragment_run_entry_table[0]['first']
fragments_counter = itertools.count(first_frag_number)
for segment, fragments_count in segment_run_table['segment_run']:
# In some live HDS streams (for example Rai), `fragments_count` is
# abnormal and causing out-of-memory errors. It's OK to change the
# number of fragments for live streams as they are updated periodically
if fragments_count == 4294967295 and boot_info['live']:
fragments_count = 2
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
@ -329,7 +334,11 @@ class F4mFD(FragmentFD):
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
# From Adobe F4M 3.0 spec:
# The <baseURL> element SHALL be the base URL for all relative
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
# URLs should be relative to the location of the containing document.
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:

View File

@ -2,23 +2,137 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .theplatform import ThePlatformIE
from ..utils import (
smuggle_url,
update_url_query,
unescapeHTML,
extract_attributes,
get_element_by_attribute,
)
from ..compat import (
compat_urlparse,
)
class AENetworksIE(InfoExtractor):
class AENetworksBaseIE(ThePlatformIE):
_THEPLATFORM_KEY = 'crazyjava'
_THEPLATFORM_SECRET = 's3cr3t'
class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks'
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
'info_dict': {
'id': '22253814',
'ext': 'mp4',
'title': 'Winter Is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
},
'add_ie': ['ThePlatform'],
}, {
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
'info_dict': {
'id': '71889446852',
},
'playlist_mincount': 5,
}, {
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
'info_dict': {
'id': 'SERIES4317',
'title': 'Atlanta Plastic',
},
'playlist_mincount': 2,
}, {
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'only_matching': True
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True
}, {
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
'only_matching': True
}, {
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
'only_matching': True
}]
_DOMAIN_TO_REQUESTOR_ID = {
'history.com': 'HISTORY',
'aetv.com': 'AETV',
'mylifetime.com': 'LIFETIME',
'fyi.tv': 'FYI',
}
def _real_extract(self, url):
domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
display_id = show_path or movie_display_id
webpage = self._download_webpage(url, display_id)
if show_path:
url_parts = show_path.split('/')
url_parts_len = len(url_parts)
if url_parts_len == 1:
entries = []
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
entries.append(self.url_result(
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
return self.playlist_result(
entries, self._html_search_meta('aetn:SeriesId', webpage),
self._html_search_meta('aetn:SeriesTitle', webpage))
elif url_parts_len == 2:
entries = []
for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
episode_attributes = extract_attributes(episode_item)
episode_url = compat_urlparse.urljoin(
url, episode_attributes['data-canonical'])
entries.append(self.url_result(
episode_url, 'AENetworks',
episode_attributes['data-videoid']))
return self.playlist_result(
entries, self._html_search_meta('aetn:SeasonId', webpage))
query = {
'mbr': 'true',
'assetTypes': 'medium_video_s3'
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating'])
query['auth'] = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
info.update(self._search_json_ld(webpage, video_id, fatal=False))
media_url = update_url_query(media_url, query)
media_url = self._sign_url(media_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
self._sort_formats(formats)
info.update({
'id': video_id,
'formats': formats,
'subtitles': subtitles,
})
return info
class HistoryTopicIE(AENetworksBaseIE):
IE_NAME = 'history:topic'
IE_DESC = 'History.com Topic'
_VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)(?:/[^/]+(?:/(?P<video_display_id>[^/?#]+))?)?'
_TESTS = [{
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
'info_dict': {
'id': 'g12m5Gyt3fdR',
'id': '40700995724',
'ext': 'mp4',
'title': "Bet You Didn't Know: Valentine's Day",
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
@ -31,57 +145,61 @@ class AENetworksIE(InfoExtractor):
'skip_download': True,
},
'add_ie': ['ThePlatform'],
'expected_warnings': ['JSON-LD'],
}, {
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
'info_dict': {
'id': 'eg47EERs_JsZ',
'ext': 'mp4',
'title': 'Winter Is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
'info_dict':
{
'id': 'world-war-i-history',
'title': 'World War I History',
},
'add_ie': ['ThePlatform'],
'playlist_mincount': 24,
}, {
'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
'only_matching': True
'url': 'http://www.history.com/topics/world-war-i-history/videos',
'only_matching': True,
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
'only_matching': True
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history',
'only_matching': True,
}, {
'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
'only_matching': True
'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/speeches',
'only_matching': True,
}]
def _real_extract(self, url):
page_type, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id)
video_url_re = [
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
r"media_url\s*=\s*'([^']+)'"
]
video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
query = {'mbr': 'true'}
if page_type == 'shows':
query['assetTypes'] = 'medium_video_s3'
if 'switch=hds' in video_url:
query['switch'] = 'hls'
info = self._search_json_ld(webpage, video_id, fatal=False)
info.update({
def theplatform_url_result(self, theplatform_url, video_id, query):
return {
'_type': 'url_transparent',
'id': video_id,
'url': smuggle_url(
update_url_query(video_url, query),
update_url_query(theplatform_url, query),
{
'sig': {
'key': 'crazyjava',
'secret': 's3cr3t'},
'key': self._THEPLATFORM_KEY,
'secret': self._THEPLATFORM_SECRET,
},
'force_smil_url': True
}),
})
return info
'ie_key': 'ThePlatform',
}
def _real_extract(self, url):
topic_id, video_display_id = re.match(self._VALID_URL, url).groups()
if video_display_id:
webpage = self._download_webpage(url, video_display_id)
release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
release_url = unescapeHTML(release_url)
return self.theplatform_url_result(
release_url, video_id, {
'mbr': 'true',
'switch': 'hls'
})
else:
webpage = self._download_webpage(url, topic_id)
entries = []
for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage):
video_attributes = extract_attributes(episode_item)
entries.append(self.theplatform_url_result(
video_attributes['data-release-url'], video_attributes['data-id'], {
'mbr': 'true',
'switch': 'hls'
}))
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))

View File

@ -7,6 +7,8 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
parse_duration,
unified_strdate,
)
@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
_TESTS = [{
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
'info_dict': {
'id': 'manofsteel',
'id': '5111',
'title': 'Man of Steel',
},
'playlist': [
{
@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
'id': 'blackthorn',
},
'playlist_mincount': 2,
'expected_warnings': ['Unable to download JSON metadata'],
}, {
# json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
'info_dict': {
'id': '15881',
'title': 'Kung Fu Panda 3',
},
'playlist_mincount': 4,
}, {
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
'only_matching': True,
@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
movie = mobj.group('movie')
uploader_id = mobj.group('company')
webpage = self._download_webpage(url, movie)
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
film_data = self._download_json(
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
film_id, fatal=False)
if film_data:
entries = []
for clip in film_data.get('clips', []):
clip_title = clip['title']
formats = []
for version, version_data in clip.get('versions', {}).items():
for size, size_data in version_data.get('sizes', {}).items():
src = size_data.get('src')
if not src:
continue
formats.append({
'format_id': '%s-%s' % (version, size),
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
'language': version[:2],
})
self._sort_formats(formats)
entries.append({
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
'formats': formats,
'title': clip_title,
'thumbnail': clip.get('screen') or clip.get('thumb'),
'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
'upload_date': unified_strdate(clip.get('posted')),
'uploader_id': uploader_id,
})
page_data = film_data.get('page', {})
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s):

View File

@ -419,6 +419,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
'info_dict': {
'id': 'PL-013263',
'title': 'Areva & Uramin',
'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
},
'playlist_mincount': 6,
}, {

View File

@ -585,6 +585,13 @@ class BrightcoveNewIE(InfoExtractor):
'format_id': build_format_id('rtmp'),
})
formats.append(f)
errors = json_data.get('errors')
if not formats and errors:
error = errors[0]
raise ExtractorError(
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
self._sort_formats(formats)
subtitles = {}

View File

@ -5,6 +5,7 @@ import json
import re
from .common import InfoExtractor
from .facebook import FacebookIE
class BuzzFeedIE(InfoExtractor):
@ -20,11 +21,11 @@ class BuzzFeedIE(InfoExtractor):
'info_dict': {
'id': 'aVCR29aE_OQ',
'ext': 'mp4',
'title': 'Angry Ram destroys a punching bag..',
'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
'upload_date': '20141024',
'uploader_id': 'Buddhanz1',
'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
'uploader': 'Buddhanz',
'title': 'Angry Ram destroys a punching bag',
'uploader': 'Angry Ram',
}
}]
}, {
@ -41,13 +42,30 @@ class BuzzFeedIE(InfoExtractor):
'info_dict': {
'id': 'mVmBL8B-In0',
'ext': 'mp4',
'title': 're:Munchkin the Teddy Bear gets her exercise',
'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
'upload_date': '20141124',
'uploader_id': 'CindysMunchkin',
'description': 're:© 2014 Munchkin the',
'uploader': 're:^Munchkin the',
'title': 're:Munchkin the Teddy Bear gets her exercise',
},
}]
}, {
'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
'info_dict': {
'id': 'the-most-adorable-crash-landing-ever',
'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
'description': 'This gosling knows how to stick a landing.',
},
'playlist': [{
'md5': '763ca415512f91ca62e4621086900a23',
'info_dict': {
'id': '971793786185728',
'ext': 'mp4',
'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
'uploader': 'Calgary Outdoor Centre-University of Calgary',
},
}],
'add_ie': ['Facebook'],
}]
def _real_extract(self, url):
@ -66,6 +84,10 @@ class BuzzFeedIE(InfoExtractor):
continue
entries.append(self.url_result(video['url']))
facebook_url = FacebookIE._extract_url(webpage)
if facebook_url:
entries.append(self.url_result(facebook_url))
return {
'_type': 'playlist',
'id': playlist_id,

View File

@ -80,9 +80,6 @@ class CBSInteractiveIE(ThePlatformIE):
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
formats, subtitles = [], {}
if site == 'cnet':
formats, subtitles = self._extract_theplatform_smil(
self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id)
for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue
@ -94,7 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
self._sort_formats(formats)
info = self.get_metadata('kYEXFC/%s' % media_guid_path, video_id)
info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
info.update({
'id': video_id,
'display_id': display_id,

View File

@ -749,10 +749,12 @@ class InfoExtractor(object):
return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
if not isinstance(name, (list, tuple)):
name = [name]
if display_name is None:
display_name = name
display_name = name[0]
return self._html_search_regex(
self._meta_regex(name),
[self._meta_regex(n) for n in name],
html, display_name, fatal=fatal, group='content', **kwargs)
def _dc_search_uploader(self, html):
@ -876,7 +878,11 @@ class InfoExtractor(object):
f['ext'] = determine_ext(f['url'])
if isinstance(field_preference, (list, tuple)):
return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
return tuple(
f.get(field)
if f.get(field) is not None
else ('' if field == 'format_id' else -1)
for field in field_preference)
preference = f.get('preference')
if preference is None:
@ -1723,6 +1729,13 @@ class InfoExtractor(object):
def _mark_watched(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses')
def geo_verification_headers(self):
headers = {}
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
if geo_verification_proxy:
headers['Ytdl-request-proxy'] = geo_verification_proxy
return headers
class SearchInfoExtractor(InfoExtractor):
"""

View File

@ -0,0 +1,30 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class CTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
'info_dict': {
'id': '706966',
'ext': 'mp4',
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
'upload_date': '20150919',
'timestamp': 1442624700,
},
'expected_warnings': ['HTTP Error 404'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'id': video_id,
'url': '9c9media:ctv_web:%s' % video_id,
'ie_key': 'NineCNineMedia',
}

View File

@ -0,0 +1,65 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import orderedSet
class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
'info_dict': {
'id': '901995',
'ext': 'mp4',
'title': 'Extended: \'That person cannot be me\' Johnson says',
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284,
'upload_date': '20160630',
}
}, {
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
'info_dict':
{
'id': '1.2966224',
},
'playlist_mincount': 19,
}, {
'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
'info_dict':
{
'id': '1.2876780',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.ctvnews.ca/1.810401',
'only_matching': True,
}, {
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
'only_matching': True,
}]
def _real_extract(self, url):
page_id = self._match_id(url)
def ninecninemedia_url_result(clip_id):
return {
'_type': 'url_transparent',
'id': clip_id,
'url': '9c9media:ctvnews_web:%s' % clip_id,
'ie_key': 'NineCNineMedia',
}
if page_id.isdigit():
return ninecninemedia_url_result(page_id)
else:
webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={
'ot': 'example.AjaxPageLayout.ot',
'maxItemsPerPage': 1000000,
})
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
return self.playlist_result(entries, page_id)

View File

@ -20,7 +20,7 @@ from ..utils import (
class DCNIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
def _real_extract(self, url):
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
@ -55,30 +55,32 @@ class DCNBaseIE(InfoExtractor):
'is_live': is_live,
}
def _extract_video_formats(self, webpage, video_id, entry_protocol):
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
formats = []
m3u8_url = self._html_search_regex(
r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False)
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None))
rtsp_url = self._search_regex(
r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
if rtsp_url:
formats.append({
'url': rtsp_url,
'format_id': 'rtsp',
})
format_url_base = 'http' + self._html_search_regex(
[
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
r'<a[^>]+href="rtsp(://[^"]+)"'
], webpage, 'format url')
# TODO: Current DASH formats are broken - $Time$ pattern in
# <SegmentTemplate> not implemented yet
# formats.extend(self._extract_mpd_formats(
# format_url_base + '/manifest.mpd',
# video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_m3u8_formats(
format_url_base + '/playlist.m3u8', video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
format_url_base + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
self._sort_formats(formats)
return formats
class DCNVideoIE(DCNBaseIE):
IE_NAME = 'dcn:video'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
'info_dict':
{
@ -94,7 +96,10 @@ class DCNVideoIE(DCNBaseIE):
# m3u8 download
'skip_download': True,
},
}
}, {
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@ -120,7 +125,7 @@ class DCNVideoIE(DCNBaseIE):
class DCNLiveIE(DCNBaseIE):
IE_NAME = 'dcn:live'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
def _real_extract(self, url):
channel_id = self._match_id(url)
@ -147,7 +152,7 @@ class DCNLiveIE(DCNBaseIE):
class DCNSeasonIE(InfoExtractor):
IE_NAME = 'dcn:season'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
_TEST = {
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
'info_dict':

View File

@ -50,6 +50,14 @@ class EaglePlatformIE(InfoExtractor):
'skip': 'Georestricted',
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
webpage)
if mobj is not None:
return mobj.group('url')
@staticmethod
def _handle_error(response):
status = int_or_none(response.get('status', 200))

View File

@ -20,7 +20,10 @@ from .adobetv import (
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
from .aenetworks import AENetworksIE
from .aenetworks import (
AENetworksIE,
HistoryTopicIE,
)
from .afreecatv import AfreecaTVIE
from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
@ -168,6 +171,8 @@ from .crunchyroll import (
)
from .cspan import CSpanIE
from .ctsnews import CtsNewsIE
from .ctv import CTVIE
from .ctvnews import CTVNewsIE
from .cultureunplugged import CultureUnpluggedIE
from .cwtv import CWTVIE
from .dailymail import DailyMailIE
@ -276,6 +281,7 @@ from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
from .funimation import FunimationIE
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .gameinformer import GameInformerIE
from .gamekings import GamekingsIE
from .gameone import (
@ -320,6 +326,10 @@ from .hotnewhiphop import HotNewHipHopIE
from .hotstar import HotStarIE
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .hrti import (
HRTiIE,
HRTiPlaylistIE,
)
from .huffpost import HuffPostIE
from .hypem import HypemIE
from .iconosquare import IconosquareIE
@ -422,6 +432,7 @@ from .makerschannel import MakersChannelIE
from .makertv import MakerTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .meta import METAIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mgoon import MgoonIE
@ -454,6 +465,7 @@ from .motherless import MotherlessIE
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE
from .msn import MSNIE
from .mtv import (
MTVIE,
MTVServicesEmbeddedIE,
@ -521,6 +533,7 @@ from .nick import (
NickDeIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .noco import NocoIE
from .normalboots import NormalbootsIE
@ -606,6 +619,7 @@ from .pluralsight import (
PluralsightCourseIE,
)
from .podomatic import PodomaticIE
from .polskieradio import PolskieRadioIE
from .porn91 import Porn91IE
from .pornhd import PornHdIE
from .pornhub import (
@ -704,10 +718,12 @@ from .shahid import ShahidIE
from .shared import SharedIE
from .sharesix import ShareSixIE
from .sina import SinaIE
from .sixplay import SixPlayIE
from .skynewsarabia import (
SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
)
from .skysports import SkySportsIE
from .slideshare import SlideshareIE
from .slutload import SlutloadIE
from .smotri import (
@ -889,6 +905,7 @@ from .udn import UDNEmbedIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .urplay import URPlayIE
from .usatoday import USATodayIE
from .ustream import UstreamIE, UstreamChannelIE
from .ustudio import (
@ -915,6 +932,7 @@ from .vice import (
ViceIE,
ViceShowIE,
)
from .vidbit import VidbitIE
from .viddler import ViddlerIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
@ -1048,6 +1066,7 @@ from .youtube import (
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
YoutubeSharedVideoIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE,

View File

@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
if mobj is not None:
return mobj.group('url')
# Facebook API embed
# see https://developers.facebook.com/docs/plugins/embedded-video-player
mobj = re.search(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage)
if mobj is not None:
return mobj.group('url')
def _login(self):
(useremail, password) = self._get_login_info()
if useremail is None:

View File

@ -0,0 +1,35 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .ooyala import OoyalaIE
class FusionIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
'info_dict': {
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
'ext': 'mp4',
'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
'duration': 140.0,
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://fusion.net/video/201781',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
ooyala_code = self._search_regex(
r'data-video-id=(["\'])(?P<code>.+?)\1',
webpage, 'ooyala code', group='code')
return OoyalaIE._build_url_result(ooyala_code)

View File

@ -64,6 +64,9 @@ from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
from .vessel import VesselIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
class GenericIE(InfoExtractor):
@ -920,6 +923,24 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
{
# Kaltura embedded via quoted entry_id
'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
'info_dict': {
'id': '0_utuok90b',
'ext': 'mp4',
'title': '06_matthew_brender_raj_dutt',
'timestamp': 1466638791,
'upload_date': '20160622',
},
'add_ie': ['Kaltura'],
'expected_warnings': [
'Could not send HEAD request'
],
'params': {
'skip_download': True,
}
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@ -1091,12 +1112,17 @@ class GenericIE(InfoExtractor):
# Dailymotion Cloud video
{
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
'md5': '49444254273501a64675a7e68c502681',
'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
'info_dict': {
'id': '5585de919473990de4bee11b',
'id': 'x2uy8t3',
'ext': 'mp4',
'title': 'Le débat',
'title': 'Sauvons les abeilles ! - Le débat',
'description': 'md5:d9082128b1c5277987825d684939ca26',
'thumbnail': 're:^https?://.*\.jpe?g$',
'timestamp': 1434970506,
'upload_date': '20150622',
'uploader': 'Public Sénat',
'uploader_id': 'xa9gza',
}
},
# OnionStudios embed
@ -1220,6 +1246,70 @@ class GenericIE(InfoExtractor):
'uploader': 'www.hudl.com',
},
},
# twitter:player embed
{
'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
'md5': 'a3e0df96369831de324f0778e126653c',
'info_dict': {
'id': '4909620399001',
'ext': 'mp4',
'title': 'What Do Black Holes Sound Like?',
'description': 'what do black holes sound like',
'upload_date': '20160524',
'uploader_id': '29913724001',
'timestamp': 1464107587,
'uploader': 'TheAtlantic',
},
'add_ie': ['BrightcoveLegacy'],
},
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
'md5': 'fbcde74f534176ecb015849146dd3aee',
'info_dict': {
'id': '599637780109885',
'ext': 'mp4',
'title': 'Facebook video #599637780109885',
},
},
# Facebook API embed
{
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
'md5': 'a47372ee61b39a7b90287094d447d94e',
'info_dict': {
'id': '10153467542406923',
'ext': 'mp4',
'title': 'Facebook video #10153467542406923',
},
},
# Wordpress "YouTube Video Importer" plugin
{
'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
'md5': 'd16797741b560b485194eddda8121b48',
'info_dict': {
'id': 'HNTXWDXV9Is',
'ext': 'mp4',
'title': 'Blue Devils Drumline Stanford lot 2016',
'upload_date': '20160627',
'uploader_id': 'GENOCIDE8GENERAL10',
'uploader': 'cylus cyrus',
},
},
{
# video stored on custom kaltura server
'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
'md5': '537617d06e64dfed891fa1593c4b30cc',
'info_dict': {
'id': '0_1iotm5bh',
'ext': 'mp4',
'title': 'Elecciones británicas: 5 lecciones para Rajoy',
'description': 'md5:435a89d68b9760b92ce67ed227055f16',
'uploader_id': 'videos.expansion@el-mundo.net',
'upload_date': '20150429',
'timestamp': 1430303472,
},
'add_ie': ['Kaltura'],
},
]
def report_following_redirect(self, new_url):
@ -1576,6 +1666,13 @@ class GenericIE(InfoExtractor):
if matches:
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
# Look for Wordpress "YouTube Video Importer" plugin
matches = re.findall(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
if matches:
return _playlist_from_matches(matches, lambda m: m[-1])
# Look for embedded Dailymotion player
matches = re.findall(
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
@ -1718,10 +1815,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
# Look for embedded Facebook player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Facebook')
facebook_url = FacebookIE._extract_url(webpage)
if facebook_url is not None:
return self.url_result(facebook_url, 'Facebook')
# Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
@ -1903,18 +1999,14 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
if mobj is not None:
return self.url_result(smuggle_url(
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
{'source_url': url}), 'Kaltura')
kaltura_url = KalturaIE._extract_url(webpage)
if kaltura_url:
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
# Look for Eagle.Platform embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'EaglePlatform')
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
if eagleplatform_url:
return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
# Look for ClipYou (uses Eagle.Platform) embeds
mobj = re.search(
@ -2060,6 +2152,11 @@ class GenericIE(InfoExtractor):
'uploader': video_uploader,
}
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
if embed_url:
return self.url_result(embed_url)
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True

View File

@ -0,0 +1,202 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
parse_age_limit,
sanitized_Request,
try_get,
)
class HRTiBaseIE(InfoExtractor):
"""
Base Information Extractor for Croatian Radiotelevision
video on demand site https://hrti.hrt.hr
Reverse engineered from the JavaScript app in app.min.js
"""
_NETRC_MACHINE = 'hrti'
_APP_LANGUAGE = 'hr'
_APP_VERSION = '1.1'
_APP_PUBLICATION_ID = 'all_in_one'
_API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json'
def _initialize_api(self):
init_data = {
'application_publication_id': self._APP_PUBLICATION_ID
}
uuid = self._download_json(
self._API_URL, None, note='Downloading uuid',
errnote='Unable to download uuid',
data=json.dumps(init_data).encode('utf-8'))['uuid']
app_data = {
'uuid': uuid,
'application_publication_id': self._APP_PUBLICATION_ID,
'application_version': self._APP_VERSION
}
req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
req.get_method = lambda: 'PUT'
resources = self._download_json(
req, None, note='Downloading session information',
errnote='Unable to download session information')
self._session_id = resources['session_id']
modules = resources['modules']
self._search_url = modules['vod_catalog']['resources']['search']['uri'].format(
language=self._APP_LANGUAGE,
application_id=self._APP_PUBLICATION_ID)
self._login_url = (modules['user']['resources']['login']['uri'] +
'/format/json').format(session_id=self._session_id)
self._logout_url = modules['user']['resources']['logout']['uri']
def _login(self):
(username, password) = self._get_login_info()
# TODO: figure out authentication with cookies
if username is None or password is None:
self.raise_login_required()
auth_data = {
'username': username,
'password': password,
}
try:
auth_info = self._download_json(
self._login_url, None, note='Logging in', errnote='Unable to log in',
data=json.dumps(auth_data).encode('utf-8'))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406:
auth_info = self._parse_json(e.cause.read().encode('utf-8'), None)
else:
raise
error_message = auth_info.get('error', {}).get('message')
if error_message:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error_message),
expected=True)
self._token = auth_info['secure_streaming_token']
def _real_initialize(self):
self._initialize_api()
self._login()
class HRTiIE(HRTiBaseIE):
_VALID_URL = r'''(?x)
(?:
hrti:(?P<short_id>[0-9]+)|
https?://
hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
)
'''
_TESTS = [{
'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd',
'info_dict': {
'id': '2181385',
'display_id': 'republika-dokumentarna-serija-16-hd',
'ext': 'mp4',
'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)',
'description': 'md5:48af85f620e8e0e1df4096270568544f',
'duration': 2922,
'view_count': int,
'average_rating': int,
'episode_number': int,
'season_number': int,
'age_limit': 12,
},
'skip': 'Requires account credentials',
}, {
'url': 'https://hrti.hrt.hr/#/video/show/2181385/',
'only_matching': True,
}, {
'url': 'hrti:2181385',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('short_id') or mobj.group('id')
display_id = mobj.group('display_id') or video_id
video = self._download_json(
'%s/video_id/%s/format/json' % (self._search_url, video_id),
display_id, 'Downloading video metadata JSON')['video'][0]
title_info = video['title']
title = title_info['title_long']
movie = video['video_assets']['movie'][0]
m3u8_url = movie['url'].format(TOKEN=self._token)
formats = self._extract_m3u8_formats(
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)
description = clean_html(title_info.get('summary_long'))
age_limit = parse_age_limit(video.get('parental_control', {}).get('rating'))
view_count = int_or_none(video.get('views'))
average_rating = int_or_none(video.get('user_rating'))
duration = int_or_none(movie.get('duration'))
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'average_rating': average_rating,
'age_limit': age_limit,
'formats': formats,
}
class HRTiPlaylistIE(HRTiBaseIE):
_VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
_TESTS = [{
'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
'info_dict': {
'id': '212',
'title': 'ekumena',
},
'playlist_mincount': 8,
'skip': 'Requires account credentials',
}, {
'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
category_id = mobj.group('id')
display_id = mobj.group('display_id') or category_id
response = self._download_json(
'%s/category_id/%s/format/json' % (self._search_url, category_id),
display_id, 'Downloading video metadata JSON')
video_ids = try_get(
response, lambda x: x['video_listings'][0]['alternatives'][0]['list'],
list) or [video['id'] for video in response.get('videos', []) if video.get('id')]
entries = [self.url_result('hrti:%s' % video_id) for video_id in video_ids]
return self.playlist_result(entries, category_id, display_id)

View File

@ -3,28 +3,22 @@ from __future__ import unicode_literals
import hashlib
import itertools
import math
import os
import random
import re
import time
import uuid
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
)
from ..utils import (
clean_html,
decode_packed_codes,
get_element_by_id,
get_element_by_attribute,
ExtractorError,
ohdave_rsa_encrypt,
remove_start,
sanitized_Request,
urlencode_postdata,
url_basename,
)
@ -171,70 +165,21 @@ class IqiyiIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
'md5': '2cb594dc2781e6c941a110d8f358118b',
# MD5 checksum differs on my machine and Travis CI
'info_dict': {
'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
'ext': 'mp4',
'title': '美国德州空中惊现奇异云团 酷似UFO',
'ext': 'f4v',
}
}, {
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
'md5': '667171934041350c5de3f5015f7f1152',
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb',
'title': '名侦探柯南第752集',
},
'playlist': [{
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}],
'params': {
'skip_download': True,
'ext': 'mp4',
'title': '名侦探柯南 国语版第752集 迫近灰原秘密的黑影 下篇',
},
'skip': 'Geo-restricted to China',
}, {
'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
'only_matching': True,
@ -250,22 +195,10 @@ class IqiyiIE(InfoExtractor):
'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
'info_dict': {
'id': 'f3cf468b39dddb30d676f89a91200dc1',
'ext': 'mp4',
'title': '泰坦尼克号',
},
'playlist': [{
'info_dict': {
'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
'ext': 'f4v',
'title': '泰坦尼克号',
},
}, {
'info_dict': {
'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
'ext': 'f4v',
'title': '泰坦尼克号',
},
}],
'expected_warnings': ['Needs a VIP account for full video'],
'skip': 'Geo-restricted to China',
}, {
'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
'info_dict': {
@ -278,20 +211,15 @@ class IqiyiIE(InfoExtractor):
'only_matching': True,
}]
_FORMATS_MAP = [
('1', 'h6'),
('2', 'h5'),
('3', 'h4'),
('4', 'h3'),
('5', 'h2'),
('10', 'h1'),
]
AUTH_API_ERRORS = {
# No preview available (不允许试看鉴权失败)
'Q00505': 'This video requires a VIP account',
# End of preview time (试看结束鉴权失败)
'Q00506': 'Needs a VIP account for full video',
_FORMATS_MAP = {
'96': 1, # 216p, 240p
'1': 2, # 336p, 360p
'2': 3, # 480p, 504p
'21': 4, # 504p
'4': 5, # 720p
'17': 5, # 720p
'5': 6, # 1072p, 1080p
'18': 7, # 1080p
}
def _real_initialize(self):
@ -352,177 +280,23 @@ class IqiyiIE(InfoExtractor):
return True
def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning):
auth_params = {
# version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
'version': '2.0',
'platform': 'b6c13e26323c537d',
'aid': tvid,
def get_raw_data(self, tvid, video_id):
tm = int(time.time() * 1000)
key = 'd5fb4bd9d50c4be6948c97edd7254b0e'
sc = md5_text(compat_str(tm) + key + tvid)
params = {
'tvid': tvid,
'uid': '',
'deviceId': _uuid,
'playType': 'main', # XXX: always main?
'filename': os.path.splitext(url_basename(api_video_url))[0],
}
qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query)
for key, val in qd_items.items():
auth_params[key] = val[0]
auth_req = sanitized_Request(
'http://api.vip.iqiyi.com/services/ckn.action',
urlencode_postdata(auth_params))
# iQiyi server throws HTTP 405 error without the following header
auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
auth_result = self._download_json(
auth_req, video_id,
note='Downloading video authentication JSON',
errnote='Unable to download video authentication JSON')
code = auth_result.get('code')
msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code
if code == 'Q00506':
if do_report_warning:
self.report_warning(msg)
return False
if 'data' not in auth_result:
if msg is not None:
raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True)
raise ExtractorError('Unexpected error from Iqiyi auth API')
return auth_result['data']
def construct_video_urls(self, data, video_id, _uuid, tvid):
def do_xor(x, y):
a = y % 3
if a == 1:
return x ^ 121
if a == 2:
return x ^ 72
return x ^ 103
def get_encode_code(l):
a = 0
b = l.split('-')
c = len(b)
s = ''
for i in range(c - 1, -1, -1):
a = do_xor(int(b[c - i - 1], 16), i)
s += chr(a)
return s[::-1]
def get_path_key(x, format_id, segment_index):
mg = ')(*&^flash@#$%a'
tm = self._download_json(
'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
)['t']
t = str(int(math.floor(int(tm) / (600.0))))
return md5_text(t + mg + x)
video_urls_dict = {}
need_vip_warning_report = True
for format_item in data['vp']['tkl'][0]['vs']:
if 0 < int(format_item['bid']) <= 10:
format_id = self.get_format(format_item['bid'])
else:
continue
video_urls = []
video_urls_info = format_item['fs']
if not format_item['fs'][0]['l'].startswith('/'):
t = get_encode_code(format_item['fs'][0]['l'])
if t.endswith('mp4'):
video_urls_info = format_item['flvs']
for segment_index, segment in enumerate(video_urls_info):
vl = segment['l']
if not vl.startswith('/'):
vl = get_encode_code(vl)
is_vip_video = '/vip/' in vl
filesize = segment['b']
base_url = data['vp']['du'].split('/')
if not is_vip_video:
key = get_path_key(
vl.split('/')[-1].split('.')[0], format_id, segment_index)
base_url.insert(-1, key)
base_url = '/'.join(base_url)
param = {
'su': _uuid,
'qyid': uuid.uuid4().hex,
'client': '',
'z': '',
'bt': '',
'ct': '',
'tn': str(int(time.time()))
}
api_video_url = base_url + vl
if is_vip_video:
api_video_url = api_video_url.replace('.f4v', '.hml')
auth_result = self._authenticate_vip_video(
api_video_url, video_id, tvid, _uuid, need_vip_warning_report)
if auth_result is False:
need_vip_warning_report = False
break
param.update({
't': auth_result['t'],
# cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
'cid': 'afbe8fd3d73448c9',
'vid': video_id,
'QY00001': auth_result['u'],
})
api_video_url += '?' if '?' not in api_video_url else '&'
api_video_url += compat_urllib_parse_urlencode(param)
js = self._download_json(
api_video_url, video_id,
note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
video_url = js['l']
video_urls.append(
(video_url, filesize))
video_urls_dict[format_id] = video_urls
return video_urls_dict
def get_format(self, bid):
matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)]
return matched_format_ids[0] if len(matched_format_ids) else None
def get_bid(self, format_id):
matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id]
return matched_bids[0] if len(matched_bids) else None
def get_raw_data(self, tvid, video_id, enc_key, _uuid):
tm = str(int(time.time()))
tail = tm + tvid
param = {
'key': 'fvip',
'src': md5_text('youtube-dl'),
'tvId': tvid,
'vid': video_id,
'vinfo': 1,
'tm': tm,
'enc': md5_text(enc_key + tail),
'qyid': _uuid,
'tn': random.random(),
# In iQiyi's flash player, um is set to 1 if there's a logged user
# Some 1080P formats are only available with a logged user.
# Here force um=1 to trick the iQiyi server
'um': 1,
'authkey': md5_text(md5_text('') + tail),
'k_tag': 1,
'src': '76f90cbd92f94a2e925d83e8ccd22cb7',
'sc': sc,
't': tm,
}
api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
compat_urllib_parse_urlencode(param)
raw_data = self._download_json(api_url, video_id)
return raw_data
def get_enc_key(self, video_id):
# TODO: automatic key extraction
# last update at 2016-01-22 for Zombie::bite
enc_key = '4a1caba4b4465345366f28da7c117d20'
return enc_key
return self._download_json(
'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
query=params, headers=self.geo_verification_headers())
def _extract_playlist(self, webpage):
PAGE_SIZE = 50
@ -571,58 +345,41 @@ class IqiyiIE(InfoExtractor):
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
video_id = self._search_regex(
r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
_uuid = uuid.uuid4().hex
enc_key = self.get_enc_key(video_id)
formats = []
for _ in range(5):
raw_data = self.get_raw_data(tvid, video_id)
raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
if raw_data['code'] != 'A00000':
if raw_data['code'] == 'A00111':
self.raise_geo_restricted()
raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
if raw_data['code'] != 'A000000':
raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
data = raw_data['data']
data = raw_data['data']
for stream in data['vidl']:
if 'm3utx' not in stream:
continue
vd = compat_str(stream['vd'])
formats.append({
'url': stream['m3utx'],
'format_id': vd,
'ext': 'mp4',
'preference': self._FORMATS_MAP.get(vd, -1),
'protocol': 'm3u8_native',
})
title = data['vi']['vn']
if formats:
break
# generate video_urls_dict
video_urls_dict = self.construct_video_urls(
data, video_id, _uuid, tvid)
self._sleep(5, video_id)
# construct info
entries = []
for format_id in video_urls_dict:
video_urls = video_urls_dict[format_id]
for i, video_url_info in enumerate(video_urls):
if len(entries) < i + 1:
entries.append({'formats': []})
entries[i]['formats'].append(
{
'url': video_url_info[0],
'filesize': video_url_info[-1],
'format_id': format_id,
'preference': int(self.get_bid(format_id))
}
)
self._sort_formats(formats)
title = (get_element_by_id('widget-videotitle', webpage) or
clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)))
for i in range(len(entries)):
self._sort_formats(entries[i]['formats'])
entries[i].update(
{
'id': '%s_part%d' % (video_id, i + 1),
'title': title,
}
)
if len(entries) > 1:
info = {
'_type': 'multi_video',
'id': video_id,
'title': title,
'entries': entries,
}
else:
info = entries[0]
info['id'] = video_id
info['title'] = title
return info
return {
'id': video_id,
'title': title,
'formats': formats,
}

View File

@ -6,7 +6,6 @@ import base64
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_urlparse,
compat_parse_qs,
)
@ -15,6 +14,7 @@ from ..utils import (
ExtractorError,
int_or_none,
unsmuggle_url,
smuggle_url,
)
@ -34,7 +34,8 @@ class KalturaIE(InfoExtractor):
)(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
)
'''
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
_SERVICE_URL = 'http://cdnapi.kaltura.com'
_SERVICE_BASE = '/api_v3/index.php'
_TESTS = [
{
'url': 'kaltura:269692:1_1jc2y3e4',
@ -64,16 +65,50 @@ class KalturaIE(InfoExtractor):
}
]
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
@staticmethod
def _extract_url(webpage):
mobj = (
re.search(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
(?P<q1>['\"])wid(?P=q1)\s*:\s*
(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),
""", webpage) or
re.search(
r'''(?xs)
(?P<q1>["\'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?
(?P=q1).*?
(?:
entry_?[Ii]d|
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
)\s*:\s*
(?P<q3>["\'])(?P<id>.+?)(?P=q3)
''', webpage))
if mobj:
embed_info = mobj.groupdict()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
service_url = re.search(
r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
webpage)
if service_url:
url = smuggle_url(url, {'service_url': service_url.group(1)})
return url
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0]
if len(actions) > 1:
for i, a in enumerate(actions[1:], start=1):
for k, v in a.items():
params['%d:%s' % (i, k)] = v
query = compat_urllib_parse_urlencode(params)
url = self._API_BASE + query
data = self._download_json(url, video_id, *args, **kwargs)
data = self._download_json(
(service_url or self._SERVICE_URL) + self._SERVICE_BASE,
video_id, query=params, *args, **kwargs)
status = data if len(actions) == 1 else data[0]
if status.get('objectType') == 'KalturaAPIException':
@ -82,7 +117,7 @@ class KalturaIE(InfoExtractor):
return data
def _get_kaltura_signature(self, video_id, partner_id):
def _get_kaltura_signature(self, video_id, partner_id, service_url=None):
actions = [{
'apiVersion': '3.1',
'expiry': 86400,
@ -92,10 +127,10 @@ class KalturaIE(InfoExtractor):
'widgetId': '_%s' % partner_id,
}]
return self._kaltura_api_call(
video_id, actions, note='Downloading Kaltura signature')['ks']
video_id, actions, service_url, note='Downloading Kaltura signature')['ks']
def _get_video_info(self, video_id, partner_id):
signature = self._get_kaltura_signature(video_id, partner_id)
def _get_video_info(self, video_id, partner_id, service_url=None):
signature = self._get_kaltura_signature(video_id, partner_id, service_url)
actions = [
{
'action': 'null',
@ -118,7 +153,7 @@ class KalturaIE(InfoExtractor):
},
]
return self._kaltura_api_call(
video_id, actions, note='Downloading video info JSON')
video_id, actions, service_url, note='Downloading video info JSON')
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@ -127,7 +162,7 @@ class KalturaIE(InfoExtractor):
partner_id, entry_id = mobj.group('partner_id', 'id')
ks = None
if partner_id and entry_id:
info, flavor_assets = self._get_video_info(entry_id, partner_id)
info, flavor_assets = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
else:
path, query = mobj.group('path', 'query')
if not path and not query:
@ -175,12 +210,17 @@ class KalturaIE(InfoExtractor):
unsigned_url += '?referrer=%s' % referrer
return unsigned_url
data_url = info['dataUrl']
if '/flvclipper/' in data_url:
data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
formats = []
for f in flavor_assets:
# Continue if asset is not ready
if f['status'] != 2:
continue
video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id']))
video_url = sign_url(
'%s/flavorId/%s' % (data_url, f['id']))
formats.append({
'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f.get('fileExt'),
@ -193,9 +233,12 @@ class KalturaIE(InfoExtractor):
'width': int_or_none(f.get('width')),
'url': video_url,
})
m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp'))
formats.extend(self._extract_m3u8_formats(
m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
if '/playManifest/' in data_url:
m3u8_url = sign_url(data_url.replace(
'format/url', 'format/applehttp'))
formats.extend(self._extract_m3u8_formats(
m3u8_url, entry_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._check_formats(formats, entry_id)
self._sort_formats(formats)

View File

@ -26,11 +26,6 @@ class KuwoBaseIE(InfoExtractor):
def _get_formats(self, song_id, tolerate_ip_deny=False):
formats = []
for file_format in self._FORMATS:
headers = {}
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
headers['Ytdl-request-proxy'] = cn_verification_proxy
query = {
'format': file_format['ext'],
'br': file_format.get('br', ''),
@ -42,7 +37,7 @@ class KuwoBaseIE(InfoExtractor):
song_url = self._download_webpage(
'http://antiserver.kuwo.cn/anti.s',
song_id, note='Download %s url info' % file_format['format'],
query=query, headers=headers,
query=query, headers=self.geo_verification_headers(),
)
if song_url == 'IPDeny' and not tolerate_ip_deny:

View File

@ -1,60 +1,65 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_duration,
js_to_json,
smuggle_url,
)
class LA7IE(InfoExtractor):
IE_NAME = 'la7.tv'
_VALID_URL = r'''(?x)
https?://(?:www\.)?la7\.tv/
(?:
richplayer/\?assetid=|
\?contentId=
)
(?P<id>[0-9]+)'''
IE_NAME = 'la7.it'
_VALID_URL = r'''(?x)(https?://)?(?:
(?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
tg\.la7\.it/repliche-tgla7\?id=
)(?P<id>.+)'''
_TEST = {
'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
_TESTS = [{
# 'src' is a plain URL
'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
'info_dict': {
'id': '50355319',
'id': 'inccool8-02-10-2015-163722',
'ext': 'mp4',
'title': 'IL DIVO',
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
'duration': 6254,
'title': 'Inc.Cool8',
'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
'thumbnail': 're:^https?://.*',
'uploader_id': 'kdla7pillole@iltrovatore.it',
'timestamp': 1443814869,
'upload_date': '20151002',
},
'skip': 'Blocked in the US',
}
}, {
# 'src' is a dictionary
'url': 'http://tg.la7.it/repliche-tgla7?id=189080',
'md5': '6b0d8888d286e39870208dfeceaf456b',
'info_dict': {
'id': '189080',
'ext': 'mp4',
'title': 'TG LA7',
},
}, {
'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
doc = self._download_xml(xml_url, video_id)
video_title = doc.find('title').text
description = doc.find('description').text
duration = parse_duration(doc.find('duration').text)
thumbnail = doc.find('img').text
view_count = int(doc.find('views').text)
webpage = self._download_webpage(url, video_id)
prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:')
formats = [{
'format': vnode.find('quality').text,
'tbr': int(vnode.find('quality').text),
'url': vnode.find('fms').text.strip().replace('mp4:', prefix),
} for vnode in doc.findall('.//videos/video')]
self._sort_formats(formats)
player_data = self._parse_json(
self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
video_id, transform_source=js_to_json)
return {
'_type': 'url_transparent',
'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
'service_url': 'http://kdam.iltrovatore.it',
}),
'id': video_id,
'title': video_title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
'view_count': view_count,
'title': player_data['title'],
'description': self._og_search_description(webpage, default=None),
'thumbnail': player_data.get('poster'),
'ie_key': 'Kaltura',
}

View File

@ -20,9 +20,9 @@ from ..utils import (
int_or_none,
orderedSet,
parse_iso8601,
sanitized_Request,
str_or_none,
url_basename,
urshift,
)
@ -74,15 +74,11 @@ class LeIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def urshift(val, n):
return val >> n if val >= 0 else (val + 0x100000000) >> n
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
def ror(self, param1, param2):
_loc3_ = 0
while _loc3_ < param2:
param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
param1 = urshift(param1, 1) + ((param1 & 1) << 31)
_loc3_ += 1
return param1
@ -124,16 +120,11 @@ class LeIE(InfoExtractor):
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.le.com'
}
play_json_req = sanitized_Request(
'http://api.le.com/mms/out/video/playJson?' + compat_urllib_parse_urlencode(params)
)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
play_json = self._download_json(
play_json_req,
media_id, 'Downloading playJson data')
'http://api.le.com/mms/out/video/playJson',
media_id, 'Downloading playJson data', query=params,
headers=self.geo_verification_headers())
# Check for errors
playstatus = play_json['playstatus']

View File

@ -1,8 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -23,34 +21,5 @@ class M6IE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
'Downloading video RSS')
title = rss.find('./channel/item/title').text
description = rss.find('./channel/item/description').text
thumbnail = rss.find('./channel/item/visuel_clip_big').text
duration = int(rss.find('./channel/item/duration').text)
view_count = int(rss.find('./channel/item/nombre_vues').text)
formats = []
for format_id in ['lq', 'sd', 'hq', 'hd']:
video_url = rss.find('./channel/item/url_video_%s' % format_id)
if video_url is None:
continue
formats.append({
'url': video_url.text,
'format_id': format_id,
})
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'formats': formats,
}
video_id = self._match_id(url)
return self.url_result('6play:%s' % video_id, 'SixPlay', video_id)

View File

@ -0,0 +1,73 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .pladform import PladformIE
from ..utils import (
unescapeHTML,
int_or_none,
ExtractorError,
)
class METAIE(InfoExtractor):
_VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://video.meta.ua/5502115.video',
'md5': '71b6f3ee274bef16f1ab410f7f56b476',
'info_dict': {
'id': '5502115',
'ext': 'mp4',
'title': 'Sony Xperia Z camera test [HQ]',
'description': 'Xperia Z shoots video in FullHD HDR.',
'uploader_id': 'nomobile',
'uploader': 'CHЁZA.TV',
'upload_date': '20130211',
},
'add_ie': ['Youtube'],
}, {
'url': 'http://video.meta.ua/iframe/5502115',
'only_matching': True,
}, {
# pladform embed
'url': 'http://video.meta.ua/7121015.video',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
st_html5 = self._search_regex(
r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None)
if st_html5:
# uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js
json_str = ''
for i in range(0, len(st_html5), 3):
json_str += '&#x0%s;' % st_html5[i:i + 3]
uppod_data = self._parse_json(unescapeHTML(json_str), video_id)
error = uppod_data.get('customnotfound')
if error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
video_url = uppod_data['file']
info = {
'id': video_id,
'url': video_url,
'title': uppod_data.get('comment') or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage),
'duration': int_or_none(self._og_search_property(
'video:duration', webpage, default=None)),
}
if 'youtube.com/' in video_url:
info.update({
'_type': 'url_transparent',
'ie_key': 'Youtube',
})
return info
pladform_url = PladformIE._extract_url(webpage)
if pladform_url:
return self.url_result(pladform_url)

View File

@ -102,11 +102,11 @@ class MixcloudIE(InfoExtractor):
description = self._og_search_description(webpage)
like_count = parse_count(self._search_regex(
r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
webpage, 'like count', fatal=False))
webpage, 'like count', default=None))
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>'],
webpage, 'play count', fatal=False))
webpage, 'play count', default=None))
return {
'id': track_id,

122
youtube_dl/extractor/msn.py Normal file
View File

@ -0,0 +1,122 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
unescapeHTML,
)
class MSNIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE',
'md5': '8442f66c116cbab1ff7098f986983458',
'info_dict': {
'id': 'BBqQYNE',
'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message',
'ext': 'mp4',
'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message',
'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25',
'duration': 104,
'uploader': 'CBS Entertainment',
'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v',
},
}, {
'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
'only_matching': True,
}, {
'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH',
'only_matching': True,
}, {
# geo restricted
'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU',
'only_matching': True,
}, {
'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-raped-woman-comment/vi-AAhvzW6',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id, display_id = mobj.group('id', 'display_id')
webpage = self._download_webpage(url, display_id)
video = self._parse_json(
self._search_regex(
r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1',
webpage, 'video data', default='{}', group='data'),
display_id, transform_source=unescapeHTML)
if not video:
error = unescapeHTML(self._search_regex(
r'data-error=(["\'])(?P<error>.+?)\1',
webpage, 'error', group='error'))
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
title = video['title']
formats = []
for file_ in video.get('videoFiles', []):
format_url = file_.get('url')
if not format_url:
continue
ext = determine_ext(format_url)
# .ism is not yet supported (see
# https://github.com/rg3/youtube-dl/issues/8118)
if ext == 'ism':
continue
if 'm3u8' in format_url:
# m3u8_native should not be used here until
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
m3u8_formats = self._extract_m3u8_formats(
format_url, display_id, 'mp4',
m3u8_id='hls', fatal=False)
# Despite metadata in m3u8 all video+audio formats are
# actually video-only (no audio)
for f in m3u8_formats:
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
f['acodec'] = 'none'
formats.extend(m3u8_formats)
else:
formats.append({
'url': format_url,
'ext': 'mp4',
'format_id': 'http',
'width': int_or_none(file_.get('width')),
'height': int_or_none(file_.get('height')),
})
self._sort_formats(formats)
subtitles = {}
for file_ in video.get('files', []):
format_url = file_.get('url')
format_code = file_.get('formatCode')
if not format_url or not format_code:
continue
if compat_str(format_code) == '3100':
subtitles.setdefault(file_.get('culture', 'en'), []).append({
'ext': determine_ext(format_url, 'ttml'),
'url': format_url,
})
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': video.get('description'),
'thumbnail': video.get('headlineImage', {}).get('url'),
'duration': int_or_none(video.get('durationSecs')),
'uploader': video.get('sourceFriendly'),
'uploader_id': video.get('providerId'),
'creator': video.get('creator'),
'subtitles': subtitles,
'formats': formats,
}

View File

@ -1,6 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .theplatform import ThePlatformIE
from ..utils import (
smuggle_url,
url_basename,
@ -61,7 +62,7 @@ class NationalGeographicIE(InfoExtractor):
}
class NationalGeographicChannelIE(InfoExtractor):
class NationalGeographicChannelIE(ThePlatformIE):
IE_NAME = 'natgeo:channel'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)'
@ -102,12 +103,22 @@ class NationalGeographicChannelIE(InfoExtractor):
release_url = self._search_regex(
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
webpage, 'release url')
query = {
'mbr': 'true',
'switch': 'http',
}
is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
if is_auth == 'auth':
auth_resource_id = self._search_regex(
r"video_auth_resourceId\s*=\s*'([^']+)'",
webpage, 'auth resource id')
query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) or ''
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(
update_url_query(release_url, {'mbr': 'true', 'switch': 'http'}),
update_url_query(release_url, query),
{'force_smil_url': True}),
'display_id': display_id,
}

View File

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
parse_duration,
ExtractorError
)
class NineCNineMediaIE(InfoExtractor):
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
def _real_extract(self, url):
destination_code, video_id = re.match(self._VALID_URL, url).groups()
api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id)
content = self._download_json(api_base_url, video_id, query={
'$include': '[contentpackages]',
})
title = content['Name']
if len(content['ContentPackages']) > 1:
raise ExtractorError('multiple content packages')
content_package = content['ContentPackages'][0]
stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id']
stacks = self._download_json(stacks_base_url, video_id)['Items']
if len(stacks) > 1:
raise ExtractorError('multiple stacks')
stack = stacks[0]
stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id'])
formats = []
formats.extend(self._extract_m3u8_formats(
stack_base_url + 'm3u8', video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
stack_base_url + 'f4m', video_id,
f4m_id='hds', fatal=False))
mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False)
if mp4_url:
formats.append({
'url': mp4_url,
'format_id': 'mp4',
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': content.get('Desc') or content.get('ShortDesc'),
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
'duration': parse_duration(content.get('BroadcastTime')),
'formats': formats,
}

View File

@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
float_or_none,
)
@ -15,15 +16,14 @@ class OnionStudiosIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
'md5': 'd4851405d31adfadf71cd7a487b765bb',
'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
'info_dict': {
'id': '2937',
'ext': 'mp4',
'title': 'Hannibal charges forward, stops for a cocktail',
'description': 'md5:e786add7f280b7f0fe237b64cc73df76',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'The A.V. Club',
'uploader_id': 'TheAVClub',
'uploader_id': 'the-av-club',
},
}, {
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
@ -40,50 +40,39 @@ class OnionStudiosIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://www.onionstudios.com/embed?id=%s' % video_id, video_id)
video_data = self._download_json(
'http://www.onionstudios.com/video/%s.json' % video_id, video_id)
title = video_data['title']
formats = []
for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
ext = determine_ext(src)
if ext == 'm3u8':
for source in video_data.get('sources', []):
source_url = source.get('url')
if not source_url:
continue
content_type = source.get('content_type')
ext = determine_ext(source_url)
if content_type == 'application/x-mpegURL' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
else:
height = int_or_none(self._search_regex(
r'/(\d+)\.%s' % ext, src, 'height', default=None))
tbr = int_or_none(source.get('bitrate'))
formats.append({
'format_id': ext + ('-%sp' % height if height else ''),
'url': src,
'height': height,
'format_id': ext + ('-%d' % tbr if tbr else ''),
'url': source_url,
'width': int_or_none(source.get('width')),
'tbr': tbr,
'ext': ext,
'preference': 1,
})
self._sort_formats(formats)
title = self._search_regex(
r'share_title\s*=\s*(["\'])(?P<title>[^\1]+?)\1',
webpage, 'title', group='title')
description = self._search_regex(
r'share_description\s*=\s*(["\'])(?P<description>[^\'"]+?)\1',
webpage, 'description', default=None, group='description')
thumbnail = self._search_regex(
r'poster\s*=\s*(["\'])(?P<thumbnail>[^\1]+?)\1',
webpage, 'thumbnail', default=False, group='thumbnail')
uploader_id = self._search_regex(
r'twitter_handle\s*=\s*(["\'])(?P<uploader_id>[^\1]+?)\1',
webpage, 'uploader id', fatal=False, group='uploader_id')
uploader = self._search_regex(
r'window\.channelName\s*=\s*(["\'])Embedded:(?P<uploader>[^\1]+?)\1',
webpage, 'uploader', default=False, group='uploader')
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': video_data.get('poster_url'),
'uploader': video_data.get('channel_name'),
'uploader_id': video_data.get('channel_slug'),
'duration': float_or_none(video_data.get('duration', 1000)),
'tags': video_data.get('tags'),
'formats': formats,
}

View File

@ -516,9 +516,14 @@ class PBSIE(InfoExtractor):
# https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
continue
f_url = re.sub(r'\d+k|baseline', bitrate, http_url)
# This may produce invalid links sometimes (e.g.
# http://www.pbs.org/wgbh/frontline/film/suicide-plan)
if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate):
continue
f = m3u8_format.copy()
f.update({
'url': re.sub(r'\d+k|baseline', bitrate, http_url),
'url': f_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})

View File

@ -120,9 +120,12 @@ class PeriscopeUserIE(InfoExtractor):
title = user.get('display_name') or user.get('username')
description = user.get('description')
broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or
data_store.get('BroadcastCache', {}).get('broadcastIds', []))
entries = [
self.url_result(
'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id))
for broadcast_id in broadcast_ids]
return self.playlist_result(entries, user_id, title, description)

View File

@ -49,7 +49,7 @@ class PladformIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage)
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
if mobj:
return mobj.group('url')

View File

@ -0,0 +1,95 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
int_or_none,
strip_or_none,
unified_timestamp,
)
class PolskieRadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
'info_dict': {
'id': '1587943',
'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
},
'playlist': [{
'md5': '2984ee6ce9046d91fc233bc1a864a09a',
'info_dict': {
'id': '1540576',
'ext': 'mp3',
'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
'timestamp': 1456594200,
'upload_date': '20160227',
'duration': 2364,
},
}],
}, {
'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal',
'info_dict': {
'id': '1635803',
'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał',
'description': 'md5:01cb7d0cad58664095d72b51a1ebada2',
},
'playlist_mincount': 12,
}, {
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
'only_matching': True,
}, {
'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943',
'only_matching': True,
}, {
# with mp4 video
'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
content = self._search_regex(
r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
webpage, 'content')
timestamp = unified_timestamp(self._html_search_regex(
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
webpage, 'timestamp', fatal=False))
entries = []
media_urls = set()
for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content):
media = self._parse_json(data_media, playlist_id, fatal=False)
if not media.get('file') or not media.get('desc'):
continue
media_url = self._proto_relative_url(media['file'], 'http:')
if media_url in media_urls:
continue
media_urls.add(media_url)
entries.append({
'id': compat_str(media['id']),
'url': media_url,
'title': compat_urllib_parse_unquote(media['desc']),
'duration': int_or_none(media.get('length')),
'vcodec': 'none' if media.get('provider') == 'audio' else None,
'timestamp': timestamp,
})
title = self._og_search_title(webpage).strip()
description = strip_or_none(self._og_search_description(webpage))
return self.playlist_result(entries, playlist_id, title, description)

View File

@ -25,7 +25,15 @@ from ..aes import (
class PornHubIE(InfoExtractor):
_VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)'
IE_DESC = 'PornHub and Thumbzilla'
_VALID_URL = r'''(?x)
https?://
(?:
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[0-9a-z]+)
'''
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': '1e19b41231a02eba417839222ac9d58e',
@ -63,8 +71,24 @@ class PornHubIE(InfoExtractor):
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
}, {
# removed at the request of cam4.com
'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
'only_matching': True,
}, {
# removed at the request of the copyright owner
'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
'only_matching': True,
}, {
# removed by uploader
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
'only_matching': True,
}, {
# private video
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
'only_matching': True,
}, {
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
'only_matching': True,
}]
@classmethod
@ -87,8 +111,8 @@ class PornHubIE(InfoExtractor):
webpage = self._download_webpage(req, video_id)
error_msg = self._html_search_regex(
r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>',
webpage, 'error message', default=None)
r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>',
webpage, 'error message', default=None, group='error')
if error_msg:
error_msg = re.sub(r'\s+', ' ', error_msg)
raise ExtractorError(

View File

@ -5,7 +5,7 @@ import re
from hashlib import sha1
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
@ -71,6 +71,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
@ -86,6 +87,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
@ -101,6 +103,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
@ -116,6 +119,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
@ -131,6 +135,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
@ -227,70 +232,42 @@ class ProSiebenSat1IE(InfoExtractor):
]
def _extract_clip(self, url, webpage):
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
clip_id = self._html_search_regex(
self._CLIPID_REGEXES, webpage, 'clip id')
access_token = 'prosieben'
client_name = 'kolibri-2.0.19-splec4'
client_location = url
videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse_urlencode({
'access_token': access_token,
'client_location': client_location,
'client_name': client_name,
'ids': clip_id,
})
video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0]
video = self._download_json(
'http://vas.sim-technik.de/vas/live/v2/videos',
clip_id, 'Downloading videos JSON', query={
'access_token': access_token,
'client_location': client_location,
'client_name': client_name,
'ids': clip_id,
})[0]
if video.get('is_protected') is True:
raise ExtractorError('This video is DRM protected.', expected=True)
duration = float_or_none(video.get('duration'))
source_ids = [source['id'] for source in video['sources']]
source_ids_str = ','.join(map(str, source_ids))
source_ids = [compat_str(source['id']) for source in video['sources']]
g = '01!8d8F_)r9]4s[qeuXfP%'
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest()
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name])
.encode('utf-8')).hexdigest()
sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse_urlencode({
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
}))
sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON')
sources = self._download_json(
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
clip_id, 'Downloading sources JSON', query={
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
})
server_id = sources['server_id']
client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id,
client_location, source_ids_str, g, client_name])
.encode('utf-8')).hexdigest()
url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse_urlencode({
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
'server_id': server_id,
'source_ids': source_ids_str,
}))
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
description = self._html_search_regex(self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex(
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
formats = []
urls_sources = urls['sources']
if isinstance(urls_sources, dict):
urls_sources = urls_sources.values()
def fix_bitrate(bitrate):
bitrate = int_or_none(bitrate)
@ -298,37 +275,73 @@ class ProSiebenSat1IE(InfoExtractor):
return None
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
for source in urls_sources:
protocol = source['protocol']
source_url = source['url']
if protocol == 'rtmp' or protocol == 'rtmpe':
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
if not mobj:
formats = []
for source_id in source_ids:
client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest()
urls = self._download_json(
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
clip_id, 'Downloading urls JSON', fatal=False, query={
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
'server_id': server_id,
'source_ids': source_id,
})
if not urls:
continue
if urls.get('status_code') != 0:
raise ExtractorError('This video is unavailable', expected=True)
urls_sources = urls['sources']
if isinstance(urls_sources, dict):
urls_sources = urls_sources.values()
for source in urls_sources:
source_url = source.get('url')
if not source_url:
continue
path = mobj.group('path')
mp4colon_index = path.rfind('mp4:')
app = path[:mp4colon_index]
play_path = path[mp4colon_index:]
formats.append({
'url': '%s/%s' % (mobj.group('url'), app),
'app': app,
'play_path': play_path,
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
'page_url': 'http://www.prosieben.de',
'vbr': fix_bitrate(source['bitrate']),
'ext': 'mp4',
'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
})
elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
formats.extend(self._extract_f4m_formats(source_url, clip_id))
else:
formats.append({
'url': source_url,
'vbr': fix_bitrate(source['bitrate']),
})
protocol = source.get('protocol')
mimetype = source.get('mimetype')
if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
formats.extend(self._extract_f4m_formats(
source_url, clip_id, f4m_id='hds', fatal=False))
elif mimetype == 'application/x-mpegURL':
formats.extend(self._extract_m3u8_formats(
source_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
tbr = fix_bitrate(source['bitrate'])
if protocol in ('rtmp', 'rtmpe'):
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
if not mobj:
continue
path = mobj.group('path')
mp4colon_index = path.rfind('mp4:')
app = path[:mp4colon_index]
play_path = path[mp4colon_index:]
formats.append({
'url': '%s/%s' % (mobj.group('url'), app),
'app': app,
'play_path': play_path,
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
'page_url': 'http://www.prosieben.de',
'tbr': tbr,
'ext': 'flv',
'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
})
else:
formats.append({
'url': source_url,
'tbr': tbr,
'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
})
self._sort_formats(formats)
description = self._html_search_regex(
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex(
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
return {
'id': clip_id,
'title': title,

View File

@ -1,47 +1,141 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
determine_ext,
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
int_or_none,
parse_duration,
unified_strdate,
int_or_none,
update_url_query,
xpath_text,
)
class RaiTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
class RaiBaseIE(InfoExtractor):
def _extract_relinker_formats(self, relinker_url, video_id):
formats = []
for platform in ('mon', 'flash', 'native'):
relinker = self._download_xml(
relinker_url, video_id,
note='Downloading XML metadata for platform %s' % platform,
transform_source=fix_xml_ampersands,
query={'output': 45, 'pl': platform},
headers=self.geo_verification_headers())
media_url = find_xpath_attr(relinker, './url', 'type', 'content').text
if media_url == 'http://download.rai.it/video_no_available.mp4':
self.raise_geo_restricted()
ext = determine_ext(media_url)
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
continue
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'f4m':
manifest_url = update_url_query(
media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
formats.extend(self._extract_f4m_formats(
manifest_url, video_id, f4m_id='hds', fatal=False))
else:
bitrate = int_or_none(xpath_text(relinker, 'bitrate'))
formats.append({
'url': media_url,
'tbr': bitrate if bitrate > 0 else None,
'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
})
return formats
def _extract_from_content_id(self, content_id, base_url):
media = self._download_json(
'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
content_id, 'Downloading video JSON')
thumbnails = []
for image_type in ('image', 'image_medium', 'image_300'):
thumbnail_url = media.get(image_type)
if thumbnail_url:
thumbnails.append({
'url': compat_urlparse.urljoin(base_url, thumbnail_url),
})
formats = []
media_type = media['type']
if 'Audio' in media_type:
formats.append({
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
})
elif 'Video' in media_type:
formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id))
self._sort_formats(formats)
else:
raise ExtractorError('not a media file')
subtitles = {}
captions = media.get('subtitlesUrl')
if captions:
STL_EXT = '.stl'
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = [{
'ext': 'srt',
'url': captions,
}]
return {
'id': content_id,
'title': media['name'],
'description': media.get('desc'),
'thumbnails': thumbnails,
'uploader': media.get('author'),
'upload_date': unified_strdate(media.get('date')),
'duration': parse_duration(media.get('length')),
'formats': formats,
'subtitles': subtitles,
}
class RaiTVIE(RaiBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
_TESTS = [
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '96382709b61dd64a6b88e0f791e6df4c',
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
'info_dict': {
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
'ext': 'flv',
'ext': 'mp4',
'title': 'Report del 07/04/2014',
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
'upload_date': '20140407',
'duration': 6160,
'thumbnail': 're:^https?://.*\.jpg$',
}
},
{
# no m3u8 stream
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
'md5': 'd9751b78eac9710d62c2447b224dea39',
# HDS download, MD5 is unstable
'info_dict': {
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
'ext': 'flv',
'title': 'TG PRIMO TEMPO',
'upload_date': '20140612',
'duration': 1758,
'thumbnail': 're:^https?://.*\.jpg$',
},
'skip': 'Geo-restricted to Italy',
},
{
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
@ -67,127 +161,70 @@ class RaiTVIE(InfoExtractor):
},
{
'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
'md5': '496ab63e420574447f70d02578333437',
'md5': 'e57493e1cb8bc7c564663f363b171847',
'info_dict': {
'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
'ext': 'flv',
'ext': 'mp4',
'title': 'Il Candidato - Primo episodio: "Le Primarie"',
'description': 'md5:364b604f7db50594678f483353164fb8',
'upload_date': '20140923',
'duration': 386,
'thumbnail': 're:^https?://.*\.jpg$',
}
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
media = self._download_json(
'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % video_id,
video_id, 'Downloading video JSON')
thumbnails = []
for image_type in ('image', 'image_medium', 'image_300'):
thumbnail_url = media.get(image_type)
if thumbnail_url:
thumbnails.append({
'url': thumbnail_url,
})
subtitles = []
formats = []
media_type = media['type']
if 'Audio' in media_type:
formats.append({
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
})
elif 'Video' in media_type:
def fix_xml(xml):
return xml.replace(' tag elementi', '').replace('>/', '</')
relinker = self._download_xml(
media['mediaUri'] + '&output=43',
video_id, transform_source=fix_xml)
has_subtitle = False
for element in relinker.findall('element'):
media_url = xpath_text(element, 'url')
ext = determine_ext(media_url)
content_type = xpath_text(element, 'content-type')
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
media_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
video_id, f4m_id='hds', fatal=False))
elif ext == 'stl':
has_subtitle = True
elif content_type.startswith('video/'):
bitrate = int_or_none(xpath_text(element, 'bitrate'))
formats.append({
'url': media_url,
'tbr': bitrate if bitrate > 0 else None,
'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
})
elif content_type.startswith('image/'):
thumbnails.append({
'url': media_url,
})
self._sort_formats(formats)
if has_subtitle:
webpage = self._download_webpage(url, video_id)
subtitles = self._get_subtitles(video_id, webpage)
else:
raise ExtractorError('not a media file')
return {
'id': video_id,
'title': media['name'],
'description': media.get('desc'),
'thumbnails': thumbnails,
'uploader': media.get('author'),
'upload_date': unified_strdate(media.get('date')),
'duration': parse_duration(media.get('length')),
'formats': formats,
'subtitles': subtitles,
}
def _get_subtitles(self, video_id, webpage):
subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m:
captions = m.group('captions')
STL_EXT = '.stl'
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = [{
'ext': 'srt',
'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
}]
return subtitles
return self._extract_from_content_id(video_id, url)
class RaiIE(InfoExtractor):
class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
_TESTS = [
{
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': 'e0e7a8a131e249d1aa0ebf270d1d8db7',
'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'flv',
'ext': 'mp4',
'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'upload_date': '20141221',
},
}
},
{
# Direct relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
# HDS live stream, MD5 is unstable
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'skip': 'Geo-restricted to Italy',
},
{
# Embedded content item ID
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
'md5': '84c1135ce960e8822ae63cec34441d63',
'info_dict': {
'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8',
'ext': 'mp4',
'title': 'TG1 ore 20:00 del 02/07/2016',
'upload_date': '20160702',
},
},
{
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
# HDS live stream, MD5 is unstable
'info_dict': {
'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
'ext': 'flv',
'title': 'La diretta di Rainews24',
},
},
]
@classmethod
@ -201,7 +238,30 @@ class RaiIE(InfoExtractor):
iframe_url = self._search_regex(
[r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
r'drawMediaRaiTV\(["\'](.+?)["\']'],
webpage, 'iframe')
if not iframe_url.startswith('http'):
iframe_url = compat_urlparse.urljoin(url, iframe_url)
return self.url_result(iframe_url)
webpage, 'iframe', default=None)
if iframe_url:
if not iframe_url.startswith('http'):
iframe_url = compat_urlparse.urljoin(url, iframe_url)
return self.url_result(iframe_url)
content_item_id = self._search_regex(
r'initEdizione\((?P<q1>[\'"])ContentItem-(?P<content_id>[^\'"]+)(?P=q1)',
webpage, 'content item ID', group='content_id', default=None)
if content_item_id:
return self._extract_from_content_id(content_item_id, url)
relinker_url = compat_urlparse.urljoin(url, self._search_regex(
r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P<q1>[\'"])(?P<url>(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)',
webpage, 'relinker URL', group='url'))
formats = self._extract_relinker_formats(relinker_url, video_id)
self._sort_formats(formats)
title = self._search_regex(
r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
webpage, 'title', group='title', default=None) or self._og_search_title(webpage)
return {
'id': video_id,
'title': title,
'formats': formats,
}

View File

@ -1,23 +1,23 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
js_to_json,
)
from ..compat import compat_str
class RDSIE(InfoExtractor):
IE_DESC = 'RDS.ca'
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
_TESTS = [{
'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
'info_dict': {
'id': '3.1132799',
'id': '604333',
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
'ext': 'mp4',
'title': 'Fowler Jr. prend la direction de Jacksonville',
@ -33,22 +33,17 @@ class RDSIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
# TODO: extract f4m from 9c9media.com
video_url = self._search_regex(
r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
webpage, 'video url')
title = self._og_search_title(webpage) or self._html_search_meta(
item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
video_id = compat_str(item['id'])
title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
'title', webpage, 'title', fatal=True)
description = self._og_search_description(webpage) or self._html_search_meta(
'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
[r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
webpage, 'thumbnail', fatal=False)
@ -61,13 +56,15 @@ class RDSIE(InfoExtractor):
age_limit = self._family_friendly_search(webpage)
return {
'_type': 'url_transparent',
'id': video_id,
'display_id': display_id,
'url': video_url,
'url': '9c9media:rds_web:%s' % video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'age_limit': age_limit,
'ie_key': 'NineCNineMedia',
}

View File

@ -9,7 +9,7 @@ class RTVNHIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.rtvnh.nl/video/131946',
'md5': '6e1d0ab079e2a00b6161442d3ceacfc1',
'md5': 'cdbec9f44550763c8afc96050fa747dc',
'info_dict': {
'id': '131946',
'ext': 'mp4',
@ -29,15 +29,29 @@ class RTVNHIE(InfoExtractor):
raise ExtractorError(
'%s returned error code %d' % (self.IE_NAME, status), expected=True)
formats = self._extract_smil_formats(
'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id, fatal=False)
formats = []
rtmp_formats = self._extract_smil_formats(
'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
formats.extend(rtmp_formats)
for item in meta['source']['fb']:
if item.get('type') == 'hls':
formats.extend(self._extract_m3u8_formats(
item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
elif item.get('type') == '':
formats.append({'url': item['file']})
for rtmp_format in rtmp_formats:
rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
rtsp_format = rtmp_format.copy()
del rtsp_format['play_path']
del rtsp_format['ext']
rtsp_format.update({
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
'url': rtmp_url.replace('rtmp://', 'rtsp://'),
'protocol': 'rtsp',
})
formats.append(rtsp_format)
http_base_url = rtmp_url.replace('rtmp://', 'http://')
formats.extend(self._extract_m3u8_formats(
http_base_url + '/playlist.m3u8', video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
self._sort_formats(formats)
return {

View File

@ -1,18 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import json
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
sanitized_Request,
unified_strdate,
)
@ -27,7 +21,8 @@ class SandiaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Xyce Software Training - Section 1',
'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
'upload_date': '20120904',
'upload_date': '20120409',
'timestamp': 1333983600,
'duration': 7794,
}
}
@ -35,81 +30,36 @@ class SandiaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
webpage = self._download_webpage(req, video_id)
presentation_data = self._download_json(
'http://digitalops.sandia.gov/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
video_id, data=json.dumps({
'getPlayerOptionsRequest': {
'ResourceId': video_id,
'QueryString': '',
}
}), headers={
'Content-Type': 'application/json; charset=utf-8',
})['d']['Presentation']
js_path = self._search_regex(
r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
webpage, 'JS code URL')
js_url = compat_urlparse.urljoin(url, js_path)
js_code = self._download_webpage(
js_url, video_id, note='Downloading player')
def extract_str(key, **args):
return self._search_regex(
r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
js_code, key, **args)
def extract_data(key, **args):
data_json = extract_str(key, **args)
if data_json is None:
return data_json
return self._parse_json(
data_json, video_id, transform_source=js_to_json)
title = presentation_data['Title']
formats = []
for i in itertools.count():
fd = extract_data('VideoUrls[%d]' % i, default=None)
if fd is None:
break
formats.append({
'format_id': '%s' % i,
'format_note': fd['MimeType'].partition('/')[2],
'ext': mimetype2ext(fd['MimeType']),
'url': fd['Location'],
'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
})
for stream in presentation_data.get('Streams', []):
for fd in stream.get('VideoUrls', []):
formats.append({
'format_id': fd['MediaType'],
'format_note': fd['MimeType'].partition('/')[2],
'ext': mimetype2ext(fd['MimeType']),
'url': fd['Location'],
'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
})
self._sort_formats(formats)
slide_baseurl = compat_urlparse.urljoin(
url, extract_data('SlideBaseUrl'))
slide_template = slide_baseurl + re.sub(
r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
slides = []
last_slide_time = 0
for i in itertools.count(1):
sd = extract_str('Slides[%d]' % i, default=None)
if sd is None:
break
timestamp = int_or_none(self._search_regex(
r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
sd, 'slide %s timestamp' % i, fatal=False))
slides.append({
'url': slide_template % i,
'duration': timestamp - last_slide_time,
})
last_slide_time = timestamp
formats.append({
'format_id': 'slides',
'protocol': 'slideshow',
'url': json.dumps(slides),
'preference': -10000, # Downloader not yet written
})
self._sort_formats(formats)
title = extract_data('Title')
description = extract_data('Description', fatal=False)
duration = int_or_none(extract_data(
'Duration', fatal=False), scale=1000)
upload_date = unified_strdate(extract_data('AirDate', fatal=False))
return {
'id': video_id,
'title': title,
'description': description,
'description': presentation_data.get('Description'),
'formats': formats,
'upload_date': upload_date,
'duration': duration,
'timestamp': int_or_none(presentation_data.get('UnixTime'), 1000),
'duration': int_or_none(presentation_data.get('Duration'), 1000),
}

View File

@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
qualities,
int_or_none,
)
class SixPlayIE(InfoExtractor):
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
'md5': '42310bffe4ba3982db112b9cd3467328',
'info_dict': {
'id': '11495320',
'ext': 'mp4',
'title': 'Jamel et ses amis au Marrakech du rire 2015',
'description': 'md5:ba2149d5c321d5201b78070ee839d872',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
clip_data = self._download_json(
'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
video_id)
video_data = clip_data['videoInfo']
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = []
for source in clip_data['sources']:
source_type, source_url = source.get('type'), source.get('src')
if not source_url or source_type == 'hls/primetime':
continue
if source_type == 'application/vnd.apple.mpegURL':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
source_url.replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
elif source_type == 'video/mp4':
quality = source.get('quality')
formats.append({
'url': source_url,
'format_id': quality,
'quality': quality_key(quality),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['title'].strip(),
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'series': video_data.get('titlePgm'),
'formats': formats,
}

View File

@ -67,7 +67,7 @@ class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
IE_NAME = 'skynewsarabia:video'
IE_NAME = 'skynewsarabia:article'
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9',

View File

@ -0,0 +1,33 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class SkySportsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
'md5': 'c44a1db29f27daf9a0003e010af82100',
'info_dict': {
'id': '10328419',
'ext': 'flv',
'title': 'Bale: Its our time to shine',
'description': 'md5:9fd1de3614d525f5addda32ac3c482c9',
},
'add_ie': ['Ooyala'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'ooyala:%s' % self._search_regex(
r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'ie_key': 'Ooyala',
}

View File

@ -9,6 +9,7 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
get_element_by_id,
)
@ -40,7 +41,7 @@ class SlideshareIE(InfoExtractor):
bucket = info['jsplayer']['video_bucket']
ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex(
description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex(
r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
'description', fatal=False)
@ -51,5 +52,5 @@ class SlideshareIE(InfoExtractor):
'ext': ext,
'url': video_url,
'thumbnail': info['slideshow']['pin_image_url'],
'description': description,
'description': description.strip() if description else None,
}

View File

@ -8,10 +8,7 @@ from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
from ..utils import ExtractorError
class SohuIE(InfoExtractor):
@ -96,15 +93,10 @@ class SohuIE(InfoExtractor):
else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
req = sanitized_Request(base_data_url + vid_id)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
return self._download_json(
req, video_id,
'Downloading JSON data for %s' % vid_id)
base_data_url + vid_id, video_id,
'Downloading JSON data for %s' % vid_id,
headers=self.geo_verification_headers())
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')

View File

@ -4,8 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from .spiegeltv import SpiegeltvIE
from ..compat import compat_urlparse
from ..utils import (
extract_attributes,
unified_strdate,
get_element_by_attribute,
)
class SpiegelIE(InfoExtractor):
@ -19,6 +24,7 @@ class SpiegelIE(InfoExtractor):
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
'duration': 49,
'upload_date': '20130311',
},
}, {
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
@ -29,6 +35,7 @@ class SpiegelIE(InfoExtractor):
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983,
'upload_date': '20131115',
},
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
@ -38,6 +45,7 @@ class SpiegelIE(InfoExtractor):
'ext': 'mp4',
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
'upload_date': '20140904',
}
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
@ -52,10 +60,10 @@ class SpiegelIE(InfoExtractor):
if SpiegeltvIE.suitable(handle.geturl()):
return self.url_result(handle.geturl(), 'Spiegeltv')
title = re.sub(r'\s+', ' ', self._html_search_regex(
r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
webpage, 'title'))
description = self._html_search_meta('description', webpage, 'description')
video_data = extract_attributes(self._search_regex(r'(<div[^>]+id="spVideoElements"[^>]+>)', webpage, 'video element', default=''))
title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage)
description = video_data.get('data-video-teaser') or self._html_search_meta('description', webpage, 'description')
base_url = self._search_regex(
[r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'],
@ -87,8 +95,9 @@ class SpiegelIE(InfoExtractor):
return {
'id': video_id,
'title': title,
'description': description,
'description': description.strip() if description else None,
'duration': duration,
'upload_date': unified_strdate(video_data.get('data-video-date')),
'formats': formats,
}

View File

@ -9,6 +9,7 @@ from ..utils import (
class SRMediathekIE(ARDMediathekIE):
IE_NAME = 'sr:mediathek'
IE_DESC = 'Saarländischer Rundfunk'
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'

View File

@ -56,7 +56,7 @@ class StitcherIE(InfoExtractor):
episode = self._parse_json(
js_to_json(self._search_regex(
r'(?s)var\s+stitcher\s*=\s*({.+?});\n', webpage, 'episode config')),
r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
display_id)['config']['episode']
title = unescapeHTML(episode['title'])

View File

@ -120,7 +120,7 @@ class SVTIE(SVTBaseIE):
class SVTPlayIE(SVTBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
@ -141,6 +141,9 @@ class SVTPlayIE(SVTBaseIE):
# geo restricted to Sweden
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
'only_matching': True,
}, {
'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -6,6 +6,7 @@ import time
import hmac
import binascii
import hashlib
import netrc
from .once import OnceIE
@ -24,6 +25,9 @@ from ..utils import (
xpath_with_ns,
mimetype2ext,
find_xpath_attr,
unescapeHTML,
urlencode_postdata,
unified_timestamp,
)
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
@ -62,10 +66,11 @@ class ThePlatformBaseIE(OnceIE):
return formats, subtitles
def get_metadata(self, path, video_id):
def _download_theplatform_metadata(self, path, video_id):
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
info = self._download_json(info_url, video_id)
return self._download_json(info_url, video_id)
def _parse_theplatform_metadata(self, info):
subtitles = {}
captions = info.get('captions')
if isinstance(captions, list):
@ -86,6 +91,10 @@ class ThePlatformBaseIE(OnceIE):
'uploader': info.get('billingCode'),
}
def _extract_theplatform_metadata(self, path, video_id):
info = self._download_theplatform_metadata(path, video_id)
return self._parse_theplatform_metadata(info)
class ThePlatformIE(ThePlatformBaseIE):
_VALID_URL = r'''(?x)
@ -158,6 +167,7 @@ class ThePlatformIE(ThePlatformBaseIE):
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
'only_matching': True,
}]
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
@classmethod
def _extract_urls(cls, webpage):
@ -192,6 +202,96 @@ class ThePlatformIE(ThePlatformBaseIE):
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig)
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
def xml_text(xml_str, tag):
return self._search_regex(
'<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
mvpd_headers = {
'ap_42': 'anonymous',
'ap_11': 'Linux i686',
'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
}
guid = xml_text(resource, 'guid')
requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
authn_token = requestor_info.get('authn_token')
if authn_token:
token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', ''))
if token_expires and token_expires >= time.time():
authn_token = None
if not authn_token:
# TODO add support for other TV Providers
mso_id = 'DTV'
login_info = netrc.netrc().authenticators(mso_id)
if not login_info:
return None
def post_form(form_page, note, data={}):
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
return self._download_webpage(
post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
provider_redirect_page = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
'Downloading Provider Redirect Page', query={
'noflash': 'true',
'mso_id': mso_id,
'requestor_id': requestor_id,
'no_iframe': 'false',
'domain_name': 'adobe.com',
'redirect_url': url,
})
provider_login_page = post_form(
provider_redirect_page, 'Downloading Provider Login Page')
mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
'username': login_info[0],
'password': login_info[2],
})
post_form(mvpd_confirm_page, 'Confirming Login')
session = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
'Retrieving Session', data=urlencode_postdata({
'_method': 'GET',
'requestor_id': requestor_id,
}), headers=mvpd_headers)
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
requestor_info['authn_token'] = authn_token
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
authz_token = requestor_info.get(guid)
if not authz_token:
authorize = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
'Retrieving Authorization Token', data=urlencode_postdata({
'resource_id': resource,
'requestor_id': requestor_id,
'authentication_token': authn_token,
'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
'userMeta': '1',
}), headers=mvpd_headers)
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
requestor_info[guid] = authz_token
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
mvpd_headers.update({
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
})
return self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
video_id, 'Retrieving Media Token', data=urlencode_postdata({
'authz_token': authz_token,
'requestor_id': requestor_id,
'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
'hashed_guid': 'false',
}), headers=mvpd_headers)
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@ -265,7 +365,7 @@ class ThePlatformIE(ThePlatformBaseIE):
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
self._sort_formats(formats)
ret = self.get_metadata(path, video_id)
ret = self._extract_theplatform_metadata(path, video_id)
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
ret.update({
'id': video_id,
@ -339,7 +439,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
categories = [item['media$name'] for item in entry.get('media$categories', [])]
ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
ret = self._extract_theplatform_metadata('%s/%s' % (provider_id, first_video_id), video_id)
subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
ret.update({
'id': video_id,

View File

@ -4,6 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
get_element_by_attribute,
ExtractorError,
)
class TVPIE(InfoExtractor):
@ -21,7 +27,7 @@ class TVPIE(InfoExtractor):
},
}, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
'md5': 'c3b15ed1af288131115ff17a17c19dda',
'md5': 'b0005b542e5b4de643a9690326ab1257',
'info_dict': {
'id': '17916176',
'ext': 'mp4',
@ -53,6 +59,11 @@ class TVPIE(InfoExtractor):
webpage = self._download_webpage(
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
error_massage = get_element_by_attribute('class', 'msg error', webpage)
if error_massage:
raise ExtractorError('%s said: %s' % (
self.IE_NAME, clean_html(error_massage)), expected=True)
title = self._search_regex(
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
webpage, 'title', group='title')
@ -66,24 +77,50 @@ class TVPIE(InfoExtractor):
r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
video_url = self._search_regex(
r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None)
if not video_url:
r'0:{src:([\'"])(?P<url>.*?)\1', webpage,
'formats', group='url', default=None)
if not video_url or 'material_niedostepny.mp4' in video_url:
video_url = self._download_json(
'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
video_id)['video_url']
ext = video_url.rsplit('.', 1)[-1]
if ext != 'ism/manifest':
if '/' in ext:
ext = 'mp4'
formats = []
video_url_base = self._search_regex(
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
video_url, 'video base url', default=None)
if video_url_base:
# TODO: Current DASH formats are broken - $Time$ pattern in
# <SegmentTemplate> not implemented yet
# formats.extend(self._extract_mpd_formats(
# video_url_base + '.ism/video.mpd',
# video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_f4m_formats(
video_url_base + '.ism/video.f4m',
video_id, f4m_id='hds', fatal=False))
m3u8_formats = self._extract_m3u8_formats(
video_url_base + '.ism/video.m3u8', video_id,
'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
m3u8_formats))
formats.extend(m3u8_formats)
for i, m3u8_format in enumerate(m3u8_formats, 2):
http_url = '%s-%d.mp4' % (video_url_base, i)
if self._is_valid_url(http_url, video_id):
f = m3u8_format.copy()
f.update({
'url': http_url,
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
else:
formats = [{
'format_id': 'direct',
'url': video_url,
'ext': ext,
'ext': determine_ext(video_url, 'mp4'),
}]
else:
m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
self._sort_formats(formats)

View File

@ -29,7 +29,7 @@ class TwitchBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'http://usher.twitch.tv'
_USHER_BASE = 'https://usher.ttvnw.net'
_LOGIN_URL = 'http://www.twitch.tv/login'
_NETRC_MACHINE = 'twitch'

View File

@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class URPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
'md5': '15ca67b63fd8fb320ac2bcd854bad7b6',
'info_dict': {
'id': '190031',
'ext': 'mp4',
'title': 'Tripp, Trapp, Träd : Sovkudde',
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
urplayer_data = self._parse_json(self._search_regex(
r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id)
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
formats = []
for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
file_rtmp = urplayer_data.get('file_rtmp' + quality_attr)
if file_rtmp:
formats.append({
'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp),
'format_id': quality + '-rtmp',
'ext': 'flv',
'preference': preference,
})
file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
if file_http:
file_http_base_url = 'http://%s/%s' % (host, file_http)
formats.extend(self._extract_f4m_formats(
file_http_base_url + 'manifest.f4m', video_id,
preference, '%s-hds' % quality, fatal=False))
formats.extend(self._extract_m3u8_formats(
file_http_base_url + 'playlist.m3u8', video_id, 'mp4',
'm3u8_native', preference, '%s-hls' % quality, fatal=False))
self._sort_formats(formats)
subtitles = {}
for subtitle in urplayer_data.get('subtitles', []):
subtitle_url = subtitle.get('file')
kind = subtitle.get('kind')
if subtitle_url or kind and kind != 'captions':
continue
subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
'url': subtitle_url,
})
return {
'id': video_id,
'title': urplayer_data['title'],
'description': self._og_search_description(webpage),
'thumbnail': urplayer_data.get('image'),
'series': urplayer_data.get('series_title'),
'subtitles': subtitles,
'formats': formats,
}

View File

@ -0,0 +1,84 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
remove_end,
unified_strdate,
)
class VidbitIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2',
'md5': '1a34b7f14defe3b8fafca9796892924d',
'info_dict': {
'id': 'jkL2yDOEq2',
'ext': 'mp4',
'title': 'Intro to VidBit',
'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7',
'thumbnail': 're:https?://.*\.jpg$',
'upload_date': '20160618',
'view_count': int,
'comment_count': int,
}
}, {
'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id)
video_url, title = [None] * 2
config = self._parse_json(self._search_regex(
r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'),
video_id, transform_source=js_to_json)
if config:
if config.get('file'):
video_url = compat_urlparse.urljoin(url, config['file'])
title = config.get('title')
if not video_url:
video_url = compat_urlparse.urljoin(url, self._search_regex(
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'video URL', group='url'))
if not title:
title = remove_end(
self._html_search_regex(
(r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'),
webpage, 'title', default=None) or self._og_search_title(webpage),
' - VidBit')
description = self._html_search_meta(
('description', 'og:description', 'twitter:description'),
webpage, 'description')
upload_date = unified_strdate(self._html_search_meta(
'datePublished', webpage, 'upload date'))
view_count = int_or_none(self._search_regex(
r'<strong>(\d+)</strong> views',
webpage, 'view count', fatal=False))
comment_count = int_or_none(self._search_regex(
r'id=["\']cmt_num["\'][^>]*>\((\d+)\)',
webpage, 'comment count', fatal=False))
return {
'id': video_id,
'url': video_url,
'title': title,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
'upload_date': upload_date,
'view_count': view_count,
'comment_count': comment_count,
}

View File

@ -16,6 +16,7 @@ from ..utils import (
ExtractorError,
InAdvancePagedList,
int_or_none,
NO_DEFAULT,
RegexNotFoundError,
sanitized_Request,
smuggle_url,
@ -56,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self._set_vimeo_cookie('vuid', vuid)
self._download_webpage(login_request, None, False, 'Wrong login info')
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = urlencode_postdata({
'password': password,
'token': token,
})
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
def _extract_xsrft_and_vuid(self, webpage):
xsrft = self._search_regex(
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
@ -344,26 +365,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
if mobj:
return mobj.group(1)
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = urlencode_postdata({
'password': password,
'token': token,
})
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword')
if password is None:
@ -791,12 +792,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
'uploader_id': 'user22258446',
}
}, {
'note': 'Password protected',
'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
'info_dict': {
'id': '138823582',
'ext': 'mp4',
'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
'uploader': 'TMB',
'uploader_id': 'user37284429',
},
'params': {
'videopassword': 'holygrail',
},
}]
def _real_initialize(self):
self._login()
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
webpage = self._download_webpage(webpage_url, video_id)
config_url = self._html_search_regex(
r'data-config-url="([^"]+)"', webpage, 'config URL',
default=NO_DEFAULT if video_password_verified else None)
if config_url is None:
self._verify_video_password(webpage_url, video_id, webpage)
config_url = self._get_config_url(
webpage_url, video_id, video_password_verified=True)
return config_url
def _real_extract(self, url):
video_id = self._match_id(url)
config = self._download_json(
'https://player.vimeo.com/video/%s/config' % video_id, video_id)
config_url = self._get_config_url(url, video_id)
config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
self._vimeo_sort_formats(info_dict['formats'])
info_dict['id'] = video_id

View File

@ -90,10 +90,12 @@ class VineIE(InfoExtractor):
data = self._parse_json(
self._search_regex(
r'window\.POST_DATA\s*=\s*{\s*%s\s*:\s*({.+?})\s*};\s*</script>' % video_id,
r'window\.POST_DATA\s*=\s*({.+?});\s*</script>',
webpage, 'vine data'),
video_id)
data = data[list(data.keys())[0]]
formats = [{
'format_id': '%(format)s-%(rate)s' % f,
'vcodec': f.get('format'),

View File

@ -27,12 +27,12 @@ class VKIE(InfoExtractor):
https?://
(?:
(?:
(?:m\.)?vk\.com/video_|
(?:(?:m|new)\.)?vk\.com/video_|
(?:www\.)?daxab.com/
)
ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
(?:
(?:m\.)?vk\.com/(?:.+?\?.*?z=)?video|
(?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
(?:www\.)?daxab.com/embed/
)
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
@ -182,6 +182,10 @@ class VKIE(InfoExtractor):
# pladform embed
'url': 'https://vk.com/video-76116461_171554880',
'only_matching': True,
},
{
'url': 'http://new.vk.com/video205387401_165548505',
'only_matching': True,
}
]
@ -354,7 +358,7 @@ class VKIE(InfoExtractor):
class VKUserVideosIE(InfoExtractor):
IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos"
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
_TEMPLATE_URL = 'https://vk.com/videos'
_TESTS = [{
'url': 'http://vk.com/videos205387401',
@ -369,6 +373,12 @@ class VKUserVideosIE(InfoExtractor):
}, {
'url': 'http://vk.com/videos-97664626?section=all',
'only_matching': True,
}, {
'url': 'http://m.vk.com/videos205387401',
'only_matching': True,
}, {
'url': 'http://new.vk.com/videos205387401',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -25,7 +25,8 @@ class VRTIE(InfoExtractor):
'timestamp': 1414271750.949,
'upload_date': '20141025',
'duration': 929,
}
},
'skip': 'HTTP Error 404: Not Found',
},
# sporza.be
{
@ -39,7 +40,8 @@ class VRTIE(InfoExtractor):
'timestamp': 1413835980.560,
'upload_date': '20141020',
'duration': 3238,
}
},
'skip': 'HTTP Error 404: Not Found',
},
# cobra.be
{
@ -53,16 +55,39 @@ class VRTIE(InfoExtractor):
'timestamp': 1413967500.494,
'upload_date': '20141022',
'duration': 661,
}
},
'skip': 'HTTP Error 404: Not Found',
},
{
# YouTube video
'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
'only_matching': True,
'md5': 'b8b93da1df1cea6c8556255a796b7d61',
'info_dict': {
'id': 'Wji-BZ0oCwg',
'ext': 'mp4',
'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer',
'description': 'md5:8e468944dce15567a786a67f74262583',
'uploader': 'Star Wars',
'uploader_id': 'starwars',
'upload_date': '20160407',
},
'add_ie': ['Youtube'],
},
{
'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
'only_matching': True,
'md5': '',
'info_dict': {
'id': '2377055',
'ext': 'mp4',
'title': 'Cafe Derby',
'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.',
'upload_date': '20150626',
'timestamp': 1435305240.769,
},
'params': {
# m3u8 download
'skip_download': True,
}
}
]
@ -98,6 +123,32 @@ class VRTIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
src.replace('playlist.m3u8', 'manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'data-video-geoblocking="true"' not in webpage:
rtmp_formats = self._extract_smil_formats(
src.replace('playlist.m3u8', 'jwplayer.smil'),
video_id, fatal=False)
formats.extend(rtmp_formats)
for rtmp_format in rtmp_formats:
rtmp_format_c = rtmp_format.copy()
rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
del rtmp_format_c['play_path']
del rtmp_format_c['ext']
http_format = rtmp_format_c.copy()
http_format.update({
'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
'format_id': rtmp_format['format_id'].replace('rtmp', 'http'),
'protocol': 'http',
})
rtsp_format = rtmp_format_c.copy()
rtsp_format.update({
'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
'protocol': 'rtsp',
})
formats.extend([http_format, rtsp_format])
else:
formats.extend(self._extract_f4m_formats(
'%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))

View File

@ -4,17 +4,23 @@ import itertools
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
int_or_none,
orderedSet,
parse_duration,
sanitized_Request,
str_to_int,
)
class XTubeIE(InfoExtractor):
_VALID_URL = r'(?:xtube:|https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-))(?P<id>[^/?&#]+)'
_VALID_URL = r'''(?x)
(?:
xtube:|
https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-)
)
(?P<id>[^/?&#]+)
'''
_TESTS = [{
# old URL schema
@ -27,6 +33,8 @@ class XTubeIE(InfoExtractor):
'description': 'contains:an ET kind of thing',
'uploader': 'greenshowers',
'duration': 450,
'view_count': int,
'comment_count': int,
'age_limit': 18,
}
}, {
@ -51,21 +59,30 @@ class XTubeIE(InfoExtractor):
req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1')
webpage = self._download_webpage(req, display_id)
flashvars = self._parse_json(
self._search_regex(
r'xt\.playerOps\s*=\s*({.+?});', webpage, 'player ops'),
video_id)['flashvars']
sources = self._parse_json(self._search_regex(
r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id)
title = flashvars.get('title') or self._search_regex(
r'<h1>([^<]+)</h1>', webpage, 'title')
video_url = compat_urllib_parse_unquote(flashvars['video_url'])
duration = int_or_none(flashvars.get('video_duration'))
formats = []
for format_id, format_url in sources.items():
formats.append({
'url': format_url,
'format_id': format_id,
'height': int_or_none(format_id),
})
self._sort_formats(formats)
uploader = self._search_regex(
r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
webpage, 'uploader', fatal=False)
title = self._search_regex(
(r'<h1>(?P<title>[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
webpage, 'title', group='title')
description = self._search_regex(
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
uploader = self._search_regex(
(r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
webpage, 'uploader', fatal=False)
duration = parse_duration(self._search_regex(
r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>',
webpage, 'view count', fatal=False))
@ -76,7 +93,6 @@ class XTubeIE(InfoExtractor):
return {
'id': video_id,
'display_id': display_id,
'url': video_url,
'title': title,
'description': description,
'uploader': uploader,
@ -84,6 +100,7 @@ class XTubeIE(InfoExtractor):
'view_count': view_count,
'comment_count': comment_count,
'age_limit': 18,
'formats': formats,
}

View File

@ -67,6 +67,20 @@ class XuiteIE(InfoExtractor):
'categories': ['電玩動漫'],
},
'skip': 'Video removed',
}, {
# Video with encoded media id
# from http://forgetfulbc.blogspot.com/2016/06/date.html
'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0',
'info_dict': {
'id': 'cE1xbENoLTI3NDQ3MzM2LmZsdg==',
'ext': 'mp4',
'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)',
'description': 'md5:f0abdcb69df300f522a5442ef3146f2a',
'timestamp': 1466160960,
'upload_date': '20160617',
'uploader': 'B.C. & Lowy',
'uploader_id': '232279340',
},
}, {
'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
'only_matching': True,
@ -80,10 +94,9 @@ class XuiteIE(InfoExtractor):
def base64_encode_utf8(data):
return base64.b64encode(data.encode('utf-8')).decode('utf-8')
def _extract_flv_config(self, media_id):
base64_media_id = self.base64_encode_utf8(media_id)
def _extract_flv_config(self, encoded_media_id):
flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
'http://vlog.xuite.net/flash/player?media=%s' % encoded_media_id,
'flv config')
prop_dict = {}
for prop in flv_config.findall('./property'):
@ -108,9 +121,14 @@ class XuiteIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error_msg),
expected=True)
video_id = self._html_search_regex(
r'data-mediaid="(\d+)"', webpage, 'media id')
flv_config = self._extract_flv_config(video_id)
encoded_media_id = self._search_regex(
r'attributes\.name\s*=\s*"([^"]+)"', webpage,
'encoded media id', default=None)
if encoded_media_id is None:
video_id = self._html_search_regex(
r'data-mediaid="(\d+)"', webpage, 'media id')
encoded_media_id = self.base64_encode_utf8(video_id)
flv_config = self._extract_flv_config(encoded_media_id)
FORMATS = {
'audio': 'mp3',

View File

@ -19,6 +19,7 @@ from ..utils import (
mimetype2ext,
)
from .brightcove import BrightcoveNewIE
from .nbc import NBCSportsVPlayerIE
@ -227,7 +228,12 @@ class YahooIE(InfoExtractor):
# Look for NBCSports iframes
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key())
# Look for Brightcove New Studio embeds
bc_url = BrightcoveNewIE._extract_url(webpage)
if bc_url:
return self.url_result(bc_url, BrightcoveNewIE.ie_key())
# Query result is often embedded in webpage as JSON. Sometimes explicit requests
# to video API results in a failure with geo restriction reason therefore using

View File

@ -16,7 +16,6 @@ from ..compat import (
from ..utils import (
ExtractorError,
get_element_by_attribute,
sanitized_Request,
)
@ -218,14 +217,10 @@ class YoukuIE(InfoExtractor):
headers = {
'Referer': req_url,
}
headers.update(self.geo_verification_headers())
self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
req = sanitized_Request(req_url, headers=headers)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
raw_data = self._download_json(req, video_id, note=note)
raw_data = self._download_json(req_url, video_id, note=note, headers=headers)
return raw_data['data']

View File

@ -501,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'youtube_include_dash_manifest': True,
'format': '141',
},
'skip': 'format 141 not served anymore',
},
# DASH manifest with encrypted signature
{
@ -517,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'params': {
'youtube_include_dash_manifest': True,
'format': '141',
'format': '141/bestaudio[ext=m4a]',
},
},
# JS player signature function name containing $
@ -537,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'params': {
'youtube_include_dash_manifest': True,
'format': '141',
'format': '141/bestaudio[ext=m4a]',
},
},
# Controversy video
@ -618,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
'license': 'Standard YouTube License',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympics',
'uploader': 'Olympic',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
},
'params': {
@ -671,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000',
'license': 'Standard YouTube License',
'formats': 'mincount:33',
'formats': 'mincount:32',
},
},
# DASH manifest with segment_list
@ -691,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': {
'youtube_include_dash_manifest': True,
'format': '135', # bestvideo
}
},
'skip': 'This live event has ended.',
},
{
# Multifeed videos (multiple cameras), URL is for Main Camera
@ -762,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
},
'playlist_count': 2,
'skip': 'Not multifeed anymore',
},
{
'url': 'http://vid.plus/FlRa-iH7PGw',
@ -814,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'This video does not exist.',
},
{
# Video licensed under Creative Commons
@ -1331,7 +1335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:[a-zA-Z-]+="[^"]*"\s+)*?
(?:title|href)="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
class="[^"]*"[^>]*>
[^<]+\.{3}\s*
</a>
''', r'\1', video_description)
@ -1726,6 +1730,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}
class YoutubeSharedVideoIE(InfoExtractor):
_VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:shared'
_TEST = {
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
'info_dict': {
'id': 'uPDB5I9wfp8',
'ext': 'webm',
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
'upload_date': '20160219',
'uploader': 'Pocoyo - Português (BR)',
'uploader_id': 'PocoyoBrazil',
},
'add_ie': ['Youtube'],
'params': {
# There are already too many Youtube downloads
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
real_video_id = self._html_search_meta(
'videoId', webpage, 'YouTube video id', fatal=True)
return self.url_result(real_video_id, YoutubeIE.ie_key())
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com playlists'
_VALID_URL = r"""(?x)(?:
@ -1958,9 +1995,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
channel_playlist_id = self._html_search_meta(
'channelId', channel_page, 'channel id', default=None)
if not channel_playlist_id:
channel_playlist_id = self._search_regex(
r'data-(?:channel-external-|yt)id="([^"]+)"',
channel_page, 'channel id', default=None)
channel_url = self._html_search_meta(
('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
channel_page, 'channel url', default=None)
if channel_url:
channel_playlist_id = self._search_regex(
r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
channel_url, 'channel id', default=None)
if channel_playlist_id and channel_playlist_id.startswith('UC'):
playlist_id = 'UU' + channel_playlist_id[2:]
return self.url_result(
@ -1983,6 +2024,15 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
for video_id, video_title in self.extract_videos_from_page(channel_page)]
return self.playlist_result(entries, channel_id)
try:
next(self._entries(channel_page, channel_id))
except StopIteration:
alert_message = self._html_search_regex(
r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
channel_page, 'alert', default=None, group='alert')
if alert_message:
raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
@ -1996,7 +2046,8 @@ class YoutubeUserIE(YoutubeChannelIE):
'url': 'https://www.youtube.com/user/TheLinuxFoundation',
'playlist_mincount': 320,
'info_dict': {
'title': 'TheLinuxFoundation',
'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
'title': 'Uploads from The Linux Foundation',
}
}, {
'url': 'ytuser:phihag',
@ -2004,6 +2055,10 @@ class YoutubeUserIE(YoutubeChannelIE):
}, {
'url': 'https://www.youtube.com/c/gametrailers',
'only_matching': True,
}, {
# This channel is not available.
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
'only_matching': True,
}]
@classmethod

View File

@ -26,9 +26,7 @@ def parseOpts(overrideArguments=None):
except IOError:
return default # silently skip if file is not present
try:
res = []
for l in optionf:
res += compat_shlex_split(l, comments=True)
res = compat_shlex_split(optionf.read(), comments=True)
finally:
optionf.close()
return res
@ -211,11 +209,16 @@ def parseOpts(overrideArguments=None):
action='store_const', const='::', dest='source_address',
help='Make all connections via IPv6 (experimental)',
)
network.add_option(
'--geo-verification-proxy',
dest='geo_verification_proxy', default=None, metavar='URL',
help='Use this proxy to verify the IP address for some geo-restricted sites. '
'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
)
network.add_option(
'--cn-verification-proxy',
dest='cn_verification_proxy', default=None, metavar='URL',
help='Use this proxy to verify the IP address for some Chinese sites. '
'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
help=optparse.SUPPRESS_HELP,
)
selection = optparse.OptionGroup(parser, 'Video Selection')

View File

@ -76,7 +76,7 @@ class Socks4Error(ProxyError):
CODES = {
91: 'request rejected or failed',
92: 'request rejected becasue SOCKS server cannot connect to identd on the client',
92: 'request rejected because SOCKS server cannot connect to identd on the client',
93: 'request rejected because the client program and identd report different user-ids'
}

View File

@ -110,6 +110,49 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙ
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
DATE_FORMATS = (
'%d %B %Y',
'%d %b %Y',
'%B %d %Y',
'%b %d %Y',
'%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M',
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y/%m/%d',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
'%d-%m-%Y',
'%d.%m.%Y',
'%d.%m.%y',
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
])
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
'%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'%m/%d/%Y %H:%M:%S',
])
def preferredencoding():
"""Get preferred encoding.
@ -975,6 +1018,24 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
https_response = http_response
def extract_timezone(date_str):
m = re.search(
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
date_str)
if not m:
timezone = datetime.timedelta()
else:
date_str = date_str[:-len(m.group('tz'))]
if not m.group('sign'):
timezone = datetime.timedelta()
else:
sign = 1 if m.group('sign') == '+' else -1
timezone = datetime.timedelta(
hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes')))
return timezone, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
""" Return a UNIX timestamp from the given date """
@ -984,20 +1045,8 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
date_str = re.sub(r'\.[0-9]+', '', date_str)
if timezone is None:
m = re.search(
r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
date_str)
if not m:
timezone = datetime.timedelta()
else:
date_str = date_str[:-len(m.group(0))]
if not m.group('sign'):
timezone = datetime.timedelta()
else:
sign = 1 if m.group('sign') == '+' else -1
timezone = datetime.timedelta(
hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes')))
timezone, date_str = extract_timezone(date_str)
try:
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
dt = datetime.datetime.strptime(date_str, date_format) - timezone
@ -1006,6 +1055,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
pass
def date_formats(day_first=True):
return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
"""Return a string with the date in the format YYYYMMDD"""
@ -1014,53 +1067,11 @@ def unified_strdate(date_str, day_first=True):
upload_date = None
# Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
_, date_str = extract_timezone(date_str)
format_expressions = [
'%d %B %Y',
'%d %b %Y',
'%B %d %Y',
'%b %d %Y',
'%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M',
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y/%m/%d',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
]
if day_first:
format_expressions.extend([
'%d-%m-%Y',
'%d.%m.%Y',
'%d.%m.%y',
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
])
else:
format_expressions.extend([
'%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'%m/%d/%Y %H:%M:%S',
])
for expression in format_expressions:
for expression in date_formats(day_first):
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except ValueError:
@ -1076,6 +1087,29 @@ def unified_strdate(date_str, day_first=True):
return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
if date_str is None:
return None
date_str = date_str.replace(',', ' ')
pm_delta = datetime.timedelta(hours=12 if re.search(r'(?i)PM', date_str) else 0)
timezone, date_str = extract_timezone(date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
for expression in date_formats(day_first):
try:
dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_delta
return calendar.timegm(dt.timetuple())
except ValueError:
pass
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
return calendar.timegm(timetuple.timetuple())
def determine_ext(url, default_ext='unknown_video'):
if url is None:
return default_ext
@ -1410,6 +1444,8 @@ def shell_quote(args):
def smuggle_url(url, data):
""" Pass additional data in a URL for internal use. """
url, idata = unsmuggle_url(url, {})
data.update(idata)
sdata = compat_urllib_parse_urlencode(
{'__youtubedl_smuggle': json.dumps(data)})
return url + '#' + sdata
@ -1591,6 +1627,11 @@ class HEADRequest(compat_urllib_request.Request):
return 'HEAD'
class PUTRequest(compat_urllib_request.Request):
def get_method(self):
return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr:
if v is not None:
@ -1626,6 +1667,10 @@ def float_or_none(v, scale=1, invscale=1, default=None):
return default
def strip_or_none(v):
return None if v is None else v.strip()
def parse_duration(s):
if not isinstance(s, compat_basestring):
return None
@ -1882,7 +1927,13 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
req_headers.update(headers)
req_data = data or req.data
req_url = update_url_query(url or req.get_full_url(), query)
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
req_get_method = req.get_method()
if req_get_method == 'HEAD':
req_type = HEADRequest
elif req_get_method == 'PUT':
req_type = PUTRequest
else:
req_type = compat_urllib_request.Request
new_req = req_type(
req_url, data=req_data, headers=req_headers,
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
@ -2861,3 +2912,7 @@ def parse_m3u8_attributes(attrib):
val = val[1:-1]
info[key] = val
return info
def urshift(val, n):
return val >> n if val >= 0 else (val + 0x100000000) >> n

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.06.23.1'
__version__ = '2016.07.06'