Compare commits

..

143 Commits

Author SHA1 Message Date
0e94b4713d release 2016.07.06 2016-07-06 00:54:23 +07:00
a6d3b89feb [prosiebensat1] Make downloading urls JSON non fatal 2016-07-06 00:52:48 +07:00
6c26815d63 [onionstudios] fix info extraction 2016-07-05 18:05:07 +01:00
73c4ac2c95 [youtube:channel] Improve channel id extraction and detect unavailable channels (Closes #10009) 2016-07-05 23:30:44 +07:00
84f214d840 [prosiebensat1] extract all formats 2016-07-05 17:11:45 +01:00
e3f88be7a9 [rtvnh] extract all formats 2016-07-05 14:45:39 +01:00
31af3e35e0 [sandia] remove unused imports 2016-07-05 13:39:24 +01:00
94a5cff91d [sendia] fix info extraction 2016-07-05 13:37:46 +01:00
77082c7b9e [slideshare] fix description extraction 2016-07-05 12:01:04 +01:00
252a1f75d2 [spiegel] improve info extraction 2016-07-05 11:46:25 +01:00
5abf513cf8 [stitcher] fix episode config extraction 2016-07-05 10:44:16 +01:00
c6054e3201 [xuite] Support videos with already encoded media id 2016-07-05 14:26:42 +08:00
4080530624 [youtube:shared] Recognize the new 'shared' URLs
Closes #10007
2016-07-05 13:15:05 +08:00
c25f1a9b63 release 2016.07.05 2016-07-05 06:32:46 +07:00
dfaa86b75e [test_utils] add test for smuggling a smuggled url 2016-07-04 21:36:32 +01:00
d9163ae3b6 [kaltura] fix extraction error for videos from multiple kaltura servers 2016-07-04 21:34:27 +01:00
dafafe7cf1 [la7] extract more info from a kaltura custom server 2016-07-04 17:59:58 +01:00
81953d1ae5 [kaltura] add support videos stored on custom kaltura servers(closes #5557) 2016-07-04 17:59:58 +01:00
3a212ed62e [iqiyi] Skip an unstable MD5 checksum 2016-07-04 11:25:46 +08:00
195f084542 [pornhub] Detect private videos (Closes #9987) 2016-07-04 03:27:00 +07:00
aa7a455b2e [README.md] Clarify configuration file may not exist by default 2016-07-04 01:24:33 +07:00
6a4e659c93 [yahoo] Recognize brightcove embed (Closes #9995) 2016-07-03 23:00:36 +07:00
40f3666f6b [test/test_http] Update tests for 38cce791c7 2016-07-03 23:50:55 +08:00
dd801bbe18 [brightcove] improve error detection 2016-07-03 16:37:22 +01:00
38cce791c7 Rename --cn-verfication-proxy to --geo-verification-proxy
And deprecate the former one

Since commit f138873900, this option is
not limited to China websites, so rename it.
2016-07-03 23:29:56 +08:00
bf3ae6a543 [devscripts/show-downloads-statictics] Add script for displaying downloads statistics 2016-07-03 22:20:14 +07:00
bff98341d5 release 2016.07.03.1 2016-07-03 21:28:55 +07:00
2644e911be [iqiyi] Fix extraction
See https://github.com/soimort/you-get/issues/1211#issuecomment-229011559
2016-07-03 22:19:56 +08:00
a5f67895d3 [nationalgeographic] restore http formats
there was a misunderstanding about the reason of 403 response
the problem happen only when the user use aria2c as a downloader
a1f6f5c768 (commitcomment-18107559)
2016-07-03 14:10:25 +01:00
15e4b6b758 [rai] Support an alternative form of embedded relinker URL
Closes #8551
2016-07-03 19:52:11 +08:00
2b28b892d8 [rai] Support videos with embedded content item ID (#8551) 2016-07-03 19:52:11 +08:00
7507fc98cb [README.md] Fix somes typo in coding conventions section 2016-07-03 18:35:28 +07:00
477b7a8474 [downloader/f4m] Fix for Rai live streams 2016-07-03 19:26:39 +08:00
034a884957 [rai] Support direct relinker URLs (closes #8552) 2016-07-03 19:26:39 +08:00
64436cb1a4 [nationalgeographic] skip download for national geographic channel tests(closes #9991) 2016-07-03 10:43:36 +01:00
f138873900 [rai] Fix extraction and update _TESTS
Closes #8617
Closes #9157
Closes #9232
2016-07-03 15:49:35 +08:00
e793338c88 [buzzfeed] Detect Facebook embed and update _TESTS
Closes #5701
2016-07-03 14:12:02 +08:00
369bb06206 [facebook] Improve embed detection (#5701) 2016-07-03 14:11:29 +08:00
2cb31d288e [history:topic] Relax _VALID_URL 2016-07-03 13:01:04 +07:00
c723d1cd8d [README.md] Update some codebase links 2016-07-03 11:35:13 +07:00
1f55234057 Add PULL_REQUEST_TEMPLATE.md 2016-07-03 11:31:49 +07:00
04006fae8d [README.md] Start writing youtube-dl coding conventions 2016-07-03 11:31:07 +07:00
4cb13d0d6a [hrti] Don't redefine variable in list comprehension 2016-07-02 23:02:14 +02:00
a1f6f5c768 [nationalgeographic] add support Adobe Pass auth 2016-07-02 21:24:22 +01:00
05c7feec77 [aenetworks] add support Adobe Pass auth 2016-07-02 21:24:22 +01:00
bf83024826 [theplatform] add basic support for Adobe Pass 2016-07-02 21:24:22 +01:00
a0cfd82dda release 2016.07.03 2016-07-03 03:19:22 +07:00
1b734adb2d [xtube] Fix extraction (Closes #9953, closes #9961) 2016-07-03 03:17:35 +07:00
9b724d7277 [extractors] Add hrti:playlist import 2016-07-03 02:25:39 +07:00
c3a5dd3b5d Credit @atopuzov for hrti (#9482) 2016-07-03 02:22:59 +07:00
e3755a624b [hrti] Improve and add support for playlists (Closes #9482) 2016-07-03 02:22:14 +07:00
95cf60e826 [utils] Add PUTRequest 2016-07-03 02:21:32 +07:00
6b03e1e25d [HRTi] Implement extractor for Croatian Radiotelevision 2016-07-03 02:20:41 +07:00
712b0b5b70 [la7.it] Fix the extractor 2016-07-02 23:49:03 +08:00
6a424391d9 [facebook] Make embed detection stricter to prevent false-positives 2016-07-02 23:15:55 +08:00
dbf0157a26 [generic] Add MD5 checksums 2016-07-02 21:58:07 +08:00
7deef1ba67 [generic] Support Wordpress "YouTube Video Importer" plugin
Closes #9938
2016-07-02 21:58:07 +08:00
fd6ca38262 [facebook] Improve Facebook embedded detection
Related to #9938.

Another example comes from 9834872bf6.
2016-07-02 21:58:07 +08:00
bdafd88da0 [vk] Extend _VALID_URLs to support new domain (Closes #9981) 2016-07-02 16:43:19 +07:00
7a1e71575e release 2016.07.02 2016-07-02 02:47:42 +07:00
ac2d8f54d1 [vine] Remove superfluous whitespace 2016-07-02 02:45:00 +07:00
14ff6baa0e [fusion] Improve 2016-07-02 02:44:37 +07:00
bb08101ec4 [Fusion] Add new extractor 2016-07-02 02:37:28 +07:00
bc4b2d75ba [pornhub] Add support for thumbzilla (Closes #8696) 2016-07-02 02:11:07 +07:00
35fc3021ba [periscope] Add another fallback source 2016-07-02 01:35:57 +07:00
347227237b [periscope] fix playlist extraction (#9967)
The JSON response changed and the extractor needed to be updated in order to gather the video IDs.
2016-07-02 01:29:11 +07:00
564dc3c6e8 [vine] Fix extraction (Closes #9970) 2016-07-02 01:24:57 +07:00
9f4576a7eb [twitch] Update usher URL (Closes #9975) 2016-07-01 23:16:43 +07:00
f11315e8d4 release 2016.07.01 2016-07-01 03:59:57 +07:00
0c2ac64bb8 [sixplay] Rename preference key to quality in format dict 2016-07-01 03:57:59 +07:00
a9eede3913 [test/compat] compat_shlex_split: test with newlines 2016-07-01 03:30:35 +07:00
9e29ef13a3 [options] Accept quoted string across multiple lines (#9940)
Like:

    -f "
    bestvideo+bestaudio/
    best
    "
2016-07-01 03:30:31 +07:00
eaaaaec042 [pornhub] Add more tests with removed videos 2016-07-01 03:18:27 +07:00
3cb3b60064 [pornhub] Relax removed message regex (Closes #9964) 2016-07-01 03:14:23 +07:00
044e3d91b5 [Pornhub] Fix error detection 2016-07-01 02:59:50 +07:00
c9e538a3b1 [ctvnews] use orderedSet, increase the number of items for playlists and use smaller bin list for test 2016-06-30 19:52:32 +01:00
76dad392f5 [meta] Clarify the source of uppod st decryption algorithm 2016-06-30 18:27:57 +01:00
9617b557aa [ctv] Add new extractor(closes #4077) 2016-06-30 18:22:35 +01:00
bf4fa24414 [ctvnews] Add new extractor(closes #2156) 2016-06-30 18:22:35 +01:00
20361b4f25 [rds] extract 9c9media formats 2016-06-30 18:22:35 +01:00
05a0068a76 [9c9media] Add new extractor 2016-06-30 18:22:35 +01:00
66a42309fa release 2016.06.30 2016-06-30 23:56:55 +07:00
fd94e2671a [meta] Add support for pladform embeds 2016-06-30 23:20:44 +07:00
8ff6697861 [pladform] Improve embed detection 2016-06-30 23:19:29 +07:00
eafa643715 [meta] Make duration and description optional
For iframe URLs
2016-06-30 23:06:13 +07:00
049da7cb6c [meta] Extend _VALID_URL 2016-06-30 23:04:18 +07:00
7dbeee7e22 [generic] make twitter:player extraction non fatal 2016-06-30 14:11:55 +01:00
93ad6c6bfa [sixplay] Add new extractor(closes #2183) 2016-06-30 13:50:49 +01:00
329179073b [generic] add generic support for twitter:player embeds 2016-06-30 12:01:30 +01:00
4d86d2008e [urplay] fix typo and check with flake8 2016-06-30 11:30:42 +01:00
ab47b6e881 [theatlantic] Add new extractor(closes #6611) 2016-06-30 04:08:56 +01:00
df43389ade [skysports] Add new extractor(closes #7066) 2016-06-30 02:54:21 +01:00
397b305cfe [meta] Add new extractor(closes #8789) 2016-06-30 00:21:03 +01:00
e496fa50cd [urplay] Add new extractor(closes #9332) 2016-06-29 20:19:31 +01:00
06a96da15b [eagleplatform] Improve embed detection and extract in separate routine (Closes #9926) 2016-06-29 23:01:34 +07:00
70157c2c43 [aenetworks] add support for movie pages 2016-06-29 16:55:17 +01:00
c58ed8563d [aenetworks] extract history topic playlist title 2016-06-29 16:18:16 +01:00
4c7821227c [aenetworks:historytopic] fix topic video url 2016-06-29 16:03:32 +01:00
42362fdb5e [aenetworks] add support for show and season for A&E Network sites and History topics(closes #9816) 2016-06-29 15:49:17 +01:00
97124e572d [arte:playlist] Fix test 2016-06-28 22:39:53 +07:00
32616c14cc [vrt] extract all formats 2016-06-28 14:02:03 +01:00
8174d0fe95 release 2016.06.27 2016-06-27 23:09:39 +07:00
8704778d95 [pbs] Check manually constructed http links (Closes #9921) 2016-06-27 23:06:42 +07:00
c287f2bc60 [extractor/generic] Use _extract_url for kaltura embeds (Closes #9922) 2016-06-27 22:45:26 +07:00
9ea5c04c0d [kaltura] Add _extract_url with fixed regex 2016-06-27 22:44:17 +07:00
fd7a7498a4 [test_all_urls] PEP 8 and change wording 2016-06-27 22:11:45 +07:00
e3a6747d8f New test-case: extractor names are supposed to be unique
@dstftw explained in
https://github.com/rg3/youtube-dl/pull/9918#issuecomment-228625878 that
extractor names are supposed to be unique. @dstftw has fixed the two
offending extractors, and here I add a test to ensure this does not
happen in the future.
2016-06-27 22:09:29 +07:00
f41ffc00d1 [skynewsarabia:article] Clarify IE_NAME 2016-06-27 05:08:09 +07:00
81fda15369 [sr:mediathek] Clarify IE_NAME 2016-06-27 05:07:12 +07:00
427cd050a3 [extractor/generic] Improve kaltura embed detection (Closes #9911) 2016-06-27 04:11:53 +07:00
b0c200f1ec [msn] Add test URL with non-alphanumeric characters 2016-06-26 22:03:36 +07:00
92747e664a release 2016.06.26 2016-06-26 21:15:24 +07:00
f1f336322d [msn] Fix extraction (Closes #8960, closes #9542) 2016-06-26 21:10:05 +07:00
bf8dd79045 [extractor/common] Fix sorting with custom field preference 2016-06-26 21:09:07 +07:00
c6781156aa [MSN] add new extractor 2016-06-26 21:07:59 +07:00
f484c5fa25 [vidbit] Improve (Closes #9759) 2016-06-26 16:59:28 +07:00
88d9f6c0c4 [utils] Add support for name list in _html_search_meta 2016-06-26 16:57:14 +07:00
3c9c088f9c [Vidbit] Add new extractor 2016-06-26 16:52:52 +07:00
fc3996bfe1 [iqiyi] Remove codes for debugging 2016-06-26 15:45:41 +08:00
5b6ad8630c [iqiyi] Partially fix IqiyiIE
Use the HTML5 API. Only low-resolution formats available

Related: #9839

Thanks @zhangn1985 for the overall algorithm (soimort/you-get#1224)
2016-06-26 15:18:32 +08:00
30105f4ac0 [le] Move urshift() to utils.py 2016-06-26 15:17:26 +08:00
1143535d76 [utils] Add urshift()
Used in IqiyiIE and LeIE
2016-06-26 15:16:49 +08:00
7d52c052ef [generic] Fix test_Generic_76
Broken: https://travis-ci.org/rg3/youtube-dl/jobs/140251658
2016-06-26 11:56:27 +08:00
a2406fce3c Fix misspelling 2016-06-26 01:28:55 +07:00
3b34ab538c [svtplay] Extend _VALID_URL (#9900) 2016-06-26 00:29:53 +07:00
ac782306f1 [iqiyi] Mark broken 2016-06-26 00:25:41 +07:00
0c00e889f3 Credit @JakubAdamWieczorek for #9813 2016-06-25 23:35:57 +07:00
ce96ed05f4 [polskieradio] Add test with video 2016-06-25 23:31:21 +07:00
0463b77a1f [polskieradio] Improve extraction (Closes #9813) 2016-06-25 23:19:18 +07:00
2d185706ea [polskieradio] Add support for Polskie Radio.
Polskie Radio is the main Polish state-funded radio broadcasting service.
2016-06-25 23:19:18 +07:00
b72b44318c [utils] Add strip_or_none 2016-06-25 23:19:18 +07:00
46f59e89ea [utils] Add unified_timestamp 2016-06-25 23:19:18 +07:00
b4241e308e release 2016.06.25 2016-06-25 03:03:20 +07:00
3d4b08dfc7 [setup.py] Add file version information and quotes consistency (Closes #9878) 2016-06-25 02:50:12 +07:00
be49068d65 [youtube] Fix and skip some tests 2016-06-24 22:47:19 +07:00
525cedb971 [youtube] Relax URL expansion in description 2016-06-24 22:37:13 +07:00
de3c7fe0d4 [youtube] Fix 141 format tests 2016-06-24 22:27:55 +07:00
896cc72750 [mixcloud] View count and like count may be absent
Closes #9874
2016-06-24 17:26:12 +08:00
c1ff6e1ad0 [vimeo:review] Fix extraction for password-protected videos
Closes #9853
2016-06-24 16:48:37 +08:00
fee70322d7 [appletrailers] correct thumbnail fallback 2016-06-23 19:03:34 +01:00
8065d6c55f [dcn] extend _VALID_URL for awaan.ae and extract all available formats 2016-06-23 17:22:15 +01:00
494172d2e5 [appletrailers] extract info from an alternative source if available(closes #8422)(closes #8422) 2016-06-23 15:49:42 +01:00
6e3c2047f8 [tvp] extract all formats and detect erros 2016-06-23 04:36:16 +01:00
81 changed files with 3000 additions and 1086 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23.1** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.06**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.06.23.1 [debug] youtube-dl version 2016.07.06
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

22
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file
View File

@ -0,0 +1,22 @@
## Please follow the guide below
- You will be asked some questions, please read them **carefully** and answer honestly
- Put an `x` into all the boxes [ ] relevant to your *pull request* (like that [x])
- Use *Preview* tab to see how your *pull request* will actually look like
---
### Before submitting a *pull request* make sure you have:
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
### What is the purpose of your *pull request*?
- [ ] Bug fix
- [ ] New extractor
- [ ] New feature
---
### Description of your *pull request* and other information
Explanation of your *pull request* in arbitrary form goes here. Please make sure the description explains the purpose and effect of your *pull request* and is worded well enough to be understood. Provide as much context and examples as possible.

View File

@ -175,3 +175,5 @@ Tomáš Čech
Déstin Reed Déstin Reed
Roman Tsiupa Roman Tsiupa
Artur Krysiak Artur Krysiak
Jakub Adam Wieczorek
Aleksandar Topuzović

View File

@ -97,9 +97,17 @@ If you want to add support for a new site, first of all **make sure** this site
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`): After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) 1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git` 2. Check out the source code with:
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
3. Start a new git branch with
cd youtube-dl
git checkout -b yourextractor
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`: 4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
```python ```python
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
@ -143,16 +151,148 @@ After you have ensured this site is distributing it's content legally, you can f
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py $ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor' $ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor $ git push origin yourextractor
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. 10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions! In any case, thank you very much for your contributions!
## youtube-dl coding conventions
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
### Mandatory and optional metafields
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
- `url` (media download URL) or `formats`
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
#### Example
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
```python
meta = self._download_json(url, video_id)
```
Assume at this point `meta`'s layout is:
```python
{
...
"summary": "some fancy summary text",
...
}
```
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
```python
description = meta.get('summary') # correct
```
and not like:
```python
description = meta['summary'] # incorrect
```
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
```python
description = self._search_regex(
r'<span[^>]+id="title"[^>]*>([^<]+)<',
webpage, 'description', fatal=False)
```
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
You can also pass `default=<some fallback value>`, for example:
```python
description = self._search_regex(
r'<span[^>]+id="title"[^>]*>([^<]+)<',
webpage, 'description', default=None)
```
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
### Provide fallbacks
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
#### Example
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
```python
title = meta['title']
```
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
```python
title = meta.get('title') or self._og_search_title(webpage)
```
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
### Make regular expressions flexible
When using regular expressions try to write them fuzzy and flexible.
#### Example
Say you need to extract `title` from the following HTML code:
```html
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
```
The code for that task should look similar to:
```python
title = self._search_regex(
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
```
Or even better:
```python
title = self._search_regex(
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
webpage, 'title', group='title')
```
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
The code definitely should not look like:
```python
title = self._search_regex(
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
webpage, 'title', group='title')
```
### Use safe conversion functions
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.

160
README.md
View File

@ -103,9 +103,9 @@ which means you can modify it, redistribute it or use it however you like.
(experimental) (experimental)
-6, --force-ipv6 Make all connections via IPv6 -6, --force-ipv6 Make all connections via IPv6
(experimental) (experimental)
--cn-verification-proxy URL Use this proxy to verify the IP address for --geo-verification-proxy URL Use this proxy to verify the IP address for
some Chinese sites. The default proxy some geo-restricted sites. The default
specified by --proxy (or none, if the proxy specified by --proxy (or none, if the
options is not present) is used for the options is not present) is used for the
actual downloading. (experimental) actual downloading. (experimental)
@ -424,7 +424,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION # CONFIGURATION
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory: For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
``` ```
@ -890,9 +890,17 @@ If you want to add support for a new site, first of all **make sure** this site
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`): After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork) 1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git` 2. Check out the source code with:
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
3. Start a new git branch with
cd youtube-dl
git checkout -b yourextractor
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`: 4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
```python ```python
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
@ -936,19 +944,151 @@ After you have ensured this site is distributing it's content legally, you can f
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py $ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor' $ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor $ git push origin yourextractor
11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. 10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions! In any case, thank you very much for your contributions!
## youtube-dl coding conventions
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
### Mandatory and optional metafields
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
- `url` (media download URL) or `formats`
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
#### Example
Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`:
```python
meta = self._download_json(url, video_id)
```
Assume at this point `meta`'s layout is:
```python
{
...
"summary": "some fancy summary text",
...
}
```
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
```python
description = meta.get('summary') # correct
```
and not like:
```python
description = meta['summary'] # incorrect
```
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
```python
description = self._search_regex(
r'<span[^>]+id="title"[^>]*>([^<]+)<',
webpage, 'description', fatal=False)
```
With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
You can also pass `default=<some fallback value>`, for example:
```python
description = self._search_regex(
r'<span[^>]+id="title"[^>]*>([^<]+)<',
webpage, 'description', default=None)
```
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
### Provide fallbacks
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
#### Example
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
```python
title = meta['title']
```
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
```python
title = meta.get('title') or self._og_search_title(webpage)
```
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
### Make regular expressions flexible
When using regular expressions try to write them fuzzy and flexible.
#### Example
Say you need to extract `title` from the following HTML code:
```html
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
```
The code for that task should look similar to:
```python
title = self._search_regex(
r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
```
Or even better:
```python
title = self._search_regex(
r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
webpage, 'title', group='title')
```
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
The code definitely should not look like:
```python
title = self._search_regex(
r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
webpage, 'title', group='title')
```
### Use safe conversion functions
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
# EMBEDDING YOUTUBE-DL # EMBEDDING YOUTUBE-DL
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new). youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new).

View File

@ -0,0 +1,41 @@
#!/usr/bin/env python
from __future__ import unicode_literals
import json
import os
import re
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.compat import (
compat_print,
compat_urllib_request,
)
from youtube_dl.utils import format_bytes
def format_size(bytes):
return '%s (%d bytes)' % (format_bytes(bytes), bytes)
total_bytes = 0
releases = json.loads(compat_urllib_request.urlopen(
'https://api.github.com/repos/rg3/youtube-dl/releases').read().decode('utf-8'))
for release in releases:
compat_print(release['name'])
for asset in release['assets']:
asset_name = asset['name']
total_bytes += asset['download_count'] * asset['size']
if all(not re.match(p, asset_name) for p in (
r'^youtube-dl$',
r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
r'^youtube-dl\.exe$')):
continue
compat_print(
' %s size: %s downloads: %d'
% (asset_name, format_size(asset['size']), asset['download_count']))
compat_print('total downloads traffic: %s' % format_size(total_bytes))

View File

@ -45,7 +45,6 @@
- **archive.org**: archive.org videos - **archive.org**: archive.org videos
- **ARD** - **ARD**
- **ARD:mediathek** - **ARD:mediathek**
- **ARD:mediathek**: Saarländischer Rundfunk
- **arte.tv** - **arte.tv**
- **arte.tv:+7** - **arte.tv:+7**
- **arte.tv:cinema** - **arte.tv:cinema**
@ -153,6 +152,8 @@
- **CSNNE** - **CSNNE**
- **CSpan**: C-SPAN - **CSpan**: C-SPAN
- **CtsNews**: 華視新聞 - **CtsNews**: 華視新聞
- **CTV**
- **CTVNews**
- **culturebox.francetvinfo.fr** - **culturebox.francetvinfo.fr**
- **CultureUnplugged** - **CultureUnplugged**
- **CWTV** - **CWTV**
@ -241,6 +242,7 @@
- **FreeVideo** - **FreeVideo**
- **Funimation** - **Funimation**
- **FunnyOrDie** - **FunnyOrDie**
- **Fusion**
- **GameInformer** - **GameInformer**
- **Gamekings** - **Gamekings**
- **GameOne** - **GameOne**
@ -273,6 +275,7 @@
- **Helsinki**: helsinki.fi - **Helsinki**: helsinki.fi
- **HentaiStigma** - **HentaiStigma**
- **HistoricFilms** - **HistoricFilms**
- **history:topic**: History.com Topic
- **hitbox** - **hitbox**
- **hitbox:live** - **hitbox:live**
- **HornBunny** - **HornBunny**
@ -280,6 +283,8 @@
- **HotStar** - **HotStar**
- **Howcast** - **Howcast**
- **HowStuffWorks** - **HowStuffWorks**
- **HRTi**
- **HRTiPlaylist**
- **HuffPost**: Huffington Post - **HuffPost**: Huffington Post
- **Hypem** - **Hypem**
- **Iconosquare** - **Iconosquare**
@ -326,7 +331,7 @@
- **kuwo:mv**: 酷我音乐 - MV - **kuwo:mv**: 酷我音乐 - MV
- **kuwo:singer**: 酷我音乐 - 歌手 - **kuwo:singer**: 酷我音乐 - 歌手
- **kuwo:song**: 酷我音乐 - **kuwo:song**: 酷我音乐
- **la7.tv** - **la7.it**
- **Laola1Tv** - **Laola1Tv**
- **Le**: 乐视网 - **Le**: 乐视网
- **Learnr** - **Learnr**
@ -359,6 +364,7 @@
- **MatchTV** - **MatchTV**
- **MDR**: MDR.DE and KiKA - **MDR**: MDR.DE and KiKA
- **media.ccc.de** - **media.ccc.de**
- **META**
- **metacafe** - **metacafe**
- **Metacritic** - **Metacritic**
- **Mgoon** - **Mgoon**
@ -385,6 +391,7 @@
- **MovieFap** - **MovieFap**
- **Moviezine** - **Moviezine**
- **MPORA** - **MPORA**
- **MSN**
- **MTV** - **MTV**
- **mtv.de** - **mtv.de**
- **mtviggy.com** - **mtviggy.com**
@ -438,6 +445,7 @@
- **nick.de** - **nick.de**
- **niconico**: ニコニコ動画 - **niconico**: ニコニコ動画
- **NiconicoPlaylist** - **NiconicoPlaylist**
- **NineCNineMedia**
- **njoy**: N-JOY - **njoy**: N-JOY
- **njoy:embed** - **njoy:embed**
- **Noco** - **Noco**
@ -501,8 +509,9 @@
- **plus.google**: Google Plus - **plus.google**: Google Plus
- **pluzz.francetv.fr** - **pluzz.francetv.fr**
- **podomatic** - **podomatic**
- **PolskieRadio**
- **PornHd** - **PornHd**
- **PornHub** - **PornHub**: PornHub and Thumbzilla
- **PornHubPlaylist** - **PornHubPlaylist**
- **PornHubUserVideos** - **PornHubUserVideos**
- **Pornotube** - **Pornotube**
@ -586,8 +595,10 @@
- **Shared**: shared.sx and vivo.sx - **Shared**: shared.sx and vivo.sx
- **ShareSix** - **ShareSix**
- **Sina** - **Sina**
- **SixPlay**
- **skynewsarabia:article**
- **skynewsarabia:video** - **skynewsarabia:video**
- **skynewsarabia:video** - **SkySports**
- **Slideshare** - **Slideshare**
- **Slutload** - **Slutload**
- **smotri**: Smotri.com - **smotri**: Smotri.com
@ -619,6 +630,7 @@
- **SportBoxEmbed** - **SportBoxEmbed**
- **SportDeutschland** - **SportDeutschland**
- **Sportschau** - **Sportschau**
- **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR** - **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- **SSA** - **SSA**
@ -719,6 +731,7 @@
- **UDNEmbed**: 聯合影音 - **UDNEmbed**: 聯合影音
- **Unistra** - **Unistra**
- **Urort**: NRK P3 Urørt - **Urort**: NRK P3 Urørt
- **URPlay**
- **USAToday** - **USAToday**
- **ustream** - **ustream**
- **ustream:channel** - **ustream:channel**
@ -736,6 +749,7 @@
- **vh1.com** - **vh1.com**
- **Vice** - **Vice**
- **ViceShow** - **ViceShow**
- **Vidbit**
- **Viddler** - **Viddler**
- **video.google:search**: Google Video search - **video.google:search**: Google Video search
- **video.mit.edu** - **video.mit.edu**
@ -843,6 +857,7 @@
- **youtube:search**: YouTube.com searches - **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first - **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs - **youtube:search_url**: YouTube.com search URLs
- **youtube:shared**
- **youtube:show**: YouTube.com (multi-season) shows - **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)

View File

@ -21,25 +21,37 @@ try:
import py2exe import py2exe
except ImportError: except ImportError:
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
print("Cannot import py2exe", file=sys.stderr) print('Cannot import py2exe', file=sys.stderr)
exit(1) exit(1)
py2exe_options = { py2exe_options = {
"bundle_files": 1, 'bundle_files': 1,
"compressed": 1, 'compressed': 1,
"optimize": 2, 'optimize': 2,
"dist_dir": '.', 'dist_dir': '.',
"dll_excludes": ['w9xpopen.exe', 'crypt32.dll'], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
} }
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))
DESCRIPTION = 'YouTube video downloader'
LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
py2exe_console = [{ py2exe_console = [{
"script": "./youtube_dl/__main__.py", 'script': './youtube_dl/__main__.py',
"dest_base": "youtube-dl", 'dest_base': 'youtube-dl',
'version': __version__,
'description': DESCRIPTION,
'comments': LONG_DESCRIPTION,
'product_name': 'youtube-dl',
'product_version': __version__,
}] }]
py2exe_params = { py2exe_params = {
'console': py2exe_console, 'console': py2exe_console,
'options': {"py2exe": py2exe_options}, 'options': {'py2exe': py2exe_options},
'zipfile': None 'zipfile': None
} }
@ -72,7 +84,7 @@ else:
params['scripts'] = ['bin/youtube-dl'] params['scripts'] = ['bin/youtube-dl']
class build_lazy_extractors(Command): class build_lazy_extractors(Command):
description = "Build the extractor lazy loading module" description = 'Build the extractor lazy loading module'
user_options = [] user_options = []
def initialize_options(self): def initialize_options(self):
@ -87,16 +99,11 @@ class build_lazy_extractors(Command):
dry_run=self.dry_run, dry_run=self.dry_run,
) )
# Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(),
'youtube_dl/version.py', 'exec'))
setup( setup(
name='youtube_dl', name='youtube_dl',
version=__version__, version=__version__,
description='YouTube video downloader', description=DESCRIPTION,
long_description='Small command-line program to download videos from' long_description=LONG_DESCRIPTION,
' YouTube.com and other video sites.',
url='https://github.com/rg3/youtube-dl', url='https://github.com/rg3/youtube-dl',
author='Ricardo Garcia', author='Ricardo Garcia',
author_email='ytdl@yt-dl.org', author_email='ytdl@yt-dl.org',
@ -112,17 +119,17 @@ setup(
# test_requires = ['nosetest'], # test_requires = ['nosetest'],
classifiers=[ classifiers=[
"Topic :: Multimedia :: Video", 'Topic :: Multimedia :: Video',
"Development Status :: 5 - Production/Stable", 'Development Status :: 5 - Production/Stable',
"Environment :: Console", 'Environment :: Console',
"License :: Public Domain", 'License :: Public Domain',
"Programming Language :: Python :: 2.6", 'Programming Language :: Python :: 2.6',
"Programming Language :: Python :: 2.7", 'Programming Language :: Python :: 2.7',
"Programming Language :: Python :: 3", 'Programming Language :: Python :: 3',
"Programming Language :: Python :: 3.2", 'Programming Language :: Python :: 3.2',
"Programming Language :: Python :: 3.3", 'Programming Language :: Python :: 3.3',
"Programming Language :: Python :: 3.4", 'Programming Language :: Python :: 3.4',
"Programming Language :: Python :: 3.5", 'Programming Language :: Python :: 3.5',
], ],
cmdclass={'build_lazy_extractors': build_lazy_extractors}, cmdclass={'build_lazy_extractors': build_lazy_extractors},

View File

@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL from test.helper import FakeYDL
from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
class TestIE(InfoExtractor): class TestIE(InfoExtractor):
@ -66,6 +66,11 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(ie._html_search_meta('d', html), '4') self.assertEqual(ie._html_search_meta('d', html), '4')
self.assertEqual(ie._html_search_meta('e', html), '5') self.assertEqual(ie._html_search_meta('e', html), '5')
self.assertEqual(ie._html_search_meta('f', html), '6') self.assertEqual(ie._html_search_meta('f', html), '6')
self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
def test_download_json(self): def test_download_json(self):
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')

View File

@ -6,6 +6,7 @@ from __future__ import unicode_literals
import os import os
import sys import sys
import unittest import unittest
import collections
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@ -130,6 +131,15 @@ class TestAllURLsMatching(unittest.TestCase):
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo']) ['Yahoo'])
def test_no_duplicated_ie_names(self):
name_accu = collections.defaultdict(list)
for ie in self.ies:
name_accu[ie.IE_NAME.lower()].append(type(ie).__name__)
for (ie_name, ie_list) in name_accu.items():
self.assertEqual(
len(ie_list), 1,
'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list)))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -87,6 +87,7 @@ class TestCompat(unittest.TestCase):
def test_compat_shlex_split(self): def test_compat_shlex_split(self):
self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
def test_compat_etree_fromstring(self): def test_compat_etree_fromstring(self):
xml = ''' xml = '''

View File

@ -138,27 +138,27 @@ class TestProxy(unittest.TestCase):
self.proxy_thread.daemon = True self.proxy_thread.daemon = True
self.proxy_thread.start() self.proxy_thread.start()
self.cn_proxy = compat_http_server.HTTPServer( self.geo_proxy = compat_http_server.HTTPServer(
('localhost', 0), _build_proxy_handler('cn')) ('localhost', 0), _build_proxy_handler('geo'))
self.cn_port = http_server_port(self.cn_proxy) self.geo_port = http_server_port(self.geo_proxy)
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever) self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
self.cn_proxy_thread.daemon = True self.geo_proxy_thread.daemon = True
self.cn_proxy_thread.start() self.geo_proxy_thread.start()
def test_proxy(self): def test_proxy(self):
cn_proxy = 'localhost:{0}'.format(self.cn_port) geo_proxy = 'localhost:{0}'.format(self.geo_port)
ydl = YoutubeDL({ ydl = YoutubeDL({
'proxy': 'localhost:{0}'.format(self.port), 'proxy': 'localhost:{0}'.format(self.port),
'cn_verification_proxy': cn_proxy, 'geo_verification_proxy': geo_proxy,
}) })
url = 'http://foo.com/bar' url = 'http://foo.com/bar'
response = ydl.urlopen(url).read().decode('utf-8') response = ydl.urlopen(url).read().decode('utf-8')
self.assertEqual(response, 'normal: {0}'.format(url)) self.assertEqual(response, 'normal: {0}'.format(url))
req = compat_urllib_request.Request(url) req = compat_urllib_request.Request(url)
req.add_header('Ytdl-request-proxy', cn_proxy) req.add_header('Ytdl-request-proxy', geo_proxy)
response = ydl.urlopen(req).read().decode('utf-8') response = ydl.urlopen(req).read().decode('utf-8')
self.assertEqual(response, 'cn: {0}'.format(url)) self.assertEqual(response, 'geo: {0}'.format(url))
def test_proxy_with_idn(self): def test_proxy_with_idn(self):
ydl = YoutubeDL({ ydl = YoutubeDL({

View File

@ -60,11 +60,13 @@ from youtube_dl.utils import (
timeconvert, timeconvert,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unified_timestamp,
unsmuggle_url, unsmuggle_url,
uppercase_escape, uppercase_escape,
lowercase_escape, lowercase_escape,
url_basename, url_basename,
urlencode_postdata, urlencode_postdata,
urshift,
update_url_query, update_url_query,
version_tuple, version_tuple,
xpath_with_ns, xpath_with_ns,
@ -283,8 +285,28 @@ class TestUtil(unittest.TestCase):
'20150202') '20150202')
self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214') self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
self.assertEqual(unified_strdate('25-09-2014'), '20140925') self.assertEqual(unified_strdate('25-09-2014'), '20140925')
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
self.assertEqual(
unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
1417001400)
self.assertEqual(
unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
1422902860)
self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
def test_determine_ext(self): def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
@ -383,6 +405,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_url, url) self.assertEqual(res_url, url)
self.assertEqual(res_data, None) self.assertEqual(res_data, None)
smug_url = smuggle_url(url, {'a': 'b'})
smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
res_url, res_data = unsmuggle_url(smug_smug_url)
self.assertEqual(res_url, url)
self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
def test_shell_quote(self): def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""") self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
@ -959,5 +987,9 @@ The first line
self.assertRaises(ValueError, encode_base_n, 0, 70) self.assertRaises(ValueError, encode_base_n, 0, 70)
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table) self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
def test_urshift(self):
self.assertEqual(urshift(3, 1), 1)
self.assertEqual(urshift(-3, 1), 2147483646)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -196,8 +196,8 @@ class YoutubeDL(object):
prefer_insecure: Use HTTP instead of HTTPS to retrieve information. prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube. At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use proxy: URL of the proxy server to use
cn_verification_proxy: URL of the proxy to use for IP address verification geo_verification_proxy: URL of the proxy to use for IP address verification
on Chinese sites. (Experimental) on geo-restricted sites. (Experimental)
socket_timeout: Time to wait for unresponsive hosts, in seconds socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi support, using fridibi
@ -304,6 +304,11 @@ class YoutubeDL(object):
self.params.update(params) self.params.update(params)
self.cache = Cache(self) self.cache = Cache(self)
if self.params.get('cn_verification_proxy') is not None:
self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
if self.params.get('geo_verification_proxy') is None:
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
if params.get('bidi_workaround', False): if params.get('bidi_workaround', False):
try: try:
import pty import pty

View File

@ -382,6 +382,8 @@ def _real_main(argv=None):
'external_downloader_args': external_downloader_args, 'external_downloader_args': external_downloader_args,
'postprocessor_args': postprocessor_args, 'postprocessor_args': postprocessor_args,
'cn_verification_proxy': opts.cn_verification_proxy, 'cn_verification_proxy': opts.cn_verification_proxy,
'geo_verification_proxy': opts.geo_verification_proxy,
} }
with YoutubeDL(ydl_opts) as ydl: with YoutubeDL(ydl_opts) as ydl:

View File

@ -196,6 +196,11 @@ def build_fragments_list(boot_info):
first_frag_number = fragment_run_entry_table[0]['first'] first_frag_number = fragment_run_entry_table[0]['first']
fragments_counter = itertools.count(first_frag_number) fragments_counter = itertools.count(first_frag_number)
for segment, fragments_count in segment_run_table['segment_run']: for segment, fragments_count in segment_run_table['segment_run']:
# In some live HDS streams (for example Rai), `fragments_count` is
# abnormal and causing out-of-memory errors. It's OK to change the
# number of fragments for live streams as they are updated periodically
if fragments_count == 4294967295 and boot_info['live']:
fragments_count = 2
for _ in range(fragments_count): for _ in range(fragments_count):
res.append((segment, next(fragments_counter))) res.append((segment, next(fragments_counter)))
@ -329,7 +334,11 @@ class F4mFD(FragmentFD):
base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo')) bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url) # From Adobe F4M 3.0 spec:
# The <baseURL> element SHALL be the base URL for all relative
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
# URLs should be relative to the location of the containing document.
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
live = boot_info['live'] live = boot_info['live']
metadata_node = media.find(_add_ns('metadata')) metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None: if metadata_node is not None:

View File

@ -2,23 +2,137 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .theplatform import ThePlatformIE
from ..utils import ( from ..utils import (
smuggle_url, smuggle_url,
update_url_query, update_url_query,
unescapeHTML, unescapeHTML,
extract_attributes,
get_element_by_attribute,
)
from ..compat import (
compat_urlparse,
) )
class AENetworksIE(InfoExtractor): class AENetworksBaseIE(ThePlatformIE):
_THEPLATFORM_KEY = 'crazyjava'
_THEPLATFORM_SECRET = 's3cr3t'
class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks' IE_NAME = 'aenetworks'
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])' _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
'info_dict': {
'id': '22253814',
'ext': 'mp4',
'title': 'Winter Is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
},
'add_ie': ['ThePlatform'],
}, {
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
'info_dict': {
'id': '71889446852',
},
'playlist_mincount': 5,
}, {
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
'info_dict': {
'id': 'SERIES4317',
'title': 'Atlanta Plastic',
},
'playlist_mincount': 2,
}, {
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'only_matching': True
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True
}, {
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
'only_matching': True
}, {
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
'only_matching': True
}]
_DOMAIN_TO_REQUESTOR_ID = {
'history.com': 'HISTORY',
'aetv.com': 'AETV',
'mylifetime.com': 'LIFETIME',
'fyi.tv': 'FYI',
}
def _real_extract(self, url):
domain, show_path, movie_display_id = re.match(self._VALID_URL, url).groups()
display_id = show_path or movie_display_id
webpage = self._download_webpage(url, display_id)
if show_path:
url_parts = show_path.split('/')
url_parts_len = len(url_parts)
if url_parts_len == 1:
entries = []
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
entries.append(self.url_result(
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
return self.playlist_result(
entries, self._html_search_meta('aetn:SeriesId', webpage),
self._html_search_meta('aetn:SeriesTitle', webpage))
elif url_parts_len == 2:
entries = []
for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage):
episode_attributes = extract_attributes(episode_item)
episode_url = compat_urlparse.urljoin(
url, episode_attributes['data-canonical'])
entries.append(self.url_result(
episode_url, 'AENetworks',
episode_attributes['data-videoid']))
return self.playlist_result(
entries, self._html_search_meta('aetn:SeasonId', webpage))
query = {
'mbr': 'true',
'assetTypes': 'medium_video_s3'
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating'])
query['auth'] = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
info.update(self._search_json_ld(webpage, video_id, fatal=False))
media_url = update_url_query(media_url, query)
media_url = self._sign_url(media_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
self._sort_formats(formats)
info.update({
'id': video_id,
'formats': formats,
'subtitles': subtitles,
})
return info
class HistoryTopicIE(AENetworksBaseIE):
IE_NAME = 'history:topic'
IE_DESC = 'History.com Topic'
_VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)(?:/[^/]+(?:/(?P<video_display_id>[^/?#]+))?)?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
'info_dict': { 'info_dict': {
'id': 'g12m5Gyt3fdR', 'id': '40700995724',
'ext': 'mp4', 'ext': 'mp4',
'title': "Bet You Didn't Know: Valentine's Day", 'title': "Bet You Didn't Know: Valentine's Day",
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
@ -31,57 +145,61 @@ class AENetworksIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'expected_warnings': ['JSON-LD'],
}, { }, {
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', 'info_dict':
'info_dict': { {
'id': 'eg47EERs_JsZ', 'id': 'world-war-i-history',
'ext': 'mp4', 'title': 'World War I History',
'title': 'Winter Is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
}, },
'add_ie': ['ThePlatform'], 'playlist_mincount': 24,
}, { }, {
'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry', 'url': 'http://www.history.com/topics/world-war-i-history/videos',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage', 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history',
'only_matching': True 'only_matching': True,
}, { }, {
'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients', 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/speeches',
'only_matching': True 'only_matching': True,
}] }]
def _real_extract(self, url): def theplatform_url_result(self, theplatform_url, video_id, query):
page_type, video_id = re.match(self._VALID_URL, url).groups() return {
webpage = self._download_webpage(url, video_id)
video_url_re = [
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
r"media_url\s*=\s*'([^']+)'"
]
video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
query = {'mbr': 'true'}
if page_type == 'shows':
query['assetTypes'] = 'medium_video_s3'
if 'switch=hds' in video_url:
query['switch'] = 'hls'
info = self._search_json_ld(webpage, video_id, fatal=False)
info.update({
'_type': 'url_transparent', '_type': 'url_transparent',
'id': video_id,
'url': smuggle_url( 'url': smuggle_url(
update_url_query(video_url, query), update_url_query(theplatform_url, query),
{ {
'sig': { 'sig': {
'key': 'crazyjava', 'key': self._THEPLATFORM_KEY,
'secret': 's3cr3t'}, 'secret': self._THEPLATFORM_SECRET,
},
'force_smil_url': True 'force_smil_url': True
}), }),
}) 'ie_key': 'ThePlatform',
return info }
def _real_extract(self, url):
topic_id, video_display_id = re.match(self._VALID_URL, url).groups()
if video_display_id:
webpage = self._download_webpage(url, video_display_id)
release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
release_url = unescapeHTML(release_url)
return self.theplatform_url_result(
release_url, video_id, {
'mbr': 'true',
'switch': 'hls'
})
else:
webpage = self._download_webpage(url, topic_id)
entries = []
for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage):
video_attributes = extract_attributes(episode_item)
entries.append(self.theplatform_url_result(
video_attributes['data-release-url'], video_attributes['data-id'], {
'mbr': 'true',
'switch': 'hls'
}))
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))

View File

@ -7,6 +7,8 @@ from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_duration,
unified_strdate,
) )
@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', 'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
'info_dict': { 'info_dict': {
'id': 'manofsteel', 'id': '5111',
'title': 'Man of Steel',
}, },
'playlist': [ 'playlist': [
{ {
@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor):
'id': 'blackthorn', 'id': 'blackthorn',
}, },
'playlist_mincount': 2, 'playlist_mincount': 2,
'expected_warnings': ['Unable to download JSON metadata'],
}, {
# json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
'info_dict': {
'id': '15881',
'title': 'Kung Fu Panda 3',
},
'playlist_mincount': 4,
}, { }, {
'url': 'http://trailers.apple.com/ca/metropole/autrui/', 'url': 'http://trailers.apple.com/ca/metropole/autrui/',
'only_matching': True, 'only_matching': True,
@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor):
movie = mobj.group('movie') movie = mobj.group('movie')
uploader_id = mobj.group('company') uploader_id = mobj.group('company')
webpage = self._download_webpage(url, movie)
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
film_data = self._download_json(
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
film_id, fatal=False)
if film_data:
entries = []
for clip in film_data.get('clips', []):
clip_title = clip['title']
formats = []
for version, version_data in clip.get('versions', {}).items():
for size, size_data in version_data.get('sizes', {}).items():
src = size_data.get('src')
if not src:
continue
formats.append({
'format_id': '%s-%s' % (version, size),
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
'language': version[:2],
})
self._sort_formats(formats)
entries.append({
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
'formats': formats,
'title': clip_title,
'thumbnail': clip.get('screen') or clip.get('thumb'),
'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
'upload_date': unified_strdate(clip.get('posted')),
'uploader_id': uploader_id,
})
page_data = film_data.get('page', {})
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s): def fix_html(s):

View File

@ -419,6 +419,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
'info_dict': { 'info_dict': {
'id': 'PL-013263', 'id': 'PL-013263',
'title': 'Areva & Uramin', 'title': 'Areva & Uramin',
'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
}, },
'playlist_mincount': 6, 'playlist_mincount': 6,
}, { }, {

View File

@ -585,6 +585,13 @@ class BrightcoveNewIE(InfoExtractor):
'format_id': build_format_id('rtmp'), 'format_id': build_format_id('rtmp'),
}) })
formats.append(f) formats.append(f)
errors = json_data.get('errors')
if not formats and errors:
error = errors[0]
raise ExtractorError(
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {} subtitles = {}

View File

@ -5,6 +5,7 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .facebook import FacebookIE
class BuzzFeedIE(InfoExtractor): class BuzzFeedIE(InfoExtractor):
@ -20,11 +21,11 @@ class BuzzFeedIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'aVCR29aE_OQ', 'id': 'aVCR29aE_OQ',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Angry Ram destroys a punching bag..',
'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
'upload_date': '20141024', 'upload_date': '20141024',
'uploader_id': 'Buddhanz1', 'uploader_id': 'Buddhanz1',
'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl', 'uploader': 'Angry Ram',
'uploader': 'Buddhanz',
'title': 'Angry Ram destroys a punching bag',
} }
}] }]
}, { }, {
@ -41,13 +42,30 @@ class BuzzFeedIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'mVmBL8B-In0', 'id': 'mVmBL8B-In0',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:Munchkin the Teddy Bear gets her exercise',
'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
'upload_date': '20141124', 'upload_date': '20141124',
'uploader_id': 'CindysMunchkin', 'uploader_id': 'CindysMunchkin',
'description': 're:© 2014 Munchkin the',
'uploader': 're:^Munchkin the', 'uploader': 're:^Munchkin the',
'title': 're:Munchkin the Teddy Bear gets her exercise',
}, },
}] }]
}, {
'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
'info_dict': {
'id': 'the-most-adorable-crash-landing-ever',
'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
'description': 'This gosling knows how to stick a landing.',
},
'playlist': [{
'md5': '763ca415512f91ca62e4621086900a23',
'info_dict': {
'id': '971793786185728',
'ext': 'mp4',
'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
'uploader': 'Calgary Outdoor Centre-University of Calgary',
},
}],
'add_ie': ['Facebook'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -66,6 +84,10 @@ class BuzzFeedIE(InfoExtractor):
continue continue
entries.append(self.url_result(video['url'])) entries.append(self.url_result(video['url']))
facebook_url = FacebookIE._extract_url(webpage)
if facebook_url:
entries.append(self.url_result(facebook_url))
return { return {
'_type': 'playlist', '_type': 'playlist',
'id': playlist_id, 'id': playlist_id,

View File

@ -80,9 +80,6 @@ class CBSInteractiveIE(ThePlatformIE):
media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId']) media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
formats, subtitles = [], {} formats, subtitles = [], {}
if site == 'cnet':
formats, subtitles = self._extract_theplatform_smil(
self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id)
for (fkey, vid) in vdata['files'].items(): for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']: if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue continue
@ -94,7 +91,7 @@ class CBSInteractiveIE(ThePlatformIE):
subtitles = self._merge_subtitles(subtitles, tp_subtitles) subtitles = self._merge_subtitles(subtitles, tp_subtitles)
self._sort_formats(formats) self._sort_formats(formats)
info = self.get_metadata('kYEXFC/%s' % media_guid_path, video_id) info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
info.update({ info.update({
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,

View File

@ -749,10 +749,12 @@ class InfoExtractor(object):
return self._og_search_property('url', html, **kargs) return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
if not isinstance(name, (list, tuple)):
name = [name]
if display_name is None: if display_name is None:
display_name = name display_name = name[0]
return self._html_search_regex( return self._html_search_regex(
self._meta_regex(name), [self._meta_regex(n) for n in name],
html, display_name, fatal=fatal, group='content', **kwargs) html, display_name, fatal=fatal, group='content', **kwargs)
def _dc_search_uploader(self, html): def _dc_search_uploader(self, html):
@ -876,7 +878,11 @@ class InfoExtractor(object):
f['ext'] = determine_ext(f['url']) f['ext'] = determine_ext(f['url'])
if isinstance(field_preference, (list, tuple)): if isinstance(field_preference, (list, tuple)):
return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference) return tuple(
f.get(field)
if f.get(field) is not None
else ('' if field == 'format_id' else -1)
for field in field_preference)
preference = f.get('preference') preference = f.get('preference')
if preference is None: if preference is None:
@ -1723,6 +1729,13 @@ class InfoExtractor(object):
def _mark_watched(self, *args, **kwargs): def _mark_watched(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses') raise NotImplementedError('This method must be implemented by subclasses')
def geo_verification_headers(self):
headers = {}
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
if geo_verification_proxy:
headers['Ytdl-request-proxy'] = geo_verification_proxy
return headers
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """

View File

@ -0,0 +1,30 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class CTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
'info_dict': {
'id': '706966',
'ext': 'mp4',
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
'upload_date': '20150919',
'timestamp': 1442624700,
},
'expected_warnings': ['HTTP Error 404'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'id': video_id,
'url': '9c9media:ctv_web:%s' % video_id,
'ie_key': 'NineCNineMedia',
}

View File

@ -0,0 +1,65 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import orderedSet
class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
'info_dict': {
'id': '901995',
'ext': 'mp4',
'title': 'Extended: \'That person cannot be me\' Johnson says',
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284,
'upload_date': '20160630',
}
}, {
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
'info_dict':
{
'id': '1.2966224',
},
'playlist_mincount': 19,
}, {
'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
'info_dict':
{
'id': '1.2876780',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.ctvnews.ca/1.810401',
'only_matching': True,
}, {
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
'only_matching': True,
}]
def _real_extract(self, url):
page_id = self._match_id(url)
def ninecninemedia_url_result(clip_id):
return {
'_type': 'url_transparent',
'id': clip_id,
'url': '9c9media:ctvnews_web:%s' % clip_id,
'ie_key': 'NineCNineMedia',
}
if page_id.isdigit():
return ninecninemedia_url_result(page_id)
else:
webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={
'ot': 'example.AjaxPageLayout.ot',
'maxItemsPerPage': 1000000,
})
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
return self.playlist_result(entries, page_id)

View File

@ -20,7 +20,7 @@ from ..utils import (
class DCNIE(InfoExtractor): class DCNIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
def _real_extract(self, url): def _real_extract(self, url):
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
@ -55,30 +55,32 @@ class DCNBaseIE(InfoExtractor):
'is_live': is_live, 'is_live': is_live,
} }
def _extract_video_formats(self, webpage, video_id, entry_protocol): def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
formats = [] formats = []
m3u8_url = self._html_search_regex( format_url_base = 'http' + self._html_search_regex(
r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False) [
if m3u8_url: r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
formats.extend(self._extract_m3u8_formats( r'<a[^>]+href="rtsp(://[^"]+)"'
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None)) ], webpage, 'format url')
# TODO: Current DASH formats are broken - $Time$ pattern in
rtsp_url = self._search_regex( # <SegmentTemplate> not implemented yet
r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) # formats.extend(self._extract_mpd_formats(
if rtsp_url: # format_url_base + '/manifest.mpd',
formats.append({ # video_id, mpd_id='dash', fatal=False))
'url': rtsp_url, formats.extend(self._extract_m3u8_formats(
'format_id': 'rtsp', format_url_base + '/playlist.m3u8', video_id, 'mp4',
}) m3u8_entry_protocol, m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
format_url_base + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
return formats return formats
class DCNVideoIE(DCNBaseIE): class DCNVideoIE(DCNBaseIE):
IE_NAME = 'dcn:video' IE_NAME = 'dcn:video'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
'info_dict': 'info_dict':
{ {
@ -94,7 +96,10 @@ class DCNVideoIE(DCNBaseIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
} }, {
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -120,7 +125,7 @@ class DCNVideoIE(DCNBaseIE):
class DCNLiveIE(DCNBaseIE): class DCNLiveIE(DCNBaseIE):
IE_NAME = 'dcn:live' IE_NAME = 'dcn:live'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url) channel_id = self._match_id(url)
@ -147,7 +152,7 @@ class DCNLiveIE(DCNBaseIE):
class DCNSeasonIE(InfoExtractor): class DCNSeasonIE(InfoExtractor):
IE_NAME = 'dcn:season' IE_NAME = 'dcn:season'
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
_TEST = { _TEST = {
'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
'info_dict': 'info_dict':

View File

@ -50,6 +50,14 @@ class EaglePlatformIE(InfoExtractor):
'skip': 'Georestricted', 'skip': 'Georestricted',
}] }]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1',
webpage)
if mobj is not None:
return mobj.group('url')
@staticmethod @staticmethod
def _handle_error(response): def _handle_error(response):
status = int_or_none(response.get('status', 200)) status = int_or_none(response.get('status', 200))

View File

@ -20,7 +20,10 @@ from .adobetv import (
AdobeTVVideoIE, AdobeTVVideoIE,
) )
from .adultswim import AdultSwimIE from .adultswim import AdultSwimIE
from .aenetworks import AENetworksIE from .aenetworks import (
AENetworksIE,
HistoryTopicIE,
)
from .afreecatv import AfreecaTVIE from .afreecatv import AfreecaTVIE
from .aftonbladet import AftonbladetIE from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
@ -168,6 +171,8 @@ from .crunchyroll import (
) )
from .cspan import CSpanIE from .cspan import CSpanIE
from .ctsnews import CtsNewsIE from .ctsnews import CtsNewsIE
from .ctv import CTVIE
from .ctvnews import CTVNewsIE
from .cultureunplugged import CultureUnpluggedIE from .cultureunplugged import CultureUnpluggedIE
from .cwtv import CWTVIE from .cwtv import CWTVIE
from .dailymail import DailyMailIE from .dailymail import DailyMailIE
@ -276,6 +281,7 @@ from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE from .freevideo import FreeVideoIE
from .funimation import FunimationIE from .funimation import FunimationIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .gameinformer import GameInformerIE from .gameinformer import GameInformerIE
from .gamekings import GamekingsIE from .gamekings import GamekingsIE
from .gameone import ( from .gameone import (
@ -320,6 +326,10 @@ from .hotnewhiphop import HotNewHipHopIE
from .hotstar import HotStarIE from .hotstar import HotStarIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE from .howstuffworks import HowStuffWorksIE
from .hrti import (
HRTiIE,
HRTiPlaylistIE,
)
from .huffpost import HuffPostIE from .huffpost import HuffPostIE
from .hypem import HypemIE from .hypem import HypemIE
from .iconosquare import IconosquareIE from .iconosquare import IconosquareIE
@ -422,6 +432,7 @@ from .makerschannel import MakersChannelIE
from .makertv import MakerTVIE from .makertv import MakerTVIE
from .matchtv import MatchTVIE from .matchtv import MatchTVIE
from .mdr import MDRIE from .mdr import MDRIE
from .meta import METAIE
from .metacafe import MetacafeIE from .metacafe import MetacafeIE
from .metacritic import MetacriticIE from .metacritic import MetacriticIE
from .mgoon import MgoonIE from .mgoon import MgoonIE
@ -454,6 +465,7 @@ from .motherless import MotherlessIE
from .motorsport import MotorsportIE from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE from .movieclips import MovieClipsIE
from .moviezine import MoviezineIE from .moviezine import MoviezineIE
from .msn import MSNIE
from .mtv import ( from .mtv import (
MTVIE, MTVIE,
MTVServicesEmbeddedIE, MTVServicesEmbeddedIE,
@ -521,6 +533,7 @@ from .nick import (
NickDeIE, NickDeIE,
) )
from .niconico import NiconicoIE, NiconicoPlaylistIE from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE from .ninegag import NineGagIE
from .noco import NocoIE from .noco import NocoIE
from .normalboots import NormalbootsIE from .normalboots import NormalbootsIE
@ -606,6 +619,7 @@ from .pluralsight import (
PluralsightCourseIE, PluralsightCourseIE,
) )
from .podomatic import PodomaticIE from .podomatic import PodomaticIE
from .polskieradio import PolskieRadioIE
from .porn91 import Porn91IE from .porn91 import Porn91IE
from .pornhd import PornHdIE from .pornhd import PornHdIE
from .pornhub import ( from .pornhub import (
@ -704,10 +718,12 @@ from .shahid import ShahidIE
from .shared import SharedIE from .shared import SharedIE
from .sharesix import ShareSixIE from .sharesix import ShareSixIE
from .sina import SinaIE from .sina import SinaIE
from .sixplay import SixPlayIE
from .skynewsarabia import ( from .skynewsarabia import (
SkyNewsArabiaIE, SkyNewsArabiaIE,
SkyNewsArabiaArticleIE, SkyNewsArabiaArticleIE,
) )
from .skysports import SkySportsIE
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
from .slutload import SlutloadIE from .slutload import SlutloadIE
from .smotri import ( from .smotri import (
@ -889,6 +905,7 @@ from .udn import UDNEmbedIE
from .digiteka import DigitekaIE from .digiteka import DigitekaIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .urort import UrortIE from .urort import UrortIE
from .urplay import URPlayIE
from .usatoday import USATodayIE from .usatoday import USATodayIE
from .ustream import UstreamIE, UstreamChannelIE from .ustream import UstreamIE, UstreamChannelIE
from .ustudio import ( from .ustudio import (
@ -915,6 +932,7 @@ from .vice import (
ViceIE, ViceIE,
ViceShowIE, ViceShowIE,
) )
from .vidbit import VidbitIE
from .viddler import ViddlerIE from .viddler import ViddlerIE
from .videodetective import VideoDetectiveIE from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE
@ -1048,6 +1066,7 @@ from .youtube import (
YoutubeSearchDateIE, YoutubeSearchDateIE,
YoutubeSearchIE, YoutubeSearchIE,
YoutubeSearchURLIE, YoutubeSearchURLIE,
YoutubeSharedVideoIE,
YoutubeShowIE, YoutubeShowIE,
YoutubeSubscriptionsIE, YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE, YoutubeTruncatedIDIE,

View File

@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
if mobj is not None:
return mobj.group('url')
# Facebook API embed
# see https://developers.facebook.com/docs/plugins/embedded-video-player
mobj = re.search(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage)
if mobj is not None:
return mobj.group('url')
def _login(self): def _login(self):
(useremail, password) = self._get_login_info() (useremail, password) = self._get_login_info()
if useremail is None: if useremail is None:

View File

@ -0,0 +1,35 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .ooyala import OoyalaIE
class FusionIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
'info_dict': {
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
'ext': 'mp4',
'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
'duration': 140.0,
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://fusion.net/video/201781',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
ooyala_code = self._search_regex(
r'data-video-id=(["\'])(?P<code>.+?)\1',
webpage, 'ooyala code', group='code')
return OoyalaIE._build_url_result(ooyala_code)

View File

@ -64,6 +64,9 @@ from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from .vessel import VesselIE from .vessel import VesselIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -920,6 +923,24 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': ['Kaltura'], 'add_ie': ['Kaltura'],
}, },
{
# Kaltura embedded via quoted entry_id
'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
'info_dict': {
'id': '0_utuok90b',
'ext': 'mp4',
'title': '06_matthew_brender_raj_dutt',
'timestamp': 1466638791,
'upload_date': '20160622',
},
'add_ie': ['Kaltura'],
'expected_warnings': [
'Could not send HEAD request'
],
'params': {
'skip_download': True,
}
},
# Eagle.Platform embed (generic URL) # Eagle.Platform embed (generic URL)
{ {
'url': 'http://lenta.ru/news/2015/03/06/navalny/', 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@ -1091,12 +1112,17 @@ class GenericIE(InfoExtractor):
# Dailymotion Cloud video # Dailymotion Cloud video
{ {
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910', 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
'md5': '49444254273501a64675a7e68c502681', 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
'info_dict': { 'info_dict': {
'id': '5585de919473990de4bee11b', 'id': 'x2uy8t3',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Le débat', 'title': 'Sauvons les abeilles ! - Le débat',
'description': 'md5:d9082128b1c5277987825d684939ca26',
'thumbnail': 're:^https?://.*\.jpe?g$', 'thumbnail': 're:^https?://.*\.jpe?g$',
'timestamp': 1434970506,
'upload_date': '20150622',
'uploader': 'Public Sénat',
'uploader_id': 'xa9gza',
} }
}, },
# OnionStudios embed # OnionStudios embed
@ -1220,6 +1246,70 @@ class GenericIE(InfoExtractor):
'uploader': 'www.hudl.com', 'uploader': 'www.hudl.com',
}, },
}, },
# twitter:player embed
{
'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
'md5': 'a3e0df96369831de324f0778e126653c',
'info_dict': {
'id': '4909620399001',
'ext': 'mp4',
'title': 'What Do Black Holes Sound Like?',
'description': 'what do black holes sound like',
'upload_date': '20160524',
'uploader_id': '29913724001',
'timestamp': 1464107587,
'uploader': 'TheAtlantic',
},
'add_ie': ['BrightcoveLegacy'],
},
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
'md5': 'fbcde74f534176ecb015849146dd3aee',
'info_dict': {
'id': '599637780109885',
'ext': 'mp4',
'title': 'Facebook video #599637780109885',
},
},
# Facebook API embed
{
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
'md5': 'a47372ee61b39a7b90287094d447d94e',
'info_dict': {
'id': '10153467542406923',
'ext': 'mp4',
'title': 'Facebook video #10153467542406923',
},
},
# Wordpress "YouTube Video Importer" plugin
{
'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
'md5': 'd16797741b560b485194eddda8121b48',
'info_dict': {
'id': 'HNTXWDXV9Is',
'ext': 'mp4',
'title': 'Blue Devils Drumline Stanford lot 2016',
'upload_date': '20160627',
'uploader_id': 'GENOCIDE8GENERAL10',
'uploader': 'cylus cyrus',
},
},
{
# video stored on custom kaltura server
'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
'md5': '537617d06e64dfed891fa1593c4b30cc',
'info_dict': {
'id': '0_1iotm5bh',
'ext': 'mp4',
'title': 'Elecciones británicas: 5 lecciones para Rajoy',
'description': 'md5:435a89d68b9760b92ce67ed227055f16',
'uploader_id': 'videos.expansion@el-mundo.net',
'upload_date': '20150429',
'timestamp': 1430303472,
},
'add_ie': ['Kaltura'],
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -1576,6 +1666,13 @@ class GenericIE(InfoExtractor):
if matches: if matches:
return _playlist_from_matches(matches, lambda m: unescapeHTML(m)) return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
# Look for Wordpress "YouTube Video Importer" plugin
matches = re.findall(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
if matches:
return _playlist_from_matches(matches, lambda m: m[-1])
# Look for embedded Dailymotion player # Look for embedded Dailymotion player
matches = re.findall( matches = re.findall(
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
@ -1718,10 +1815,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url')) return self.url_result(mobj.group('url'))
# Look for embedded Facebook player # Look for embedded Facebook player
mobj = re.search( facebook_url = FacebookIE._extract_url(webpage)
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage) if facebook_url is not None:
if mobj is not None: return self.url_result(facebook_url, 'Facebook')
return self.url_result(mobj.group('url'), 'Facebook')
# Look for embedded VK player # Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
@ -1903,18 +1999,14 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks') return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds # Look for Kaltura embeds
mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or kaltura_url = KalturaIE._extract_url(webpage)
re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage)) if kaltura_url:
if mobj is not None: return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
return self.url_result(smuggle_url(
'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
{'source_url': url}), 'Kaltura')
# Look for Eagle.Platform embeds # Look for Eagle.Platform embeds
mobj = re.search( eagleplatform_url = EaglePlatformIE._extract_url(webpage)
r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage) if eagleplatform_url:
if mobj is not None: return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
return self.url_result(mobj.group('url'), 'EaglePlatform')
# Look for ClipYou (uses Eagle.Platform) embeds # Look for ClipYou (uses Eagle.Platform) embeds
mobj = re.search( mobj = re.search(
@ -2060,6 +2152,11 @@ class GenericIE(InfoExtractor):
'uploader': video_uploader, 'uploader': video_uploader,
} }
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
if embed_url:
return self.url_result(embed_url)
def check_video(vurl): def check_video(vurl):
if YoutubeIE.suitable(vurl): if YoutubeIE.suitable(vurl):
return True return True

View File

@ -0,0 +1,202 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
parse_age_limit,
sanitized_Request,
try_get,
)
class HRTiBaseIE(InfoExtractor):
"""
Base Information Extractor for Croatian Radiotelevision
video on demand site https://hrti.hrt.hr
Reverse engineered from the JavaScript app in app.min.js
"""
_NETRC_MACHINE = 'hrti'
_APP_LANGUAGE = 'hr'
_APP_VERSION = '1.1'
_APP_PUBLICATION_ID = 'all_in_one'
_API_URL = 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json'
def _initialize_api(self):
init_data = {
'application_publication_id': self._APP_PUBLICATION_ID
}
uuid = self._download_json(
self._API_URL, None, note='Downloading uuid',
errnote='Unable to download uuid',
data=json.dumps(init_data).encode('utf-8'))['uuid']
app_data = {
'uuid': uuid,
'application_publication_id': self._APP_PUBLICATION_ID,
'application_version': self._APP_VERSION
}
req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
req.get_method = lambda: 'PUT'
resources = self._download_json(
req, None, note='Downloading session information',
errnote='Unable to download session information')
self._session_id = resources['session_id']
modules = resources['modules']
self._search_url = modules['vod_catalog']['resources']['search']['uri'].format(
language=self._APP_LANGUAGE,
application_id=self._APP_PUBLICATION_ID)
self._login_url = (modules['user']['resources']['login']['uri'] +
'/format/json').format(session_id=self._session_id)
self._logout_url = modules['user']['resources']['logout']['uri']
def _login(self):
(username, password) = self._get_login_info()
# TODO: figure out authentication with cookies
if username is None or password is None:
self.raise_login_required()
auth_data = {
'username': username,
'password': password,
}
try:
auth_info = self._download_json(
self._login_url, None, note='Logging in', errnote='Unable to log in',
data=json.dumps(auth_data).encode('utf-8'))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406:
auth_info = self._parse_json(e.cause.read().encode('utf-8'), None)
else:
raise
error_message = auth_info.get('error', {}).get('message')
if error_message:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error_message),
expected=True)
self._token = auth_info['secure_streaming_token']
def _real_initialize(self):
self._initialize_api()
self._login()
class HRTiIE(HRTiBaseIE):
_VALID_URL = r'''(?x)
(?:
hrti:(?P<short_id>[0-9]+)|
https?://
hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
)
'''
_TESTS = [{
'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd',
'info_dict': {
'id': '2181385',
'display_id': 'republika-dokumentarna-serija-16-hd',
'ext': 'mp4',
'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)',
'description': 'md5:48af85f620e8e0e1df4096270568544f',
'duration': 2922,
'view_count': int,
'average_rating': int,
'episode_number': int,
'season_number': int,
'age_limit': 12,
},
'skip': 'Requires account credentials',
}, {
'url': 'https://hrti.hrt.hr/#/video/show/2181385/',
'only_matching': True,
}, {
'url': 'hrti:2181385',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('short_id') or mobj.group('id')
display_id = mobj.group('display_id') or video_id
video = self._download_json(
'%s/video_id/%s/format/json' % (self._search_url, video_id),
display_id, 'Downloading video metadata JSON')['video'][0]
title_info = video['title']
title = title_info['title_long']
movie = video['video_assets']['movie'][0]
m3u8_url = movie['url'].format(TOKEN=self._token)
formats = self._extract_m3u8_formats(
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)
description = clean_html(title_info.get('summary_long'))
age_limit = parse_age_limit(video.get('parental_control', {}).get('rating'))
view_count = int_or_none(video.get('views'))
average_rating = int_or_none(video.get('user_rating'))
duration = int_or_none(movie.get('duration'))
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'average_rating': average_rating,
'age_limit': age_limit,
'formats': formats,
}
class HRTiPlaylistIE(HRTiBaseIE):
_VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
_TESTS = [{
'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
'info_dict': {
'id': '212',
'title': 'ekumena',
},
'playlist_mincount': 8,
'skip': 'Requires account credentials',
}, {
'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
category_id = mobj.group('id')
display_id = mobj.group('display_id') or category_id
response = self._download_json(
'%s/category_id/%s/format/json' % (self._search_url, category_id),
display_id, 'Downloading video metadata JSON')
video_ids = try_get(
response, lambda x: x['video_listings'][0]['alternatives'][0]['list'],
list) or [video['id'] for video in response.get('videos', []) if video.get('id')]
entries = [self.url_result('hrti:%s' % video_id) for video_id in video_ids]
return self.playlist_result(entries, category_id, display_id)

View File

@ -3,28 +3,22 @@ from __future__ import unicode_literals
import hashlib import hashlib
import itertools import itertools
import math
import os
import random
import re import re
import time import time
import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_parse_qs,
compat_str, compat_str,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
) )
from ..utils import ( from ..utils import (
clean_html,
decode_packed_codes, decode_packed_codes,
get_element_by_id,
get_element_by_attribute,
ExtractorError, ExtractorError,
ohdave_rsa_encrypt, ohdave_rsa_encrypt,
remove_start, remove_start,
sanitized_Request,
urlencode_postdata,
url_basename,
) )
@ -171,70 +165,21 @@ class IqiyiIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.iqiyi.com/v_19rrojlavg.html', 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
'md5': '2cb594dc2781e6c941a110d8f358118b', # MD5 checksum differs on my machine and Travis CI
'info_dict': { 'info_dict': {
'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
'ext': 'mp4',
'title': '美国德州空中惊现奇异云团 酷似UFO', 'title': '美国德州空中惊现奇异云团 酷似UFO',
'ext': 'f4v',
} }
}, { }, {
'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
'md5': '667171934041350c5de3f5015f7f1152',
'info_dict': { 'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb', 'id': 'e3f585b550a280af23c98b6cb2be19fb',
'title': '名侦探柯南第752集', 'ext': 'mp4',
}, 'title': '名侦探柯南 国语版第752集 迫近灰原秘密的黑影 下篇',
'playlist': [{
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}, {
'info_dict': {
'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
'ext': 'f4v',
'title': '名侦探柯南第752集',
},
}],
'params': {
'skip_download': True,
}, },
'skip': 'Geo-restricted to China',
}, { }, {
'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
'only_matching': True, 'only_matching': True,
@ -250,22 +195,10 @@ class IqiyiIE(InfoExtractor):
'url': 'http://www.iqiyi.com/v_19rrny4w8w.html', 'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
'info_dict': { 'info_dict': {
'id': 'f3cf468b39dddb30d676f89a91200dc1', 'id': 'f3cf468b39dddb30d676f89a91200dc1',
'ext': 'mp4',
'title': '泰坦尼克号', 'title': '泰坦尼克号',
}, },
'playlist': [{ 'skip': 'Geo-restricted to China',
'info_dict': {
'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
'ext': 'f4v',
'title': '泰坦尼克号',
},
}, {
'info_dict': {
'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
'ext': 'f4v',
'title': '泰坦尼克号',
},
}],
'expected_warnings': ['Needs a VIP account for full video'],
}, { }, {
'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html', 'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
'info_dict': { 'info_dict': {
@ -278,20 +211,15 @@ class IqiyiIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_FORMATS_MAP = [ _FORMATS_MAP = {
('1', 'h6'), '96': 1, # 216p, 240p
('2', 'h5'), '1': 2, # 336p, 360p
('3', 'h4'), '2': 3, # 480p, 504p
('4', 'h3'), '21': 4, # 504p
('5', 'h2'), '4': 5, # 720p
('10', 'h1'), '17': 5, # 720p
] '5': 6, # 1072p, 1080p
'18': 7, # 1080p
AUTH_API_ERRORS = {
# No preview available (不允许试看鉴权失败)
'Q00505': 'This video requires a VIP account',
# End of preview time (试看结束鉴权失败)
'Q00506': 'Needs a VIP account for full video',
} }
def _real_initialize(self): def _real_initialize(self):
@ -352,177 +280,23 @@ class IqiyiIE(InfoExtractor):
return True return True
def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning): def get_raw_data(self, tvid, video_id):
auth_params = { tm = int(time.time() * 1000)
# version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
'version': '2.0', key = 'd5fb4bd9d50c4be6948c97edd7254b0e'
'platform': 'b6c13e26323c537d', sc = md5_text(compat_str(tm) + key + tvid)
'aid': tvid, params = {
'tvid': tvid, 'tvid': tvid,
'uid': '',
'deviceId': _uuid,
'playType': 'main', # XXX: always main?
'filename': os.path.splitext(url_basename(api_video_url))[0],
}
qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query)
for key, val in qd_items.items():
auth_params[key] = val[0]
auth_req = sanitized_Request(
'http://api.vip.iqiyi.com/services/ckn.action',
urlencode_postdata(auth_params))
# iQiyi server throws HTTP 405 error without the following header
auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
auth_result = self._download_json(
auth_req, video_id,
note='Downloading video authentication JSON',
errnote='Unable to download video authentication JSON')
code = auth_result.get('code')
msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code
if code == 'Q00506':
if do_report_warning:
self.report_warning(msg)
return False
if 'data' not in auth_result:
if msg is not None:
raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True)
raise ExtractorError('Unexpected error from Iqiyi auth API')
return auth_result['data']
def construct_video_urls(self, data, video_id, _uuid, tvid):
def do_xor(x, y):
a = y % 3
if a == 1:
return x ^ 121
if a == 2:
return x ^ 72
return x ^ 103
def get_encode_code(l):
a = 0
b = l.split('-')
c = len(b)
s = ''
for i in range(c - 1, -1, -1):
a = do_xor(int(b[c - i - 1], 16), i)
s += chr(a)
return s[::-1]
def get_path_key(x, format_id, segment_index):
mg = ')(*&^flash@#$%a'
tm = self._download_json(
'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
)['t']
t = str(int(math.floor(int(tm) / (600.0))))
return md5_text(t + mg + x)
video_urls_dict = {}
need_vip_warning_report = True
for format_item in data['vp']['tkl'][0]['vs']:
if 0 < int(format_item['bid']) <= 10:
format_id = self.get_format(format_item['bid'])
else:
continue
video_urls = []
video_urls_info = format_item['fs']
if not format_item['fs'][0]['l'].startswith('/'):
t = get_encode_code(format_item['fs'][0]['l'])
if t.endswith('mp4'):
video_urls_info = format_item['flvs']
for segment_index, segment in enumerate(video_urls_info):
vl = segment['l']
if not vl.startswith('/'):
vl = get_encode_code(vl)
is_vip_video = '/vip/' in vl
filesize = segment['b']
base_url = data['vp']['du'].split('/')
if not is_vip_video:
key = get_path_key(
vl.split('/')[-1].split('.')[0], format_id, segment_index)
base_url.insert(-1, key)
base_url = '/'.join(base_url)
param = {
'su': _uuid,
'qyid': uuid.uuid4().hex,
'client': '',
'z': '',
'bt': '',
'ct': '',
'tn': str(int(time.time()))
}
api_video_url = base_url + vl
if is_vip_video:
api_video_url = api_video_url.replace('.f4v', '.hml')
auth_result = self._authenticate_vip_video(
api_video_url, video_id, tvid, _uuid, need_vip_warning_report)
if auth_result is False:
need_vip_warning_report = False
break
param.update({
't': auth_result['t'],
# cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
'cid': 'afbe8fd3d73448c9',
'vid': video_id,
'QY00001': auth_result['u'],
})
api_video_url += '?' if '?' not in api_video_url else '&'
api_video_url += compat_urllib_parse_urlencode(param)
js = self._download_json(
api_video_url, video_id,
note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
video_url = js['l']
video_urls.append(
(video_url, filesize))
video_urls_dict[format_id] = video_urls
return video_urls_dict
def get_format(self, bid):
matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)]
return matched_format_ids[0] if len(matched_format_ids) else None
def get_bid(self, format_id):
matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id]
return matched_bids[0] if len(matched_bids) else None
def get_raw_data(self, tvid, video_id, enc_key, _uuid):
tm = str(int(time.time()))
tail = tm + tvid
param = {
'key': 'fvip',
'src': md5_text('youtube-dl'),
'tvId': tvid,
'vid': video_id, 'vid': video_id,
'vinfo': 1, 'src': '76f90cbd92f94a2e925d83e8ccd22cb7',
'tm': tm, 'sc': sc,
'enc': md5_text(enc_key + tail), 't': tm,
'qyid': _uuid,
'tn': random.random(),
# In iQiyi's flash player, um is set to 1 if there's a logged user
# Some 1080P formats are only available with a logged user.
# Here force um=1 to trick the iQiyi server
'um': 1,
'authkey': md5_text(md5_text('') + tail),
'k_tag': 1,
} }
api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ return self._download_json(
compat_urllib_parse_urlencode(param) 'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
raw_data = self._download_json(api_url, video_id) video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
return raw_data query=params, headers=self.geo_verification_headers())
def get_enc_key(self, video_id):
# TODO: automatic key extraction
# last update at 2016-01-22 for Zombie::bite
enc_key = '4a1caba4b4465345366f28da7c117d20'
return enc_key
def _extract_playlist(self, webpage): def _extract_playlist(self, webpage):
PAGE_SIZE = 50 PAGE_SIZE = 50
@ -571,58 +345,41 @@ class IqiyiIE(InfoExtractor):
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
video_id = self._search_regex( video_id = self._search_regex(
r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
_uuid = uuid.uuid4().hex
enc_key = self.get_enc_key(video_id) formats = []
for _ in range(5):
raw_data = self.get_raw_data(tvid, video_id)
raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) if raw_data['code'] != 'A00000':
if raw_data['code'] == 'A00111':
self.raise_geo_restricted()
raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
if raw_data['code'] != 'A000000': data = raw_data['data']
raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
data = raw_data['data'] for stream in data['vidl']:
if 'm3utx' not in stream:
continue
vd = compat_str(stream['vd'])
formats.append({
'url': stream['m3utx'],
'format_id': vd,
'ext': 'mp4',
'preference': self._FORMATS_MAP.get(vd, -1),
'protocol': 'm3u8_native',
})
title = data['vi']['vn'] if formats:
break
# generate video_urls_dict self._sleep(5, video_id)
video_urls_dict = self.construct_video_urls(
data, video_id, _uuid, tvid)
# construct info self._sort_formats(formats)
entries = [] title = (get_element_by_id('widget-videotitle', webpage) or
for format_id in video_urls_dict: clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)))
video_urls = video_urls_dict[format_id]
for i, video_url_info in enumerate(video_urls):
if len(entries) < i + 1:
entries.append({'formats': []})
entries[i]['formats'].append(
{
'url': video_url_info[0],
'filesize': video_url_info[-1],
'format_id': format_id,
'preference': int(self.get_bid(format_id))
}
)
for i in range(len(entries)): return {
self._sort_formats(entries[i]['formats']) 'id': video_id,
entries[i].update( 'title': title,
{ 'formats': formats,
'id': '%s_part%d' % (video_id, i + 1), }
'title': title,
}
)
if len(entries) > 1:
info = {
'_type': 'multi_video',
'id': video_id,
'title': title,
'entries': entries,
}
else:
info = entries[0]
info['id'] = video_id
info['title'] = title
return info

View File

@ -6,7 +6,6 @@ import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse_urlencode,
compat_urlparse, compat_urlparse,
compat_parse_qs, compat_parse_qs,
) )
@ -15,6 +14,7 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
unsmuggle_url, unsmuggle_url,
smuggle_url,
) )
@ -34,7 +34,8 @@ class KalturaIE(InfoExtractor):
)(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))? )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
) )
''' '''
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?' _SERVICE_URL = 'http://cdnapi.kaltura.com'
_SERVICE_BASE = '/api_v3/index.php'
_TESTS = [ _TESTS = [
{ {
'url': 'kaltura:269692:1_1jc2y3e4', 'url': 'kaltura:269692:1_1jc2y3e4',
@ -64,16 +65,50 @@ class KalturaIE(InfoExtractor):
} }
] ]
def _kaltura_api_call(self, video_id, actions, *args, **kwargs): @staticmethod
def _extract_url(webpage):
mobj = (
re.search(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
(?P<q1>['\"])wid(?P=q1)\s*:\s*
(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),
""", webpage) or
re.search(
r'''(?xs)
(?P<q1>["\'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?
(?P=q1).*?
(?:
entry_?[Ii]d|
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
)\s*:\s*
(?P<q3>["\'])(?P<id>.+?)(?P=q3)
''', webpage))
if mobj:
embed_info = mobj.groupdict()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
service_url = re.search(
r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
webpage)
if service_url:
url = smuggle_url(url, {'service_url': service_url.group(1)})
return url
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0] params = actions[0]
if len(actions) > 1: if len(actions) > 1:
for i, a in enumerate(actions[1:], start=1): for i, a in enumerate(actions[1:], start=1):
for k, v in a.items(): for k, v in a.items():
params['%d:%s' % (i, k)] = v params['%d:%s' % (i, k)] = v
query = compat_urllib_parse_urlencode(params) data = self._download_json(
url = self._API_BASE + query (service_url or self._SERVICE_URL) + self._SERVICE_BASE,
data = self._download_json(url, video_id, *args, **kwargs) video_id, query=params, *args, **kwargs)
status = data if len(actions) == 1 else data[0] status = data if len(actions) == 1 else data[0]
if status.get('objectType') == 'KalturaAPIException': if status.get('objectType') == 'KalturaAPIException':
@ -82,7 +117,7 @@ class KalturaIE(InfoExtractor):
return data return data
def _get_kaltura_signature(self, video_id, partner_id): def _get_kaltura_signature(self, video_id, partner_id, service_url=None):
actions = [{ actions = [{
'apiVersion': '3.1', 'apiVersion': '3.1',
'expiry': 86400, 'expiry': 86400,
@ -92,10 +127,10 @@ class KalturaIE(InfoExtractor):
'widgetId': '_%s' % partner_id, 'widgetId': '_%s' % partner_id,
}] }]
return self._kaltura_api_call( return self._kaltura_api_call(
video_id, actions, note='Downloading Kaltura signature')['ks'] video_id, actions, service_url, note='Downloading Kaltura signature')['ks']
def _get_video_info(self, video_id, partner_id): def _get_video_info(self, video_id, partner_id, service_url=None):
signature = self._get_kaltura_signature(video_id, partner_id) signature = self._get_kaltura_signature(video_id, partner_id, service_url)
actions = [ actions = [
{ {
'action': 'null', 'action': 'null',
@ -118,7 +153,7 @@ class KalturaIE(InfoExtractor):
}, },
] ]
return self._kaltura_api_call( return self._kaltura_api_call(
video_id, actions, note='Downloading video info JSON') video_id, actions, service_url, note='Downloading video info JSON')
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
@ -127,7 +162,7 @@ class KalturaIE(InfoExtractor):
partner_id, entry_id = mobj.group('partner_id', 'id') partner_id, entry_id = mobj.group('partner_id', 'id')
ks = None ks = None
if partner_id and entry_id: if partner_id and entry_id:
info, flavor_assets = self._get_video_info(entry_id, partner_id) info, flavor_assets = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
else: else:
path, query = mobj.group('path', 'query') path, query = mobj.group('path', 'query')
if not path and not query: if not path and not query:
@ -175,12 +210,17 @@ class KalturaIE(InfoExtractor):
unsigned_url += '?referrer=%s' % referrer unsigned_url += '?referrer=%s' % referrer
return unsigned_url return unsigned_url
data_url = info['dataUrl']
if '/flvclipper/' in data_url:
data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
formats = [] formats = []
for f in flavor_assets: for f in flavor_assets:
# Continue if asset is not ready # Continue if asset is not ready
if f['status'] != 2: if f['status'] != 2:
continue continue
video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id'])) video_url = sign_url(
'%s/flavorId/%s' % (data_url, f['id']))
formats.append({ formats.append({
'format_id': '%(fileExt)s-%(bitrate)s' % f, 'format_id': '%(fileExt)s-%(bitrate)s' % f,
'ext': f.get('fileExt'), 'ext': f.get('fileExt'),
@ -193,9 +233,12 @@ class KalturaIE(InfoExtractor):
'width': int_or_none(f.get('width')), 'width': int_or_none(f.get('width')),
'url': video_url, 'url': video_url,
}) })
m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp')) if '/playManifest/' in data_url:
formats.extend(self._extract_m3u8_formats( m3u8_url = sign_url(data_url.replace(
m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) 'format/url', 'format/applehttp'))
formats.extend(self._extract_m3u8_formats(
m3u8_url, entry_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._check_formats(formats, entry_id) self._check_formats(formats, entry_id)
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -26,11 +26,6 @@ class KuwoBaseIE(InfoExtractor):
def _get_formats(self, song_id, tolerate_ip_deny=False): def _get_formats(self, song_id, tolerate_ip_deny=False):
formats = [] formats = []
for file_format in self._FORMATS: for file_format in self._FORMATS:
headers = {}
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
headers['Ytdl-request-proxy'] = cn_verification_proxy
query = { query = {
'format': file_format['ext'], 'format': file_format['ext'],
'br': file_format.get('br', ''), 'br': file_format.get('br', ''),
@ -42,7 +37,7 @@ class KuwoBaseIE(InfoExtractor):
song_url = self._download_webpage( song_url = self._download_webpage(
'http://antiserver.kuwo.cn/anti.s', 'http://antiserver.kuwo.cn/anti.s',
song_id, note='Download %s url info' % file_format['format'], song_id, note='Download %s url info' % file_format['format'],
query=query, headers=headers, query=query, headers=self.geo_verification_headers(),
) )
if song_url == 'IPDeny' and not tolerate_ip_deny: if song_url == 'IPDeny' and not tolerate_ip_deny:

View File

@ -1,60 +1,65 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
parse_duration, js_to_json,
smuggle_url,
) )
class LA7IE(InfoExtractor): class LA7IE(InfoExtractor):
IE_NAME = 'la7.tv' IE_NAME = 'la7.it'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)(https?://)?(?:
https?://(?:www\.)?la7\.tv/ (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
(?: tg\.la7\.it/repliche-tgla7\?id=
richplayer/\?assetid=| )(?P<id>.+)'''
\?contentId=
)
(?P<id>[0-9]+)'''
_TEST = { _TESTS = [{
'url': 'http://www.la7.tv/richplayer/?assetid=50355319', # 'src' is a plain URL
'md5': 'ec7d1f0224d20ba293ab56cf2259651f', 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
'info_dict': { 'info_dict': {
'id': '50355319', 'id': 'inccool8-02-10-2015-163722',
'ext': 'mp4', 'ext': 'mp4',
'title': 'IL DIVO', 'title': 'Inc.Cool8',
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci', 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
'duration': 6254, 'thumbnail': 're:^https?://.*',
'uploader_id': 'kdla7pillole@iltrovatore.it',
'timestamp': 1443814869,
'upload_date': '20151002',
}, },
'skip': 'Blocked in the US', }, {
} # 'src' is a dictionary
'url': 'http://tg.la7.it/repliche-tgla7?id=189080',
'md5': '6b0d8888d286e39870208dfeceaf456b',
'info_dict': {
'id': '189080',
'ext': 'mp4',
'title': 'TG LA7',
},
}, {
'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
doc = self._download_xml(xml_url, video_id)
video_title = doc.find('title').text webpage = self._download_webpage(url, video_id)
description = doc.find('description').text
duration = parse_duration(doc.find('duration').text)
thumbnail = doc.find('img').text
view_count = int(doc.find('views').text)
prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:') player_data = self._parse_json(
self._search_regex(r'videoLa7\(({[^;]+})\);', webpage, 'player data'),
formats = [{ video_id, transform_source=js_to_json)
'format': vnode.find('quality').text,
'tbr': int(vnode.find('quality').text),
'url': vnode.find('fms').text.strip().replace('mp4:', prefix),
} for vnode in doc.findall('.//videos/video')]
self._sort_formats(formats)
return { return {
'_type': 'url_transparent',
'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
'service_url': 'http://kdam.iltrovatore.it',
}),
'id': video_id, 'id': video_id,
'title': video_title, 'title': player_data['title'],
'description': description, 'description': self._og_search_description(webpage, default=None),
'thumbnail': thumbnail, 'thumbnail': player_data.get('poster'),
'duration': duration, 'ie_key': 'Kaltura',
'formats': formats,
'view_count': view_count,
} }

View File

@ -20,9 +20,9 @@ from ..utils import (
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_iso8601, parse_iso8601,
sanitized_Request,
str_or_none, str_or_none,
url_basename, url_basename,
urshift,
) )
@ -74,15 +74,11 @@ class LeIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def urshift(val, n):
return val >> n if val >= 0 else (val + 0x100000000) >> n
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
def ror(self, param1, param2): def ror(self, param1, param2):
_loc3_ = 0 _loc3_ = 0
while _loc3_ < param2: while _loc3_ < param2:
param1 = self.urshift(param1, 1) + ((param1 & 1) << 31) param1 = urshift(param1, 1) + ((param1 & 1) << 31)
_loc3_ += 1 _loc3_ += 1
return param1 return param1
@ -124,16 +120,11 @@ class LeIE(InfoExtractor):
'tkey': self.calc_time_key(int(time.time())), 'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.le.com' 'domain': 'www.le.com'
} }
play_json_req = sanitized_Request(
'http://api.le.com/mms/out/video/playJson?' + compat_urllib_parse_urlencode(params)
)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
play_json = self._download_json( play_json = self._download_json(
play_json_req, 'http://api.le.com/mms/out/video/playJson',
media_id, 'Downloading playJson data') media_id, 'Downloading playJson data', query=params,
headers=self.geo_verification_headers())
# Check for errors # Check for errors
playstatus = play_json['playstatus'] playstatus = play_json['playstatus']

View File

@ -1,8 +1,6 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -23,34 +21,5 @@ class M6IE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id') return self.url_result('6play:%s' % video_id, 'SixPlay', video_id)
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
'Downloading video RSS')
title = rss.find('./channel/item/title').text
description = rss.find('./channel/item/description').text
thumbnail = rss.find('./channel/item/visuel_clip_big').text
duration = int(rss.find('./channel/item/duration').text)
view_count = int(rss.find('./channel/item/nombre_vues').text)
formats = []
for format_id in ['lq', 'sd', 'hq', 'hd']:
video_url = rss.find('./channel/item/url_video_%s' % format_id)
if video_url is None:
continue
formats.append({
'url': video_url.text,
'format_id': format_id,
})
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'formats': formats,
}

View File

@ -0,0 +1,73 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .pladform import PladformIE
from ..utils import (
unescapeHTML,
int_or_none,
ExtractorError,
)
class METAIE(InfoExtractor):
_VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://video.meta.ua/5502115.video',
'md5': '71b6f3ee274bef16f1ab410f7f56b476',
'info_dict': {
'id': '5502115',
'ext': 'mp4',
'title': 'Sony Xperia Z camera test [HQ]',
'description': 'Xperia Z shoots video in FullHD HDR.',
'uploader_id': 'nomobile',
'uploader': 'CHЁZA.TV',
'upload_date': '20130211',
},
'add_ie': ['Youtube'],
}, {
'url': 'http://video.meta.ua/iframe/5502115',
'only_matching': True,
}, {
# pladform embed
'url': 'http://video.meta.ua/7121015.video',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
st_html5 = self._search_regex(
r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None)
if st_html5:
# uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js
json_str = ''
for i in range(0, len(st_html5), 3):
json_str += '&#x0%s;' % st_html5[i:i + 3]
uppod_data = self._parse_json(unescapeHTML(json_str), video_id)
error = uppod_data.get('customnotfound')
if error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
video_url = uppod_data['file']
info = {
'id': video_id,
'url': video_url,
'title': uppod_data.get('comment') or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage),
'duration': int_or_none(self._og_search_property(
'video:duration', webpage, default=None)),
}
if 'youtube.com/' in video_url:
info.update({
'_type': 'url_transparent',
'ie_key': 'Youtube',
})
return info
pladform_url = PladformIE._extract_url(webpage)
if pladform_url:
return self.url_result(pladform_url)

View File

@ -102,11 +102,11 @@ class MixcloudIE(InfoExtractor):
description = self._og_search_description(webpage) description = self._og_search_description(webpage)
like_count = parse_count(self._search_regex( like_count = parse_count(self._search_regex(
r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)', r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)',
webpage, 'like count', fatal=False)) webpage, 'like count', default=None))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>'], r'/listeners/?">([0-9,.]+)</a>'],
webpage, 'play count', fatal=False)) webpage, 'play count', default=None))
return { return {
'id': track_id, 'id': track_id,

122
youtube_dl/extractor/msn.py Normal file
View File

@ -0,0 +1,122 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
unescapeHTML,
)
class MSNIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE',
'md5': '8442f66c116cbab1ff7098f986983458',
'info_dict': {
'id': 'BBqQYNE',
'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message',
'ext': 'mp4',
'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message',
'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25',
'duration': 104,
'uploader': 'CBS Entertainment',
'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v',
},
}, {
'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
'only_matching': True,
}, {
'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH',
'only_matching': True,
}, {
# geo restricted
'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU',
'only_matching': True,
}, {
'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-raped-woman-comment/vi-AAhvzW6',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id, display_id = mobj.group('id', 'display_id')
webpage = self._download_webpage(url, display_id)
video = self._parse_json(
self._search_regex(
r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1',
webpage, 'video data', default='{}', group='data'),
display_id, transform_source=unescapeHTML)
if not video:
error = unescapeHTML(self._search_regex(
r'data-error=(["\'])(?P<error>.+?)\1',
webpage, 'error', group='error'))
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
title = video['title']
formats = []
for file_ in video.get('videoFiles', []):
format_url = file_.get('url')
if not format_url:
continue
ext = determine_ext(format_url)
# .ism is not yet supported (see
# https://github.com/rg3/youtube-dl/issues/8118)
if ext == 'ism':
continue
if 'm3u8' in format_url:
# m3u8_native should not be used here until
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
m3u8_formats = self._extract_m3u8_formats(
format_url, display_id, 'mp4',
m3u8_id='hls', fatal=False)
# Despite metadata in m3u8 all video+audio formats are
# actually video-only (no audio)
for f in m3u8_formats:
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
f['acodec'] = 'none'
formats.extend(m3u8_formats)
else:
formats.append({
'url': format_url,
'ext': 'mp4',
'format_id': 'http',
'width': int_or_none(file_.get('width')),
'height': int_or_none(file_.get('height')),
})
self._sort_formats(formats)
subtitles = {}
for file_ in video.get('files', []):
format_url = file_.get('url')
format_code = file_.get('formatCode')
if not format_url or not format_code:
continue
if compat_str(format_code) == '3100':
subtitles.setdefault(file_.get('culture', 'en'), []).append({
'ext': determine_ext(format_url, 'ttml'),
'url': format_url,
})
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': video.get('description'),
'thumbnail': video.get('headlineImage', {}).get('url'),
'duration': int_or_none(video.get('durationSecs')),
'uploader': video.get('sourceFriendly'),
'uploader_id': video.get('providerId'),
'creator': video.get('creator'),
'subtitles': subtitles,
'formats': formats,
}

View File

@ -1,6 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .theplatform import ThePlatformIE
from ..utils import ( from ..utils import (
smuggle_url, smuggle_url,
url_basename, url_basename,
@ -61,7 +62,7 @@ class NationalGeographicIE(InfoExtractor):
} }
class NationalGeographicChannelIE(InfoExtractor): class NationalGeographicChannelIE(ThePlatformIE):
IE_NAME = 'natgeo:channel' IE_NAME = 'natgeo:channel'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)' _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)'
@ -102,12 +103,22 @@ class NationalGeographicChannelIE(InfoExtractor):
release_url = self._search_regex( release_url = self._search_regex(
r'video_auth_playlist_url\s*=\s*"([^"]+)"', r'video_auth_playlist_url\s*=\s*"([^"]+)"',
webpage, 'release url') webpage, 'release url')
query = {
'mbr': 'true',
'switch': 'http',
}
is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
if is_auth == 'auth':
auth_resource_id = self._search_regex(
r"video_auth_resourceId\s*=\s*'([^']+)'",
webpage, 'auth resource id')
query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) or ''
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': 'ThePlatform', 'ie_key': 'ThePlatform',
'url': smuggle_url( 'url': smuggle_url(
update_url_query(release_url, {'mbr': 'true', 'switch': 'http'}), update_url_query(release_url, query),
{'force_smil_url': True}), {'force_smil_url': True}),
'display_id': display_id, 'display_id': display_id,
} }

View File

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
parse_duration,
ExtractorError
)
class NineCNineMediaIE(InfoExtractor):
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
def _real_extract(self, url):
destination_code, video_id = re.match(self._VALID_URL, url).groups()
api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id)
content = self._download_json(api_base_url, video_id, query={
'$include': '[contentpackages]',
})
title = content['Name']
if len(content['ContentPackages']) > 1:
raise ExtractorError('multiple content packages')
content_package = content['ContentPackages'][0]
stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id']
stacks = self._download_json(stacks_base_url, video_id)['Items']
if len(stacks) > 1:
raise ExtractorError('multiple stacks')
stack = stacks[0]
stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id'])
formats = []
formats.extend(self._extract_m3u8_formats(
stack_base_url + 'm3u8', video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
stack_base_url + 'f4m', video_id,
f4m_id='hds', fatal=False))
mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False)
if mp4_url:
formats.append({
'url': mp4_url,
'format_id': 'mp4',
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': content.get('Desc') or content.get('ShortDesc'),
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
'duration': parse_duration(content.get('BroadcastTime')),
'formats': formats,
}

View File

@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
float_or_none,
) )
@ -15,15 +16,14 @@ class OnionStudiosIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937', 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
'md5': 'd4851405d31adfadf71cd7a487b765bb', 'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
'info_dict': { 'info_dict': {
'id': '2937', 'id': '2937',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Hannibal charges forward, stops for a cocktail', 'title': 'Hannibal charges forward, stops for a cocktail',
'description': 'md5:e786add7f280b7f0fe237b64cc73df76',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'The A.V. Club', 'uploader': 'The A.V. Club',
'uploader_id': 'TheAVClub', 'uploader_id': 'the-av-club',
}, },
}, { }, {
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true', 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
@ -40,50 +40,39 @@ class OnionStudiosIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( video_data = self._download_json(
'http://www.onionstudios.com/embed?id=%s' % video_id, video_id) 'http://www.onionstudios.com/video/%s.json' % video_id, video_id)
title = video_data['title']
formats = [] formats = []
for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage): for source in video_data.get('sources', []):
ext = determine_ext(src) source_url = source.get('url')
if ext == 'm3u8': if not source_url:
continue
content_type = source.get('content_type')
ext = determine_ext(source_url)
if content_type == 'application/x-mpegURL' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
else: else:
height = int_or_none(self._search_regex( tbr = int_or_none(source.get('bitrate'))
r'/(\d+)\.%s' % ext, src, 'height', default=None))
formats.append({ formats.append({
'format_id': ext + ('-%sp' % height if height else ''), 'format_id': ext + ('-%d' % tbr if tbr else ''),
'url': src, 'url': source_url,
'height': height, 'width': int_or_none(source.get('width')),
'tbr': tbr,
'ext': ext, 'ext': ext,
'preference': 1,
}) })
self._sort_formats(formats) self._sort_formats(formats)
title = self._search_regex(
r'share_title\s*=\s*(["\'])(?P<title>[^\1]+?)\1',
webpage, 'title', group='title')
description = self._search_regex(
r'share_description\s*=\s*(["\'])(?P<description>[^\'"]+?)\1',
webpage, 'description', default=None, group='description')
thumbnail = self._search_regex(
r'poster\s*=\s*(["\'])(?P<thumbnail>[^\1]+?)\1',
webpage, 'thumbnail', default=False, group='thumbnail')
uploader_id = self._search_regex(
r'twitter_handle\s*=\s*(["\'])(?P<uploader_id>[^\1]+?)\1',
webpage, 'uploader id', fatal=False, group='uploader_id')
uploader = self._search_regex(
r'window\.channelName\s*=\s*(["\'])Embedded:(?P<uploader>[^\1]+?)\1',
webpage, 'uploader', default=False, group='uploader')
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'thumbnail': video_data.get('poster_url'),
'thumbnail': thumbnail, 'uploader': video_data.get('channel_name'),
'uploader': uploader, 'uploader_id': video_data.get('channel_slug'),
'uploader_id': uploader_id, 'duration': float_or_none(video_data.get('duration', 1000)),
'tags': video_data.get('tags'),
'formats': formats, 'formats': formats,
} }

View File

@ -516,9 +516,14 @@ class PBSIE(InfoExtractor):
# https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'): if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'):
continue continue
f_url = re.sub(r'\d+k|baseline', bitrate, http_url)
# This may produce invalid links sometimes (e.g.
# http://www.pbs.org/wgbh/frontline/film/suicide-plan)
if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate):
continue
f = m3u8_format.copy() f = m3u8_format.copy()
f.update({ f.update({
'url': re.sub(r'\d+k|baseline', bitrate, http_url), 'url': f_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http', 'protocol': 'http',
}) })

View File

@ -120,9 +120,12 @@ class PeriscopeUserIE(InfoExtractor):
title = user.get('display_name') or user.get('username') title = user.get('display_name') or user.get('username')
description = user.get('description') description = user.get('description')
broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or
data_store.get('BroadcastCache', {}).get('broadcastIds', []))
entries = [ entries = [
self.url_result( self.url_result(
'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id'])) 'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id))
for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])] for broadcast_id in broadcast_ids]
return self.playlist_result(entries, user_id, title, description) return self.playlist_result(entries, user_id, title, description)

View File

@ -49,7 +49,7 @@ class PladformIE(InfoExtractor):
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage) r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')

View File

@ -0,0 +1,95 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
int_or_none,
strip_or_none,
unified_timestamp,
)
class PolskieRadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
'info_dict': {
'id': '1587943',
'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
},
'playlist': [{
'md5': '2984ee6ce9046d91fc233bc1a864a09a',
'info_dict': {
'id': '1540576',
'ext': 'mp3',
'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
'timestamp': 1456594200,
'upload_date': '20160227',
'duration': 2364,
},
}],
}, {
'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal',
'info_dict': {
'id': '1635803',
'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał',
'description': 'md5:01cb7d0cad58664095d72b51a1ebada2',
},
'playlist_mincount': 12,
}, {
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
'only_matching': True,
}, {
'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943',
'only_matching': True,
}, {
# with mp4 video
'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
content = self._search_regex(
r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
webpage, 'content')
timestamp = unified_timestamp(self._html_search_regex(
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
webpage, 'timestamp', fatal=False))
entries = []
media_urls = set()
for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content):
media = self._parse_json(data_media, playlist_id, fatal=False)
if not media.get('file') or not media.get('desc'):
continue
media_url = self._proto_relative_url(media['file'], 'http:')
if media_url in media_urls:
continue
media_urls.add(media_url)
entries.append({
'id': compat_str(media['id']),
'url': media_url,
'title': compat_urllib_parse_unquote(media['desc']),
'duration': int_or_none(media.get('length')),
'vcodec': 'none' if media.get('provider') == 'audio' else None,
'timestamp': timestamp,
})
title = self._og_search_title(webpage).strip()
description = strip_or_none(self._og_search_description(webpage))
return self.playlist_result(entries, playlist_id, title, description)

View File

@ -25,7 +25,15 @@ from ..aes import (
class PornHubIE(InfoExtractor): class PornHubIE(InfoExtractor):
_VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)' IE_DESC = 'PornHub and Thumbzilla'
_VALID_URL = r'''(?x)
https?://
(?:
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[0-9a-z]+)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': '1e19b41231a02eba417839222ac9d58e', 'md5': '1e19b41231a02eba417839222ac9d58e',
@ -63,8 +71,24 @@ class PornHubIE(InfoExtractor):
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True, 'only_matching': True,
}, { }, {
# removed at the request of cam4.com
'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
'only_matching': True, 'only_matching': True,
}, {
# removed at the request of the copyright owner
'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
'only_matching': True,
}, {
# removed by uploader
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
'only_matching': True,
}, {
# private video
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
'only_matching': True,
}, {
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
'only_matching': True,
}] }]
@classmethod @classmethod
@ -87,8 +111,8 @@ class PornHubIE(InfoExtractor):
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)
error_msg = self._html_search_regex( error_msg = self._html_search_regex(
r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>', r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>',
webpage, 'error message', default=None) webpage, 'error message', default=None, group='error')
if error_msg: if error_msg:
error_msg = re.sub(r'\s+', ' ', error_msg) error_msg = re.sub(r'\s+', ' ', error_msg)
raise ExtractorError( raise ExtractorError(

View File

@ -5,7 +5,7 @@ import re
from hashlib import sha1 from hashlib import sha1
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
@ -71,6 +71,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'This video is unavailable',
}, },
{ {
'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
@ -86,6 +87,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'This video is unavailable',
}, },
{ {
'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
@ -101,6 +103,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'This video is unavailable',
}, },
{ {
'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
@ -116,6 +119,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'This video is unavailable',
}, },
{ {
'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
@ -131,6 +135,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'This video is unavailable',
}, },
{ {
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
@ -227,70 +232,42 @@ class ProSiebenSat1IE(InfoExtractor):
] ]
def _extract_clip(self, url, webpage): def _extract_clip(self, url, webpage):
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id') clip_id = self._html_search_regex(
self._CLIPID_REGEXES, webpage, 'clip id')
access_token = 'prosieben' access_token = 'prosieben'
client_name = 'kolibri-2.0.19-splec4' client_name = 'kolibri-2.0.19-splec4'
client_location = url client_location = url
videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse_urlencode({ video = self._download_json(
'access_token': access_token, 'http://vas.sim-technik.de/vas/live/v2/videos',
'client_location': client_location, clip_id, 'Downloading videos JSON', query={
'client_name': client_name, 'access_token': access_token,
'ids': clip_id, 'client_location': client_location,
}) 'client_name': client_name,
'ids': clip_id,
video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0] })[0]
if video.get('is_protected') is True: if video.get('is_protected') is True:
raise ExtractorError('This video is DRM protected.', expected=True) raise ExtractorError('This video is DRM protected.', expected=True)
duration = float_or_none(video.get('duration')) duration = float_or_none(video.get('duration'))
source_ids = [source['id'] for source in video['sources']] source_ids = [compat_str(source['id']) for source in video['sources']]
source_ids_str = ','.join(map(str, source_ids))
g = '01!8d8F_)r9]4s[qeuXfP%' g = '01!8d8F_)r9]4s[qeuXfP%'
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest()
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]) sources = self._download_json(
.encode('utf-8')).hexdigest() 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
clip_id, 'Downloading sources JSON', query={
sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse_urlencode({ 'access_token': access_token,
'access_token': access_token, 'client_id': client_id,
'client_id': client_id, 'client_location': client_location,
'client_location': client_location, 'client_name': client_name,
'client_name': client_name, })
}))
sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON')
server_id = sources['server_id'] server_id = sources['server_id']
client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id,
client_location, source_ids_str, g, client_name])
.encode('utf-8')).hexdigest()
url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse_urlencode({
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
'server_id': server_id,
'source_ids': source_ids_str,
}))
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
description = self._html_search_regex(self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex(
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
formats = []
urls_sources = urls['sources']
if isinstance(urls_sources, dict):
urls_sources = urls_sources.values()
def fix_bitrate(bitrate): def fix_bitrate(bitrate):
bitrate = int_or_none(bitrate) bitrate = int_or_none(bitrate)
@ -298,37 +275,73 @@ class ProSiebenSat1IE(InfoExtractor):
return None return None
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
for source in urls_sources: formats = []
protocol = source['protocol'] for source_id in source_ids:
source_url = source['url'] client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest()
if protocol == 'rtmp' or protocol == 'rtmpe': urls = self._download_json(
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
if not mobj: clip_id, 'Downloading urls JSON', fatal=False, query={
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
'server_id': server_id,
'source_ids': source_id,
})
if not urls:
continue
if urls.get('status_code') != 0:
raise ExtractorError('This video is unavailable', expected=True)
urls_sources = urls['sources']
if isinstance(urls_sources, dict):
urls_sources = urls_sources.values()
for source in urls_sources:
source_url = source.get('url')
if not source_url:
continue continue
path = mobj.group('path') protocol = source.get('protocol')
mp4colon_index = path.rfind('mp4:') mimetype = source.get('mimetype')
app = path[:mp4colon_index] if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
play_path = path[mp4colon_index:] formats.extend(self._extract_f4m_formats(
formats.append({ source_url, clip_id, f4m_id='hds', fatal=False))
'url': '%s/%s' % (mobj.group('url'), app), elif mimetype == 'application/x-mpegURL':
'app': app, formats.extend(self._extract_m3u8_formats(
'play_path': play_path, source_url, clip_id, 'mp4', 'm3u8_native',
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', m3u8_id='hls', fatal=False))
'page_url': 'http://www.prosieben.de', else:
'vbr': fix_bitrate(source['bitrate']), tbr = fix_bitrate(source['bitrate'])
'ext': 'mp4', if protocol in ('rtmp', 'rtmpe'):
'format_id': '%s_%s' % (source['cdn'], source['bitrate']), mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
}) if not mobj:
elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': continue
formats.extend(self._extract_f4m_formats(source_url, clip_id)) path = mobj.group('path')
else: mp4colon_index = path.rfind('mp4:')
formats.append({ app = path[:mp4colon_index]
'url': source_url, play_path = path[mp4colon_index:]
'vbr': fix_bitrate(source['bitrate']), formats.append({
}) 'url': '%s/%s' % (mobj.group('url'), app),
'app': app,
'play_path': play_path,
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
'page_url': 'http://www.prosieben.de',
'tbr': tbr,
'ext': 'flv',
'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
})
else:
formats.append({
'url': source_url,
'tbr': tbr,
'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
})
self._sort_formats(formats) self._sort_formats(formats)
description = self._html_search_regex(
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex(
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
return { return {
'id': clip_id, 'id': clip_id,
'title': title, 'title': title,

View File

@ -1,47 +1,141 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urlparse
compat_urllib_parse,
compat_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError,
determine_ext, determine_ext,
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
int_or_none,
parse_duration, parse_duration,
unified_strdate, unified_strdate,
int_or_none, update_url_query,
xpath_text, xpath_text,
) )
class RaiTVIE(InfoExtractor): class RaiBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' def _extract_relinker_formats(self, relinker_url, video_id):
formats = []
for platform in ('mon', 'flash', 'native'):
relinker = self._download_xml(
relinker_url, video_id,
note='Downloading XML metadata for platform %s' % platform,
transform_source=fix_xml_ampersands,
query={'output': 45, 'pl': platform},
headers=self.geo_verification_headers())
media_url = find_xpath_attr(relinker, './url', 'type', 'content').text
if media_url == 'http://download.rai.it/video_no_available.mp4':
self.raise_geo_restricted()
ext = determine_ext(media_url)
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
continue
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'f4m':
manifest_url = update_url_query(
media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
formats.extend(self._extract_f4m_formats(
manifest_url, video_id, f4m_id='hds', fatal=False))
else:
bitrate = int_or_none(xpath_text(relinker, 'bitrate'))
formats.append({
'url': media_url,
'tbr': bitrate if bitrate > 0 else None,
'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
})
return formats
def _extract_from_content_id(self, content_id, base_url):
media = self._download_json(
'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
content_id, 'Downloading video JSON')
thumbnails = []
for image_type in ('image', 'image_medium', 'image_300'):
thumbnail_url = media.get(image_type)
if thumbnail_url:
thumbnails.append({
'url': compat_urlparse.urljoin(base_url, thumbnail_url),
})
formats = []
media_type = media['type']
if 'Audio' in media_type:
formats.append({
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
})
elif 'Video' in media_type:
formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id))
self._sort_formats(formats)
else:
raise ExtractorError('not a media file')
subtitles = {}
captions = media.get('subtitlesUrl')
if captions:
STL_EXT = '.stl'
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = [{
'ext': 'srt',
'url': captions,
}]
return {
'id': content_id,
'title': media['name'],
'description': media.get('desc'),
'thumbnails': thumbnails,
'uploader': media.get('author'),
'upload_date': unified_strdate(media.get('date')),
'duration': parse_duration(media.get('length')),
'formats': formats,
'subtitles': subtitles,
}
class RaiTVIE(RaiBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '96382709b61dd64a6b88e0f791e6df4c', 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
'info_dict': { 'info_dict': {
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
'ext': 'flv', 'ext': 'mp4',
'title': 'Report del 07/04/2014', 'title': 'Report del 07/04/2014',
'description': 'md5:f27c544694cacb46a078db84ec35d2d9', 'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
'upload_date': '20140407', 'upload_date': '20140407',
'duration': 6160, 'duration': 6160,
'thumbnail': 're:^https?://.*\.jpg$',
} }
}, },
{ {
# no m3u8 stream
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
'md5': 'd9751b78eac9710d62c2447b224dea39', # HDS download, MD5 is unstable
'info_dict': { 'info_dict': {
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
'ext': 'flv', 'ext': 'flv',
'title': 'TG PRIMO TEMPO', 'title': 'TG PRIMO TEMPO',
'upload_date': '20140612', 'upload_date': '20140612',
'duration': 1758, 'duration': 1758,
'thumbnail': 're:^https?://.*\.jpg$',
}, },
'skip': 'Geo-restricted to Italy',
}, },
{ {
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', 'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
@ -67,127 +161,70 @@ class RaiTVIE(InfoExtractor):
}, },
{ {
'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html', 'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
'md5': '496ab63e420574447f70d02578333437', 'md5': 'e57493e1cb8bc7c564663f363b171847',
'info_dict': { 'info_dict': {
'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6', 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
'ext': 'flv', 'ext': 'mp4',
'title': 'Il Candidato - Primo episodio: "Le Primarie"', 'title': 'Il Candidato - Primo episodio: "Le Primarie"',
'description': 'md5:364b604f7db50594678f483353164fb8', 'description': 'md5:364b604f7db50594678f483353164fb8',
'upload_date': '20140923', 'upload_date': '20140923',
'duration': 386, 'duration': 386,
'thumbnail': 're:^https?://.*\.jpg$',
} }
}, },
] ]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
media = self._download_json(
'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % video_id,
video_id, 'Downloading video JSON')
thumbnails = [] return self._extract_from_content_id(video_id, url)
for image_type in ('image', 'image_medium', 'image_300'):
thumbnail_url = media.get(image_type)
if thumbnail_url:
thumbnails.append({
'url': thumbnail_url,
})
subtitles = []
formats = []
media_type = media['type']
if 'Audio' in media_type:
formats.append({
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
})
elif 'Video' in media_type:
def fix_xml(xml):
return xml.replace(' tag elementi', '').replace('>/', '</')
relinker = self._download_xml(
media['mediaUri'] + '&output=43',
video_id, transform_source=fix_xml)
has_subtitle = False
for element in relinker.findall('element'):
media_url = xpath_text(element, 'url')
ext = determine_ext(media_url)
content_type = xpath_text(element, 'content-type')
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
media_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
video_id, f4m_id='hds', fatal=False))
elif ext == 'stl':
has_subtitle = True
elif content_type.startswith('video/'):
bitrate = int_or_none(xpath_text(element, 'bitrate'))
formats.append({
'url': media_url,
'tbr': bitrate if bitrate > 0 else None,
'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
})
elif content_type.startswith('image/'):
thumbnails.append({
'url': media_url,
})
self._sort_formats(formats)
if has_subtitle:
webpage = self._download_webpage(url, video_id)
subtitles = self._get_subtitles(video_id, webpage)
else:
raise ExtractorError('not a media file')
return {
'id': video_id,
'title': media['name'],
'description': media.get('desc'),
'thumbnails': thumbnails,
'uploader': media.get('author'),
'upload_date': unified_strdate(media.get('date')),
'duration': parse_duration(media.get('length')),
'formats': formats,
'subtitles': subtitles,
}
def _get_subtitles(self, video_id, webpage):
subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m:
captions = m.group('captions')
STL_EXT = '.stl'
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = [{
'ext': 'srt',
'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
}]
return subtitles
class RaiIE(InfoExtractor): class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', 'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': 'e0e7a8a131e249d1aa0ebf270d1d8db7', 'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': { 'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af', 'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'flv', 'ext': 'mp4',
'title': 'Il pacco', 'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'upload_date': '20141221', 'upload_date': '20141221',
}, },
} },
{
# Direct relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
# HDS live stream, MD5 is unstable
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'skip': 'Geo-restricted to Italy',
},
{
# Embedded content item ID
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
'md5': '84c1135ce960e8822ae63cec34441d63',
'info_dict': {
'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8',
'ext': 'mp4',
'title': 'TG1 ore 20:00 del 02/07/2016',
'upload_date': '20160702',
},
},
{
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
# HDS live stream, MD5 is unstable
'info_dict': {
'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
'ext': 'flv',
'title': 'La diretta di Rainews24',
},
},
] ]
@classmethod @classmethod
@ -201,7 +238,30 @@ class RaiIE(InfoExtractor):
iframe_url = self._search_regex( iframe_url = self._search_regex(
[r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"', [r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
r'drawMediaRaiTV\(["\'](.+?)["\']'], r'drawMediaRaiTV\(["\'](.+?)["\']'],
webpage, 'iframe') webpage, 'iframe', default=None)
if not iframe_url.startswith('http'): if iframe_url:
iframe_url = compat_urlparse.urljoin(url, iframe_url) if not iframe_url.startswith('http'):
return self.url_result(iframe_url) iframe_url = compat_urlparse.urljoin(url, iframe_url)
return self.url_result(iframe_url)
content_item_id = self._search_regex(
r'initEdizione\((?P<q1>[\'"])ContentItem-(?P<content_id>[^\'"]+)(?P=q1)',
webpage, 'content item ID', group='content_id', default=None)
if content_item_id:
return self._extract_from_content_id(content_item_id, url)
relinker_url = compat_urlparse.urljoin(url, self._search_regex(
r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P<q1>[\'"])(?P<url>(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)',
webpage, 'relinker URL', group='url'))
formats = self._extract_relinker_formats(relinker_url, video_id)
self._sort_formats(formats)
title = self._search_regex(
r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
webpage, 'title', group='title', default=None) or self._og_search_title(webpage)
return {
'id': video_id,
'title': title,
'formats': formats,
}

View File

@ -1,23 +1,23 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
js_to_json,
) )
from ..compat import compat_str
class RDSIE(InfoExtractor): class RDSIE(InfoExtractor):
IE_DESC = 'RDS.ca' IE_DESC = 'RDS.ca'
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)' _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
_TESTS = [{ _TESTS = [{
'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799', 'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
'info_dict': { 'info_dict': {
'id': '3.1132799', 'id': '604333',
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville', 'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Fowler Jr. prend la direction de Jacksonville', 'title': 'Fowler Jr. prend la direction de Jacksonville',
@ -33,22 +33,17 @@ class RDSIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id = self._match_id(url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
# TODO: extract f4m from 9c9media.com item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
video_url = self._search_regex( video_id = compat_str(item['id'])
r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"', title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
webpage, 'video url')
title = self._og_search_title(webpage) or self._html_search_meta(
'title', webpage, 'title', fatal=True) 'title', webpage, 'title', fatal=True)
description = self._og_search_description(webpage) or self._html_search_meta( description = self._og_search_description(webpage) or self._html_search_meta(
'description', webpage, 'description') 'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage) or self._search_regex( thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
[r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
webpage, 'thumbnail', fatal=False) webpage, 'thumbnail', fatal=False)
@ -61,13 +56,15 @@ class RDSIE(InfoExtractor):
age_limit = self._family_friendly_search(webpage) age_limit = self._family_friendly_search(webpage)
return { return {
'_type': 'url_transparent',
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'url': video_url, 'url': '9c9media:rds_web:%s' % video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'timestamp': timestamp, 'timestamp': timestamp,
'duration': duration, 'duration': duration,
'age_limit': age_limit, 'age_limit': age_limit,
'ie_key': 'NineCNineMedia',
} }

View File

@ -9,7 +9,7 @@ class RTVNHIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://www.rtvnh.nl/video/131946', 'url': 'http://www.rtvnh.nl/video/131946',
'md5': '6e1d0ab079e2a00b6161442d3ceacfc1', 'md5': 'cdbec9f44550763c8afc96050fa747dc',
'info_dict': { 'info_dict': {
'id': '131946', 'id': '131946',
'ext': 'mp4', 'ext': 'mp4',
@ -29,15 +29,29 @@ class RTVNHIE(InfoExtractor):
raise ExtractorError( raise ExtractorError(
'%s returned error code %d' % (self.IE_NAME, status), expected=True) '%s returned error code %d' % (self.IE_NAME, status), expected=True)
formats = self._extract_smil_formats( formats = []
'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id, fatal=False) rtmp_formats = self._extract_smil_formats(
'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
formats.extend(rtmp_formats)
for item in meta['source']['fb']: for rtmp_format in rtmp_formats:
if item.get('type') == 'hls': rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
formats.extend(self._extract_m3u8_formats( rtsp_format = rtmp_format.copy()
item['file'], video_id, ext='mp4', entry_protocol='m3u8_native')) del rtsp_format['play_path']
elif item.get('type') == '': del rtsp_format['ext']
formats.append({'url': item['file']}) rtsp_format.update({
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
'url': rtmp_url.replace('rtmp://', 'rtsp://'),
'protocol': 'rtsp',
})
formats.append(rtsp_format)
http_base_url = rtmp_url.replace('rtmp://', 'http://')
formats.extend(self._extract_m3u8_formats(
http_base_url + '/playlist.m3u8', video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@ -1,18 +1,12 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
js_to_json,
mimetype2ext, mimetype2ext,
sanitized_Request,
unified_strdate,
) )
@ -27,7 +21,8 @@ class SandiaIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Xyce Software Training - Section 1', 'title': 'Xyce Software Training - Section 1',
'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}', 'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
'upload_date': '20120904', 'upload_date': '20120409',
'timestamp': 1333983600,
'duration': 7794, 'duration': 7794,
} }
} }
@ -35,81 +30,36 @@ class SandiaIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
req = sanitized_Request(url) presentation_data = self._download_json(
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4') 'http://digitalops.sandia.gov/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
webpage = self._download_webpage(req, video_id) video_id, data=json.dumps({
'getPlayerOptionsRequest': {
'ResourceId': video_id,
'QueryString': '',
}
}), headers={
'Content-Type': 'application/json; charset=utf-8',
})['d']['Presentation']
js_path = self._search_regex( title = presentation_data['Title']
r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
webpage, 'JS code URL')
js_url = compat_urlparse.urljoin(url, js_path)
js_code = self._download_webpage(
js_url, video_id, note='Downloading player')
def extract_str(key, **args):
return self._search_regex(
r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
js_code, key, **args)
def extract_data(key, **args):
data_json = extract_str(key, **args)
if data_json is None:
return data_json
return self._parse_json(
data_json, video_id, transform_source=js_to_json)
formats = [] formats = []
for i in itertools.count(): for stream in presentation_data.get('Streams', []):
fd = extract_data('VideoUrls[%d]' % i, default=None) for fd in stream.get('VideoUrls', []):
if fd is None: formats.append({
break 'format_id': fd['MediaType'],
formats.append({ 'format_note': fd['MimeType'].partition('/')[2],
'format_id': '%s' % i, 'ext': mimetype2ext(fd['MimeType']),
'format_note': fd['MimeType'].partition('/')[2], 'url': fd['Location'],
'ext': mimetype2ext(fd['MimeType']), 'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
'url': fd['Location'], })
'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
})
self._sort_formats(formats) self._sort_formats(formats)
slide_baseurl = compat_urlparse.urljoin(
url, extract_data('SlideBaseUrl'))
slide_template = slide_baseurl + re.sub(
r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
slides = []
last_slide_time = 0
for i in itertools.count(1):
sd = extract_str('Slides[%d]' % i, default=None)
if sd is None:
break
timestamp = int_or_none(self._search_regex(
r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
sd, 'slide %s timestamp' % i, fatal=False))
slides.append({
'url': slide_template % i,
'duration': timestamp - last_slide_time,
})
last_slide_time = timestamp
formats.append({
'format_id': 'slides',
'protocol': 'slideshow',
'url': json.dumps(slides),
'preference': -10000, # Downloader not yet written
})
self._sort_formats(formats)
title = extract_data('Title')
description = extract_data('Description', fatal=False)
duration = int_or_none(extract_data(
'Duration', fatal=False), scale=1000)
upload_date = unified_strdate(extract_data('AirDate', fatal=False))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': presentation_data.get('Description'),
'formats': formats, 'formats': formats,
'upload_date': upload_date, 'timestamp': int_or_none(presentation_data.get('UnixTime'), 1000),
'duration': duration, 'duration': int_or_none(presentation_data.get('Duration'), 1000),
} }

View File

@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
qualities,
int_or_none,
)
class SixPlayIE(InfoExtractor):
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
'md5': '42310bffe4ba3982db112b9cd3467328',
'info_dict': {
'id': '11495320',
'ext': 'mp4',
'title': 'Jamel et ses amis au Marrakech du rire 2015',
'description': 'md5:ba2149d5c321d5201b78070ee839d872',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
clip_data = self._download_json(
'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
video_id)
video_data = clip_data['videoInfo']
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = []
for source in clip_data['sources']:
source_type, source_url = source.get('type'), source.get('src')
if not source_url or source_type == 'hls/primetime':
continue
if source_type == 'application/vnd.apple.mpegURL':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
source_url.replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
elif source_type == 'video/mp4':
quality = source.get('quality')
formats.append({
'url': source_url,
'format_id': quality,
'quality': quality_key(quality),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['title'].strip(),
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'series': video_data.get('titlePgm'),
'formats': formats,
}

View File

@ -67,7 +67,7 @@ class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE): class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
IE_NAME = 'skynewsarabia:video' IE_NAME = 'skynewsarabia:article'
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9', 'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9',

View File

@ -0,0 +1,33 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class SkySportsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
'md5': 'c44a1db29f27daf9a0003e010af82100',
'info_dict': {
'id': '10328419',
'ext': 'flv',
'title': 'Bale: Its our time to shine',
'description': 'md5:9fd1de3614d525f5addda32ac3c482c9',
},
'add_ie': ['Ooyala'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'ooyala:%s' % self._search_regex(
r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'ie_key': 'Ooyala',
}

View File

@ -9,6 +9,7 @@ from ..compat import (
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
get_element_by_id,
) )
@ -40,7 +41,7 @@ class SlideshareIE(InfoExtractor):
bucket = info['jsplayer']['video_bucket'] bucket = info['jsplayer']['video_bucket']
ext = info['jsplayer']['video_extension'] ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex( description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex(
r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage, r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
'description', fatal=False) 'description', fatal=False)
@ -51,5 +52,5 @@ class SlideshareIE(InfoExtractor):
'ext': ext, 'ext': ext,
'url': video_url, 'url': video_url,
'thumbnail': info['slideshow']['pin_image_url'], 'thumbnail': info['slideshow']['pin_image_url'],
'description': description, 'description': description.strip() if description else None,
} }

View File

@ -8,10 +8,7 @@ from ..compat import (
compat_str, compat_str,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
) )
from ..utils import ( from ..utils import ExtractorError
ExtractorError,
sanitized_Request,
)
class SohuIE(InfoExtractor): class SohuIE(InfoExtractor):
@ -96,15 +93,10 @@ class SohuIE(InfoExtractor):
else: else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
req = sanitized_Request(base_data_url + vid_id)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
if cn_verification_proxy:
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
return self._download_json( return self._download_json(
req, video_id, base_data_url + vid_id, video_id,
'Downloading JSON data for %s' % vid_id) 'Downloading JSON data for %s' % vid_id,
headers=self.geo_verification_headers())
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')

View File

@ -4,8 +4,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse
from .spiegeltv import SpiegeltvIE from .spiegeltv import SpiegeltvIE
from ..compat import compat_urlparse
from ..utils import (
extract_attributes,
unified_strdate,
get_element_by_attribute,
)
class SpiegelIE(InfoExtractor): class SpiegelIE(InfoExtractor):
@ -19,6 +24,7 @@ class SpiegelIE(InfoExtractor):
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', 'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
'description': 'md5:8029d8310232196eb235d27575a8b9f4', 'description': 'md5:8029d8310232196eb235d27575a8b9f4',
'duration': 49, 'duration': 49,
'upload_date': '20130311',
}, },
}, { }, {
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
@ -29,6 +35,7 @@ class SpiegelIE(InfoExtractor):
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers', 'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
'description': 'md5:c2322b65e58f385a820c10fa03b2d088', 'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983, 'duration': 983,
'upload_date': '20131115',
}, },
}, { }, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html', 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
@ -38,6 +45,7 @@ class SpiegelIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.', 'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"', 'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
'upload_date': '20140904',
} }
}, { }, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html', 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
@ -52,10 +60,10 @@ class SpiegelIE(InfoExtractor):
if SpiegeltvIE.suitable(handle.geturl()): if SpiegeltvIE.suitable(handle.geturl()):
return self.url_result(handle.geturl(), 'Spiegeltv') return self.url_result(handle.geturl(), 'Spiegeltv')
title = re.sub(r'\s+', ' ', self._html_search_regex( video_data = extract_attributes(self._search_regex(r'(<div[^>]+id="spVideoElements"[^>]+>)', webpage, 'video element', default=''))
r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
webpage, 'title')) title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage)
description = self._html_search_meta('description', webpage, 'description') description = video_data.get('data-video-teaser') or self._html_search_meta('description', webpage, 'description')
base_url = self._search_regex( base_url = self._search_regex(
[r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'], [r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'],
@ -87,8 +95,9 @@ class SpiegelIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description.strip() if description else None,
'duration': duration, 'duration': duration,
'upload_date': unified_strdate(video_data.get('data-video-date')),
'formats': formats, 'formats': formats,
} }

View File

@ -9,6 +9,7 @@ from ..utils import (
class SRMediathekIE(ARDMediathekIE): class SRMediathekIE(ARDMediathekIE):
IE_NAME = 'sr:mediathek'
IE_DESC = 'Saarländischer Rundfunk' IE_DESC = 'Saarländischer Rundfunk'
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)' _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'

View File

@ -56,7 +56,7 @@ class StitcherIE(InfoExtractor):
episode = self._parse_json( episode = self._parse_json(
js_to_json(self._search_regex( js_to_json(self._search_regex(
r'(?s)var\s+stitcher\s*=\s*({.+?});\n', webpage, 'episode config')), r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
display_id)['config']['episode'] display_id)['config']['episode']
title = unescapeHTML(episode['title']) title = unescapeHTML(episode['title'])

View File

@ -120,7 +120,7 @@ class SVTIE(SVTBaseIE):
class SVTPlayIE(SVTBaseIE): class SVTPlayIE(SVTBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv' IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
'md5': '2b6704fe4a28801e1a098bbf3c5ac611', 'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
@ -141,6 +141,9 @@ class SVTPlayIE(SVTBaseIE):
# geo restricted to Sweden # geo restricted to Sweden
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -6,6 +6,7 @@ import time
import hmac import hmac
import binascii import binascii
import hashlib import hashlib
import netrc
from .once import OnceIE from .once import OnceIE
@ -24,6 +25,9 @@ from ..utils import (
xpath_with_ns, xpath_with_ns,
mimetype2ext, mimetype2ext,
find_xpath_attr, find_xpath_attr,
unescapeHTML,
urlencode_postdata,
unified_timestamp,
) )
default_ns = 'http://www.w3.org/2005/SMIL21/Language' default_ns = 'http://www.w3.org/2005/SMIL21/Language'
@ -62,10 +66,11 @@ class ThePlatformBaseIE(OnceIE):
return formats, subtitles return formats, subtitles
def get_metadata(self, path, video_id): def _download_theplatform_metadata(self, path, video_id):
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
info = self._download_json(info_url, video_id) return self._download_json(info_url, video_id)
def _parse_theplatform_metadata(self, info):
subtitles = {} subtitles = {}
captions = info.get('captions') captions = info.get('captions')
if isinstance(captions, list): if isinstance(captions, list):
@ -86,6 +91,10 @@ class ThePlatformBaseIE(OnceIE):
'uploader': info.get('billingCode'), 'uploader': info.get('billingCode'),
} }
def _extract_theplatform_metadata(self, path, video_id):
info = self._download_theplatform_metadata(path, video_id)
return self._parse_theplatform_metadata(info)
class ThePlatformIE(ThePlatformBaseIE): class ThePlatformIE(ThePlatformBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
@ -158,6 +167,7 @@ class ThePlatformIE(ThePlatformBaseIE):
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781', 'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
'only_matching': True, 'only_matching': True,
}] }]
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
@classmethod @classmethod
def _extract_urls(cls, webpage): def _extract_urls(cls, webpage):
@ -192,6 +202,96 @@ class ThePlatformIE(ThePlatformBaseIE):
sig = flags + expiration_date + checksum + str_to_hex(sig_secret) sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig) return '%s&sig=%s' % (url, sig)
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
def xml_text(xml_str, tag):
return self._search_regex(
'<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
mvpd_headers = {
'ap_42': 'anonymous',
'ap_11': 'Linux i686',
'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
}
guid = xml_text(resource, 'guid')
requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
authn_token = requestor_info.get('authn_token')
if authn_token:
token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', ''))
if token_expires and token_expires >= time.time():
authn_token = None
if not authn_token:
# TODO add support for other TV Providers
mso_id = 'DTV'
login_info = netrc.netrc().authenticators(mso_id)
if not login_info:
return None
def post_form(form_page, note, data={}):
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
return self._download_webpage(
post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
provider_redirect_page = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
'Downloading Provider Redirect Page', query={
'noflash': 'true',
'mso_id': mso_id,
'requestor_id': requestor_id,
'no_iframe': 'false',
'domain_name': 'adobe.com',
'redirect_url': url,
})
provider_login_page = post_form(
provider_redirect_page, 'Downloading Provider Login Page')
mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
'username': login_info[0],
'password': login_info[2],
})
post_form(mvpd_confirm_page, 'Confirming Login')
session = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
'Retrieving Session', data=urlencode_postdata({
'_method': 'GET',
'requestor_id': requestor_id,
}), headers=mvpd_headers)
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
requestor_info['authn_token'] = authn_token
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
authz_token = requestor_info.get(guid)
if not authz_token:
authorize = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
'Retrieving Authorization Token', data=urlencode_postdata({
'resource_id': resource,
'requestor_id': requestor_id,
'authentication_token': authn_token,
'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
'userMeta': '1',
}), headers=mvpd_headers)
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
requestor_info[guid] = authz_token
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
mvpd_headers.update({
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
})
return self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
video_id, 'Retrieving Media Token', data=urlencode_postdata({
'authz_token': authz_token,
'requestor_id': requestor_id,
'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
'hashed_guid': 'false',
}), headers=mvpd_headers)
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
@ -265,7 +365,7 @@ class ThePlatformIE(ThePlatformBaseIE):
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id) formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
self._sort_formats(formats) self._sort_formats(formats)
ret = self.get_metadata(path, video_id) ret = self._extract_theplatform_metadata(path, video_id)
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles) combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
ret.update({ ret.update({
'id': video_id, 'id': video_id,
@ -339,7 +439,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
timestamp = int_or_none(entry.get('media$availableDate'), scale=1000) timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
categories = [item['media$name'] for item in entry.get('media$categories', [])] categories = [item['media$name'] for item in entry.get('media$categories', [])]
ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id) ret = self._extract_theplatform_metadata('%s/%s' % (provider_id, first_video_id), video_id)
subtitles = self._merge_subtitles(subtitles, ret['subtitles']) subtitles = self._merge_subtitles(subtitles, ret['subtitles'])
ret.update({ ret.update({
'id': video_id, 'id': video_id,

View File

@ -4,6 +4,12 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
get_element_by_attribute,
ExtractorError,
)
class TVPIE(InfoExtractor): class TVPIE(InfoExtractor):
@ -21,7 +27,7 @@ class TVPIE(InfoExtractor):
}, },
}, { }, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
'md5': 'c3b15ed1af288131115ff17a17c19dda', 'md5': 'b0005b542e5b4de643a9690326ab1257',
'info_dict': { 'info_dict': {
'id': '17916176', 'id': '17916176',
'ext': 'mp4', 'ext': 'mp4',
@ -53,6 +59,11 @@ class TVPIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id) 'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
error_massage = get_element_by_attribute('class', 'msg error', webpage)
if error_massage:
raise ExtractorError('%s said: %s' % (
self.IE_NAME, clean_html(error_massage)), expected=True)
title = self._search_regex( title = self._search_regex(
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
webpage, 'title', group='title') webpage, 'title', group='title')
@ -66,24 +77,50 @@ class TVPIE(InfoExtractor):
r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None) r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None)
video_url = self._search_regex( video_url = self._search_regex(
r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None) r'0:{src:([\'"])(?P<url>.*?)\1', webpage,
if not video_url: 'formats', group='url', default=None)
if not video_url or 'material_niedostepny.mp4' in video_url:
video_url = self._download_json( video_url = self._download_json(
'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id, 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id,
video_id)['video_url'] video_id)['video_url']
ext = video_url.rsplit('.', 1)[-1] formats = []
if ext != 'ism/manifest': video_url_base = self._search_regex(
if '/' in ext: r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
ext = 'mp4' video_url, 'video base url', default=None)
if video_url_base:
# TODO: Current DASH formats are broken - $Time$ pattern in
# <SegmentTemplate> not implemented yet
# formats.extend(self._extract_mpd_formats(
# video_url_base + '.ism/video.mpd',
# video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_f4m_formats(
video_url_base + '.ism/video.f4m',
video_id, f4m_id='hds', fatal=False))
m3u8_formats = self._extract_m3u8_formats(
video_url_base + '.ism/video.m3u8', video_id,
'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
m3u8_formats))
formats.extend(m3u8_formats)
for i, m3u8_format in enumerate(m3u8_formats, 2):
http_url = '%s-%d.mp4' % (video_url_base, i)
if self._is_valid_url(http_url, video_id):
f = m3u8_format.copy()
f.update({
'url': http_url,
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
else:
formats = [{ formats = [{
'format_id': 'direct', 'format_id': 'direct',
'url': video_url, 'url': video_url,
'ext': ext, 'ext': determine_ext(video_url, 'mp4'),
}] }]
else:
m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -29,7 +29,7 @@ class TwitchBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv' _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
_API_BASE = 'https://api.twitch.tv' _API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'http://usher.twitch.tv' _USHER_BASE = 'https://usher.ttvnw.net'
_LOGIN_URL = 'http://www.twitch.tv/login' _LOGIN_URL = 'http://www.twitch.tv/login'
_NETRC_MACHINE = 'twitch' _NETRC_MACHINE = 'twitch'

View File

@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class URPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
'md5': '15ca67b63fd8fb320ac2bcd854bad7b6',
'info_dict': {
'id': '190031',
'ext': 'mp4',
'title': 'Tripp, Trapp, Träd : Sovkudde',
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
urplayer_data = self._parse_json(self._search_regex(
r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id)
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
formats = []
for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
file_rtmp = urplayer_data.get('file_rtmp' + quality_attr)
if file_rtmp:
formats.append({
'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp),
'format_id': quality + '-rtmp',
'ext': 'flv',
'preference': preference,
})
file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
if file_http:
file_http_base_url = 'http://%s/%s' % (host, file_http)
formats.extend(self._extract_f4m_formats(
file_http_base_url + 'manifest.f4m', video_id,
preference, '%s-hds' % quality, fatal=False))
formats.extend(self._extract_m3u8_formats(
file_http_base_url + 'playlist.m3u8', video_id, 'mp4',
'm3u8_native', preference, '%s-hls' % quality, fatal=False))
self._sort_formats(formats)
subtitles = {}
for subtitle in urplayer_data.get('subtitles', []):
subtitle_url = subtitle.get('file')
kind = subtitle.get('kind')
if subtitle_url or kind and kind != 'captions':
continue
subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
'url': subtitle_url,
})
return {
'id': video_id,
'title': urplayer_data['title'],
'description': self._og_search_description(webpage),
'thumbnail': urplayer_data.get('image'),
'series': urplayer_data.get('series_title'),
'subtitles': subtitles,
'formats': formats,
}

View File

@ -0,0 +1,84 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
remove_end,
unified_strdate,
)
class VidbitIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)'
_TESTS = [{
'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2',
'md5': '1a34b7f14defe3b8fafca9796892924d',
'info_dict': {
'id': 'jkL2yDOEq2',
'ext': 'mp4',
'title': 'Intro to VidBit',
'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7',
'thumbnail': 're:https?://.*\.jpg$',
'upload_date': '20160618',
'view_count': int,
'comment_count': int,
}
}, {
'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id)
video_url, title = [None] * 2
config = self._parse_json(self._search_regex(
r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'),
video_id, transform_source=js_to_json)
if config:
if config.get('file'):
video_url = compat_urlparse.urljoin(url, config['file'])
title = config.get('title')
if not video_url:
video_url = compat_urlparse.urljoin(url, self._search_regex(
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'video URL', group='url'))
if not title:
title = remove_end(
self._html_search_regex(
(r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'),
webpage, 'title', default=None) or self._og_search_title(webpage),
' - VidBit')
description = self._html_search_meta(
('description', 'og:description', 'twitter:description'),
webpage, 'description')
upload_date = unified_strdate(self._html_search_meta(
'datePublished', webpage, 'upload date'))
view_count = int_or_none(self._search_regex(
r'<strong>(\d+)</strong> views',
webpage, 'view count', fatal=False))
comment_count = int_or_none(self._search_regex(
r'id=["\']cmt_num["\'][^>]*>\((\d+)\)',
webpage, 'comment count', fatal=False))
return {
'id': video_id,
'url': video_url,
'title': title,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
'upload_date': upload_date,
'view_count': view_count,
'comment_count': comment_count,
}

View File

@ -16,6 +16,7 @@ from ..utils import (
ExtractorError, ExtractorError,
InAdvancePagedList, InAdvancePagedList,
int_or_none, int_or_none,
NO_DEFAULT,
RegexNotFoundError, RegexNotFoundError,
sanitized_Request, sanitized_Request,
smuggle_url, smuggle_url,
@ -56,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', vuid)
self._download_webpage(login_request, None, False, 'Wrong login info') self._download_webpage(login_request, None, False, 'Wrong login info')
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = urlencode_postdata({
'password': password,
'token': token,
})
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
def _extract_xsrft_and_vuid(self, webpage): def _extract_xsrft_and_vuid(self, webpage):
xsrft = self._search_regex( xsrft = self._search_regex(
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)', r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
@ -344,26 +365,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
if mobj: if mobj:
return mobj.group(1) return mobj.group(1)
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = urlencode_postdata({
'password': password,
'token': token,
})
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Referer', url)
self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
def _verify_player_video_password(self, url, video_id): def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword') password = self._downloader.params.get('videopassword')
if password is None: if password is None:
@ -791,12 +792,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'uploader_id': 'user22258446', 'uploader_id': 'user22258446',
} }
}, {
'note': 'Password protected',
'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
'info_dict': {
'id': '138823582',
'ext': 'mp4',
'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
'uploader': 'TMB',
'uploader_id': 'user37284429',
},
'params': {
'videopassword': 'holygrail',
},
}] }]
def _real_initialize(self):
self._login()
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
webpage = self._download_webpage(webpage_url, video_id)
config_url = self._html_search_regex(
r'data-config-url="([^"]+)"', webpage, 'config URL',
default=NO_DEFAULT if video_password_verified else None)
if config_url is None:
self._verify_video_password(webpage_url, video_id, webpage)
config_url = self._get_config_url(
webpage_url, video_id, video_password_verified=True)
return config_url
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
config = self._download_json( config_url = self._get_config_url(url, video_id)
'https://player.vimeo.com/video/%s/config' % video_id, video_id) config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id) info_dict = self._parse_config(config, video_id)
self._vimeo_sort_formats(info_dict['formats']) self._vimeo_sort_formats(info_dict['formats'])
info_dict['id'] = video_id info_dict['id'] = video_id

View File

@ -90,10 +90,12 @@ class VineIE(InfoExtractor):
data = self._parse_json( data = self._parse_json(
self._search_regex( self._search_regex(
r'window\.POST_DATA\s*=\s*{\s*%s\s*:\s*({.+?})\s*};\s*</script>' % video_id, r'window\.POST_DATA\s*=\s*({.+?});\s*</script>',
webpage, 'vine data'), webpage, 'vine data'),
video_id) video_id)
data = data[list(data.keys())[0]]
formats = [{ formats = [{
'format_id': '%(format)s-%(rate)s' % f, 'format_id': '%(format)s-%(rate)s' % f,
'vcodec': f.get('format'), 'vcodec': f.get('format'),

View File

@ -27,12 +27,12 @@ class VKIE(InfoExtractor):
https?:// https?://
(?: (?:
(?: (?:
(?:m\.)?vk\.com/video_| (?:(?:m|new)\.)?vk\.com/video_|
(?:www\.)?daxab.com/ (?:www\.)?daxab.com/
) )
ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)| ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
(?: (?:
(?:m\.)?vk\.com/(?:.+?\?.*?z=)?video| (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
(?:www\.)?daxab.com/embed/ (?:www\.)?daxab.com/embed/
) )
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))? (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
@ -182,6 +182,10 @@ class VKIE(InfoExtractor):
# pladform embed # pladform embed
'url': 'https://vk.com/video-76116461_171554880', 'url': 'https://vk.com/video-76116461_171554880',
'only_matching': True, 'only_matching': True,
},
{
'url': 'http://new.vk.com/video205387401_165548505',
'only_matching': True,
} }
] ]
@ -354,7 +358,7 @@ class VKIE(InfoExtractor):
class VKUserVideosIE(InfoExtractor): class VKUserVideosIE(InfoExtractor):
IE_NAME = 'vk:uservideos' IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos" IE_DESC = "VK - User's Videos"
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
_TEMPLATE_URL = 'https://vk.com/videos' _TEMPLATE_URL = 'https://vk.com/videos'
_TESTS = [{ _TESTS = [{
'url': 'http://vk.com/videos205387401', 'url': 'http://vk.com/videos205387401',
@ -369,6 +373,12 @@ class VKUserVideosIE(InfoExtractor):
}, { }, {
'url': 'http://vk.com/videos-97664626?section=all', 'url': 'http://vk.com/videos-97664626?section=all',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://m.vk.com/videos205387401',
'only_matching': True,
}, {
'url': 'http://new.vk.com/videos205387401',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -25,7 +25,8 @@ class VRTIE(InfoExtractor):
'timestamp': 1414271750.949, 'timestamp': 1414271750.949,
'upload_date': '20141025', 'upload_date': '20141025',
'duration': 929, 'duration': 929,
} },
'skip': 'HTTP Error 404: Not Found',
}, },
# sporza.be # sporza.be
{ {
@ -39,7 +40,8 @@ class VRTIE(InfoExtractor):
'timestamp': 1413835980.560, 'timestamp': 1413835980.560,
'upload_date': '20141020', 'upload_date': '20141020',
'duration': 3238, 'duration': 3238,
} },
'skip': 'HTTP Error 404: Not Found',
}, },
# cobra.be # cobra.be
{ {
@ -53,16 +55,39 @@ class VRTIE(InfoExtractor):
'timestamp': 1413967500.494, 'timestamp': 1413967500.494,
'upload_date': '20141022', 'upload_date': '20141022',
'duration': 661, 'duration': 661,
} },
'skip': 'HTTP Error 404: Not Found',
}, },
{ {
# YouTube video # YouTube video
'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957', 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
'only_matching': True, 'md5': 'b8b93da1df1cea6c8556255a796b7d61',
'info_dict': {
'id': 'Wji-BZ0oCwg',
'ext': 'mp4',
'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer',
'description': 'md5:8e468944dce15567a786a67f74262583',
'uploader': 'Star Wars',
'uploader_id': 'starwars',
'upload_date': '20160407',
},
'add_ie': ['Youtube'],
}, },
{ {
'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
'only_matching': True, 'md5': '',
'info_dict': {
'id': '2377055',
'ext': 'mp4',
'title': 'Cafe Derby',
'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.',
'upload_date': '20150626',
'timestamp': 1435305240.769,
},
'params': {
# m3u8 download
'skip_download': True,
}
} }
] ]
@ -98,6 +123,32 @@ class VRTIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native', src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
src.replace('playlist.m3u8', 'manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'data-video-geoblocking="true"' not in webpage:
rtmp_formats = self._extract_smil_formats(
src.replace('playlist.m3u8', 'jwplayer.smil'),
video_id, fatal=False)
formats.extend(rtmp_formats)
for rtmp_format in rtmp_formats:
rtmp_format_c = rtmp_format.copy()
rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
del rtmp_format_c['play_path']
del rtmp_format_c['ext']
http_format = rtmp_format_c.copy()
http_format.update({
'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
'format_id': rtmp_format['format_id'].replace('rtmp', 'http'),
'protocol': 'http',
})
rtsp_format = rtmp_format_c.copy()
rtsp_format.update({
'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
'protocol': 'rtsp',
})
formats.extend([http_format, rtsp_format])
else: else:
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
'%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))

View File

@ -4,17 +4,23 @@ import itertools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_duration,
sanitized_Request, sanitized_Request,
str_to_int, str_to_int,
) )
class XTubeIE(InfoExtractor): class XTubeIE(InfoExtractor):
_VALID_URL = r'(?:xtube:|https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-))(?P<id>[^/?&#]+)' _VALID_URL = r'''(?x)
(?:
xtube:|
https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?P<display_id>[^/]+)-)
)
(?P<id>[^/?&#]+)
'''
_TESTS = [{ _TESTS = [{
# old URL schema # old URL schema
@ -27,6 +33,8 @@ class XTubeIE(InfoExtractor):
'description': 'contains:an ET kind of thing', 'description': 'contains:an ET kind of thing',
'uploader': 'greenshowers', 'uploader': 'greenshowers',
'duration': 450, 'duration': 450,
'view_count': int,
'comment_count': int,
'age_limit': 18, 'age_limit': 18,
} }
}, { }, {
@ -51,21 +59,30 @@ class XTubeIE(InfoExtractor):
req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1')
webpage = self._download_webpage(req, display_id) webpage = self._download_webpage(req, display_id)
flashvars = self._parse_json( sources = self._parse_json(self._search_regex(
self._search_regex( r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id)
r'xt\.playerOps\s*=\s*({.+?});', webpage, 'player ops'),
video_id)['flashvars']
title = flashvars.get('title') or self._search_regex( formats = []
r'<h1>([^<]+)</h1>', webpage, 'title') for format_id, format_url in sources.items():
video_url = compat_urllib_parse_unquote(flashvars['video_url']) formats.append({
duration = int_or_none(flashvars.get('video_duration')) 'url': format_url,
'format_id': format_id,
'height': int_or_none(format_id),
})
self._sort_formats(formats)
uploader = self._search_regex( title = self._search_regex(
r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"', (r'<h1>(?P<title>[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
webpage, 'uploader', fatal=False) webpage, 'title', group='title')
description = self._search_regex( description = self._search_regex(
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False) r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
uploader = self._search_regex(
(r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
webpage, 'uploader', fatal=False)
duration = parse_duration(self._search_regex(
r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>', r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
@ -76,7 +93,6 @@ class XTubeIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'url': video_url,
'title': title, 'title': title,
'description': description, 'description': description,
'uploader': uploader, 'uploader': uploader,
@ -84,6 +100,7 @@ class XTubeIE(InfoExtractor):
'view_count': view_count, 'view_count': view_count,
'comment_count': comment_count, 'comment_count': comment_count,
'age_limit': 18, 'age_limit': 18,
'formats': formats,
} }

View File

@ -67,6 +67,20 @@ class XuiteIE(InfoExtractor):
'categories': ['電玩動漫'], 'categories': ['電玩動漫'],
}, },
'skip': 'Video removed', 'skip': 'Video removed',
}, {
# Video with encoded media id
# from http://forgetfulbc.blogspot.com/2016/06/date.html
'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0',
'info_dict': {
'id': 'cE1xbENoLTI3NDQ3MzM2LmZsdg==',
'ext': 'mp4',
'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)',
'description': 'md5:f0abdcb69df300f522a5442ef3146f2a',
'timestamp': 1466160960,
'upload_date': '20160617',
'uploader': 'B.C. & Lowy',
'uploader_id': '232279340',
},
}, { }, {
'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9', 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
'only_matching': True, 'only_matching': True,
@ -80,10 +94,9 @@ class XuiteIE(InfoExtractor):
def base64_encode_utf8(data): def base64_encode_utf8(data):
return base64.b64encode(data.encode('utf-8')).decode('utf-8') return base64.b64encode(data.encode('utf-8')).decode('utf-8')
def _extract_flv_config(self, media_id): def _extract_flv_config(self, encoded_media_id):
base64_media_id = self.base64_encode_utf8(media_id)
flv_config = self._download_xml( flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id, 'http://vlog.xuite.net/flash/player?media=%s' % encoded_media_id,
'flv config') 'flv config')
prop_dict = {} prop_dict = {}
for prop in flv_config.findall('./property'): for prop in flv_config.findall('./property'):
@ -108,9 +121,14 @@ class XuiteIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error_msg), '%s returned error: %s' % (self.IE_NAME, error_msg),
expected=True) expected=True)
video_id = self._html_search_regex( encoded_media_id = self._search_regex(
r'data-mediaid="(\d+)"', webpage, 'media id') r'attributes\.name\s*=\s*"([^"]+)"', webpage,
flv_config = self._extract_flv_config(video_id) 'encoded media id', default=None)
if encoded_media_id is None:
video_id = self._html_search_regex(
r'data-mediaid="(\d+)"', webpage, 'media id')
encoded_media_id = self.base64_encode_utf8(video_id)
flv_config = self._extract_flv_config(encoded_media_id)
FORMATS = { FORMATS = {
'audio': 'mp3', 'audio': 'mp3',

View File

@ -19,6 +19,7 @@ from ..utils import (
mimetype2ext, mimetype2ext,
) )
from .brightcove import BrightcoveNewIE
from .nbc import NBCSportsVPlayerIE from .nbc import NBCSportsVPlayerIE
@ -227,7 +228,12 @@ class YahooIE(InfoExtractor):
# Look for NBCSports iframes # Look for NBCSports iframes
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
if nbc_sports_url: if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key())
# Look for Brightcove New Studio embeds
bc_url = BrightcoveNewIE._extract_url(webpage)
if bc_url:
return self.url_result(bc_url, BrightcoveNewIE.ie_key())
# Query result is often embedded in webpage as JSON. Sometimes explicit requests # Query result is often embedded in webpage as JSON. Sometimes explicit requests
# to video API results in a failure with geo restriction reason therefore using # to video API results in a failure with geo restriction reason therefore using

View File

@ -16,7 +16,6 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
get_element_by_attribute, get_element_by_attribute,
sanitized_Request,
) )
@ -218,14 +217,10 @@ class YoukuIE(InfoExtractor):
headers = { headers = {
'Referer': req_url, 'Referer': req_url,
} }
headers.update(self.geo_verification_headers())
self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com') self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
req = sanitized_Request(req_url, headers=headers)
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') raw_data = self._download_json(req_url, video_id, note=note, headers=headers)
if cn_verification_proxy:
req.add_header('Ytdl-request-proxy', cn_verification_proxy)
raw_data = self._download_json(req, video_id, note=note)
return raw_data['data'] return raw_data['data']

View File

@ -501,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'youtube_include_dash_manifest': True, 'youtube_include_dash_manifest': True,
'format': '141', 'format': '141',
}, },
'skip': 'format 141 not served anymore',
}, },
# DASH manifest with encrypted signature # DASH manifest with encrypted signature
{ {
@ -517,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}, },
'params': { 'params': {
'youtube_include_dash_manifest': True, 'youtube_include_dash_manifest': True,
'format': '141', 'format': '141/bestaudio[ext=m4a]',
}, },
}, },
# JS player signature function name containing $ # JS player signature function name containing $
@ -537,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}, },
'params': { 'params': {
'youtube_include_dash_manifest': True, 'youtube_include_dash_manifest': True,
'format': '141', 'format': '141/bestaudio[ext=m4a]',
}, },
}, },
# Controversy video # Controversy video
@ -618,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic', 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
'license': 'Standard YouTube License', 'license': 'Standard YouTube License',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympics', 'uploader': 'Olympic',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
}, },
'params': { 'params': {
@ -671,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000', 'uploader': 'dorappi2000',
'license': 'Standard YouTube License', 'license': 'Standard YouTube License',
'formats': 'mincount:33', 'formats': 'mincount:32',
}, },
}, },
# DASH manifest with segment_list # DASH manifest with segment_list
@ -691,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': { 'params': {
'youtube_include_dash_manifest': True, 'youtube_include_dash_manifest': True,
'format': '135', # bestvideo 'format': '135', # bestvideo
} },
'skip': 'This live event has ended.',
}, },
{ {
# Multifeed videos (multiple cameras), URL is for Main Camera # Multifeed videos (multiple cameras), URL is for Main Camera
@ -762,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
}, },
'playlist_count': 2, 'playlist_count': 2,
'skip': 'Not multifeed anymore',
}, },
{ {
'url': 'http://vid.plus/FlRa-iH7PGw', 'url': 'http://vid.plus/FlRa-iH7PGw',
@ -814,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'This video does not exist.',
}, },
{ {
# Video licensed under Creative Commons # Video licensed under Creative Commons
@ -1331,7 +1335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:[a-zA-Z-]+="[^"]*"\s+)*? (?:[a-zA-Z-]+="[^"]*"\s+)*?
(?:title|href)="([^"]+)"\s+ (?:title|href)="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*? (?:[a-zA-Z-]+="[^"]*"\s+)*?
class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*> class="[^"]*"[^>]*>
[^<]+\.{3}\s* [^<]+\.{3}\s*
</a> </a>
''', r'\1', video_description) ''', r'\1', video_description)
@ -1726,6 +1730,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} }
class YoutubeSharedVideoIE(InfoExtractor):
_VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:shared'
_TEST = {
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
'info_dict': {
'id': 'uPDB5I9wfp8',
'ext': 'webm',
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
'upload_date': '20160219',
'uploader': 'Pocoyo - Português (BR)',
'uploader_id': 'PocoyoBrazil',
},
'add_ie': ['Youtube'],
'params': {
# There are already too many Youtube downloads
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
real_video_id = self._html_search_meta(
'videoId', webpage, 'YouTube video id', fatal=True)
return self.url_result(real_video_id, YoutubeIE.ie_key())
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com playlists' IE_DESC = 'YouTube.com playlists'
_VALID_URL = r"""(?x)(?: _VALID_URL = r"""(?x)(?:
@ -1958,9 +1995,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
channel_playlist_id = self._html_search_meta( channel_playlist_id = self._html_search_meta(
'channelId', channel_page, 'channel id', default=None) 'channelId', channel_page, 'channel id', default=None)
if not channel_playlist_id: if not channel_playlist_id:
channel_playlist_id = self._search_regex( channel_url = self._html_search_meta(
r'data-(?:channel-external-|yt)id="([^"]+)"', ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
channel_page, 'channel id', default=None) channel_page, 'channel url', default=None)
if channel_url:
channel_playlist_id = self._search_regex(
r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
channel_url, 'channel id', default=None)
if channel_playlist_id and channel_playlist_id.startswith('UC'): if channel_playlist_id and channel_playlist_id.startswith('UC'):
playlist_id = 'UU' + channel_playlist_id[2:] playlist_id = 'UU' + channel_playlist_id[2:]
return self.url_result( return self.url_result(
@ -1983,6 +2024,15 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
for video_id, video_title in self.extract_videos_from_page(channel_page)] for video_id, video_title in self.extract_videos_from_page(channel_page)]
return self.playlist_result(entries, channel_id) return self.playlist_result(entries, channel_id)
try:
next(self._entries(channel_page, channel_id))
except StopIteration:
alert_message = self._html_search_regex(
r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
channel_page, 'alert', default=None, group='alert')
if alert_message:
raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
return self.playlist_result(self._entries(channel_page, channel_id), channel_id) return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
@ -1996,7 +2046,8 @@ class YoutubeUserIE(YoutubeChannelIE):
'url': 'https://www.youtube.com/user/TheLinuxFoundation', 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
'playlist_mincount': 320, 'playlist_mincount': 320,
'info_dict': { 'info_dict': {
'title': 'TheLinuxFoundation', 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
'title': 'Uploads from The Linux Foundation',
} }
}, { }, {
'url': 'ytuser:phihag', 'url': 'ytuser:phihag',
@ -2004,6 +2055,10 @@ class YoutubeUserIE(YoutubeChannelIE):
}, { }, {
'url': 'https://www.youtube.com/c/gametrailers', 'url': 'https://www.youtube.com/c/gametrailers',
'only_matching': True, 'only_matching': True,
}, {
# This channel is not available.
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
'only_matching': True,
}] }]
@classmethod @classmethod

View File

@ -26,9 +26,7 @@ def parseOpts(overrideArguments=None):
except IOError: except IOError:
return default # silently skip if file is not present return default # silently skip if file is not present
try: try:
res = [] res = compat_shlex_split(optionf.read(), comments=True)
for l in optionf:
res += compat_shlex_split(l, comments=True)
finally: finally:
optionf.close() optionf.close()
return res return res
@ -211,11 +209,16 @@ def parseOpts(overrideArguments=None):
action='store_const', const='::', dest='source_address', action='store_const', const='::', dest='source_address',
help='Make all connections via IPv6 (experimental)', help='Make all connections via IPv6 (experimental)',
) )
network.add_option(
'--geo-verification-proxy',
dest='geo_verification_proxy', default=None, metavar='URL',
help='Use this proxy to verify the IP address for some geo-restricted sites. '
'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
)
network.add_option( network.add_option(
'--cn-verification-proxy', '--cn-verification-proxy',
dest='cn_verification_proxy', default=None, metavar='URL', dest='cn_verification_proxy', default=None, metavar='URL',
help='Use this proxy to verify the IP address for some Chinese sites. ' help=optparse.SUPPRESS_HELP,
'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
) )
selection = optparse.OptionGroup(parser, 'Video Selection') selection = optparse.OptionGroup(parser, 'Video Selection')

View File

@ -76,7 +76,7 @@ class Socks4Error(ProxyError):
CODES = { CODES = {
91: 'request rejected or failed', 91: 'request rejected or failed',
92: 'request rejected becasue SOCKS server cannot connect to identd on the client', 92: 'request rejected because SOCKS server cannot connect to identd on the client',
93: 'request rejected because the client program and identd report different user-ids' 93: 'request rejected because the client program and identd report different user-ids'
} }

View File

@ -110,6 +110,49 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙ
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
DATE_FORMATS = (
'%d %B %Y',
'%d %b %Y',
'%B %d %Y',
'%b %d %Y',
'%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M',
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y/%m/%d',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
'%d-%m-%Y',
'%d.%m.%Y',
'%d.%m.%y',
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
])
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
'%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'%m/%d/%Y %H:%M:%S',
])
def preferredencoding(): def preferredencoding():
"""Get preferred encoding. """Get preferred encoding.
@ -975,6 +1018,24 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
https_response = http_response https_response = http_response
def extract_timezone(date_str):
m = re.search(
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
date_str)
if not m:
timezone = datetime.timedelta()
else:
date_str = date_str[:-len(m.group('tz'))]
if not m.group('sign'):
timezone = datetime.timedelta()
else:
sign = 1 if m.group('sign') == '+' else -1
timezone = datetime.timedelta(
hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes')))
return timezone, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None): def parse_iso8601(date_str, delimiter='T', timezone=None):
""" Return a UNIX timestamp from the given date """ """ Return a UNIX timestamp from the given date """
@ -984,20 +1045,8 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
date_str = re.sub(r'\.[0-9]+', '', date_str) date_str = re.sub(r'\.[0-9]+', '', date_str)
if timezone is None: if timezone is None:
m = re.search( timezone, date_str = extract_timezone(date_str)
r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
date_str)
if not m:
timezone = datetime.timedelta()
else:
date_str = date_str[:-len(m.group(0))]
if not m.group('sign'):
timezone = datetime.timedelta()
else:
sign = 1 if m.group('sign') == '+' else -1
timezone = datetime.timedelta(
hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes')))
try: try:
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
dt = datetime.datetime.strptime(date_str, date_format) - timezone dt = datetime.datetime.strptime(date_str, date_format) - timezone
@ -1006,6 +1055,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
pass pass
def date_formats(day_first=True):
return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True): def unified_strdate(date_str, day_first=True):
"""Return a string with the date in the format YYYYMMDD""" """Return a string with the date in the format YYYYMMDD"""
@ -1014,53 +1067,11 @@ def unified_strdate(date_str, day_first=True):
upload_date = None upload_date = None
# Replace commas # Replace commas
date_str = date_str.replace(',', ' ') date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone # Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
_, date_str = extract_timezone(date_str)
format_expressions = [ for expression in date_formats(day_first):
'%d %B %Y',
'%d %b %Y',
'%B %d %Y',
'%b %d %Y',
'%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M',
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
'%Y/%m/%d',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
'%d.%m.%Y %H:%M',
'%d.%m.%Y %H.%M',
'%Y-%m-%dT%H:%M:%SZ',
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
]
if day_first:
format_expressions.extend([
'%d-%m-%Y',
'%d.%m.%Y',
'%d.%m.%y',
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
])
else:
format_expressions.extend([
'%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',
'%m/%d/%Y %H:%M:%S',
])
for expression in format_expressions:
try: try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except ValueError: except ValueError:
@ -1076,6 +1087,29 @@ def unified_strdate(date_str, day_first=True):
return compat_str(upload_date) return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
if date_str is None:
return None
date_str = date_str.replace(',', ' ')
pm_delta = datetime.timedelta(hours=12 if re.search(r'(?i)PM', date_str) else 0)
timezone, date_str = extract_timezone(date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
for expression in date_formats(day_first):
try:
dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_delta
return calendar.timegm(dt.timetuple())
except ValueError:
pass
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
return calendar.timegm(timetuple.timetuple())
def determine_ext(url, default_ext='unknown_video'): def determine_ext(url, default_ext='unknown_video'):
if url is None: if url is None:
return default_ext return default_ext
@ -1410,6 +1444,8 @@ def shell_quote(args):
def smuggle_url(url, data): def smuggle_url(url, data):
""" Pass additional data in a URL for internal use. """ """ Pass additional data in a URL for internal use. """
url, idata = unsmuggle_url(url, {})
data.update(idata)
sdata = compat_urllib_parse_urlencode( sdata = compat_urllib_parse_urlencode(
{'__youtubedl_smuggle': json.dumps(data)}) {'__youtubedl_smuggle': json.dumps(data)})
return url + '#' + sdata return url + '#' + sdata
@ -1591,6 +1627,11 @@ class HEADRequest(compat_urllib_request.Request):
return 'HEAD' return 'HEAD'
class PUTRequest(compat_urllib_request.Request):
def get_method(self):
return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr: if get_attr:
if v is not None: if v is not None:
@ -1626,6 +1667,10 @@ def float_or_none(v, scale=1, invscale=1, default=None):
return default return default
def strip_or_none(v):
return None if v is None else v.strip()
def parse_duration(s): def parse_duration(s):
if not isinstance(s, compat_basestring): if not isinstance(s, compat_basestring):
return None return None
@ -1882,7 +1927,13 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
req_headers.update(headers) req_headers.update(headers)
req_data = data or req.data req_data = data or req.data
req_url = update_url_query(url or req.get_full_url(), query) req_url = update_url_query(url or req.get_full_url(), query)
req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request req_get_method = req.get_method()
if req_get_method == 'HEAD':
req_type = HEADRequest
elif req_get_method == 'PUT':
req_type = PUTRequest
else:
req_type = compat_urllib_request.Request
new_req = req_type( new_req = req_type(
req_url, data=req_data, headers=req_headers, req_url, data=req_data, headers=req_headers,
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
@ -2861,3 +2912,7 @@ def parse_m3u8_attributes(attrib):
val = val[1:-1] val = val[1:-1]
info[key] = val info[key] = val
return info return info
def urshift(val, n):
return val >> n if val >= 0 else (val + 0x100000000) >> n

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.06.23.1' __version__ = '2016.07.06'