Compare commits

..

110 Commits

Author SHA1 Message Date
6b4dfa2819 release 2016.11.14.1 2016-11-14 02:48:15 +07:00
9f60134a9d [ChangeLog] Actualize 2016-11-14 02:46:12 +07:00
b3d4bd05f9 release 2016.11.14 2016-11-14 02:39:50 +07:00
dbffd00ba9 [ChangeLog] Actualize 2016-11-14 02:37:21 +07:00
50913b8241 [nrk] Improve geo restriction detection 2016-11-13 22:29:36 +07:00
7e08e2cab0 [nrk] Add X-Forwarded-For HTTP header in info dict 2016-11-13 22:28:29 +07:00
690355551c [downoader/fragment,f4m,hls] Add internal support for custom HTTP headers 2016-11-13 22:22:10 +07:00
754e6c8322 [nrk] Workaround geo restriction and improve error messages 2016-11-13 20:54:34 +07:00
e58609b22c [afreecatv] Add support for vod.afreecatv.com (closes #11174) 2016-11-13 06:02:26 +07:00
4ea4c0bb22 [extractor/common] Fix Bandwidth substitution in media template (closes #11175) 2016-11-13 05:43:34 +07:00
577281b0c6 [cda] Fix and improve extraction
Fixes #10929
2016-11-13 01:01:29 +07:00
3d2729514f [plays] Improve extraction and add support for embed URLs 2016-11-12 23:08:05 +07:00
f076d7972c [extractor/common] Improve thumbnail extraction from JSON-LD 2016-11-12 23:01:05 +07:00
cpm
8b1aeadc33 [plays] Fix extraction 2016-11-12 22:59:39 +07:00
bc40b3a5ba [eagleplatform] Fix extraction (closes #11160) 2016-11-11 03:26:29 +07:00
3eaaa8abac [audioboom] Recognize /posts/ URLs (closes #11149) 2016-11-10 14:52:34 +08:00
db3367f43e release 2016.11.08.1 2016-11-08 22:30:53 +07:00
6590925c27 [ChangeLog] Actualize 2016-11-08 22:29:16 +07:00
4719af097c [extractors] Add forgotten import for espn:article 2016-11-08 22:27:02 +07:00
9946aa5ccf [franceculture] Fix extraction (closes #11140) 2016-11-08 22:26:33 +07:00
c58e07a7aa release 2016.11.08 2016-11-08 22:11:21 +07:00
f700afa24c [ChangeLog] Actualize 2016-11-08 22:09:03 +07:00
5d47b38cf5 [tmz:article] Fix extraction (closes #11052) 2016-11-08 21:53:41 +08:00
ebc7ab1e23 [espn] Fix extraction (closes #11041) 2016-11-08 00:29:12 +07:00
97726317ac [README.md] Mention HTTP headers and alternative way to obtain cookies and headers in -g FAQ 2016-11-07 23:53:22 +07:00
cb882540e8 [mitele] Fix extraction after website redesign (fixes #10824) 2016-11-07 11:13:59 +01:00
98708e6cbd [ard] Remove age restriction check (closes #11129) 2016-11-06 23:20:15 +07:00
b52c9ef165 [extractor/generic] Improve support for pornhub embeds (closes #11100) 2016-11-06 21:52:00 +07:00
e28ed498e6 [extractor/generic] Add support for redtube embds (closes #11099) 2016-11-06 21:42:41 +07:00
5021ca6c13 [redtube] Add support for embed URLs 2016-11-06 21:39:29 +07:00
37e7a71c6c [extractor/generic] Add support for drtuber embds (closes #11098) 2016-11-06 21:33:51 +07:00
f5c4b06f17 [drtuber] Fix title extraction 2016-11-06 21:29:15 +07:00
519d897049 [drtuber] Add support for embed URLs 2016-11-06 21:28:51 +07:00
b61cd51869 [yahoo] Add test and improve some content id regex 2016-11-06 21:16:33 +07:00
f420902a3b [yahoo] Add another content id regex (closes #11088) 2016-11-06 21:14:15 +07:00
de328af362 [toutv] Relax _VALID_URL (closes #11121) 2016-11-05 03:24:42 +07:00
b30e4c2754 release 2016.11.04 2016-11-04 22:07:54 +07:00
09ffe34b00 [ChangeLog] Actualize 2016-11-04 21:59:42 +07:00
640aff1d0c [anvato] Improve formats extraction 2016-11-04 21:45:24 +07:00
c897af8aac [cbslocal] Update test 2016-11-04 21:33:08 +07:00
f3c705f8ec [fox9] Add extractor (closes #11110) 2016-11-04 21:32:30 +07:00
f93ac1d175 [anvato] Extract more metadata 2016-11-04 21:17:56 +07:00
c4c9b8440c [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 manifests (closes #11113) 2016-11-04 05:02:31 +07:00
32f2627aed [vodlocker] Add another removed file pattern (closes #11106) 2016-11-03 22:22:40 +07:00
9d64e1dcdc [downloader/ism] Fix typo 2016-11-03 22:15:09 +07:00
10380e55de [downloader/ism] fix AVC Decoder Configuration Record creation in python 3 2016-11-03 16:08:57 +01:00
22979993e7 [vice] add coding cookie 2016-11-03 16:07:22 +01:00
b47ecd0b74 [vzaar] Add new extractor(closes #11093) 2016-11-03 12:50:41 +01:00
3a86b2c51e Ignore and clean .wav files 2016-11-03 18:55:55 +08:00
b811b4c93b [vice] add support for uplynk preplay videos(#11101) 2016-11-03 10:37:07 +01:00
f4dfa9a5ed [tubitv] fix extraction(closes #11061) 2016-11-03 09:04:20 +01:00
3b4b66b50c [shahid] add support for authentication(closes #11091) 2016-11-03 00:44:12 +01:00
4119a96ce5 [extractor/generic] Skip URLs we came from when delegating ISM extraction 2016-11-02 23:43:41 +07:00
26aae56690 [extractor/generic] Improve ISM extraction 2016-11-02 23:34:37 +07:00
4f9cd4d36f [radiocanada] extract subtitle(closes #11096) 2016-11-02 13:55:40 +01:00
cc99a77ac1 [extractor/generic] Add support for ISM manifests 2016-11-02 03:01:13 +07:00
8956d6608a release 2016.11.02 2016-11-02 02:39:36 +07:00
3365ea8929 [extractor/common] Remove unused code 2016-11-02 02:34:23 +07:00
a18aeee803 [ChangeLog] Actualize 2016-11-02 02:33:17 +07:00
1616f9b452 [extractor/common] Fix typo 2016-11-02 02:30:25 +07:00
02dc0a36b7 [utils] Introduce base_url 2016-11-02 02:30:18 +07:00
639e3b5c99 extract ISM formats in some of the extractors 2016-11-02 01:54:45 +07:00
b2758123c5 add Basic support for Smooth Streaming protocol(#8118) 2016-11-02 01:54:45 +07:00
f449c061d0 [nicknight] Improve extraction (closes #10769) 2016-11-02 01:35:53 +07:00
9c82bba05d [nickde] Improve extraction 2016-11-02 01:29:05 +07:00
e3577722b0 [nicknight] Add extractor 2016-11-02 01:25:59 +07:00
b82c33dd67 [extractor/common] Improve mpd base URL extraction (closes #10909, closes #11079) 2016-11-01 01:15:46 +07:00
e5a088dc4b [utils] Fix --match-filter for int-like strings (closes #11082) 2016-10-31 23:32:08 +07:00
2c6da7df4a release 2016.10.31 2016-10-31 01:36:53 +07:00
7e7a028aa4 [README.md] Fix a typo 2016-10-31 01:12:36 +07:00
e70a5e6566 release 2016.10.30 2016-10-30 18:24:49 +07:00
3bf55be466 [ChangeLog] Actualize 2016-10-30 18:19:29 +07:00
a901fc5fc2 [vessel] Add tests for #11068 2016-10-30 18:17:15 +07:00
cae6bc0118 [vessel] Improve video id extraction 2016-10-30 18:14:51 +07:00
d9ee2e5cf6 [facebook] Remove SWF params so that 1080P are detected
Closes #11073

In the provided link, SWF params give up to 720P, and VideoConfig
gives 1080P for both best and bestvideo. I guess all Facebook videos
supports HTML5 now, so I remove the old detection for SWF params
2016-10-30 18:20:55 +08:00
e1a0b3b81c [imgur] Recognize /r/ URLs (closes #11071) 2016-10-30 17:02:03 +08:00
2a048f9878 [beeg] Fix extraction (closes #11069) 2016-10-30 05:27:50 +07:00
ea331f40e6 Credit @Thor77 for jamendo (#10934) 2016-10-30 05:10:31 +07:00
f02700a1fa [openload] Fix extraction (#10408)
Thanks @TwelveCharzz again for studying openload codes
2016-10-29 18:00:30 +08:00
f3517569f6 [gvsearch] Modernize and fix page result request (closes #11051) 2016-10-28 23:19:59 +07:00
c725333d41 [ard] Fix typo 2016-10-27 03:59:00 +07:00
a5a8877f9c [adultswim] Fix extraction (closes #10979) 2016-10-27 02:16:48 +08:00
43c53a1700 [nobelprize] Add new extractor(closes #9999) 2016-10-26 18:15:23 +01:00
ec8705117a [hornbunny] Fix extraction (#10981) 2016-10-27 00:10:51 +08:00
3d8d44c7b1 [tvp] improve video id extraction(closes #10585) 2016-10-26 16:47:22 +01:00
88839f4380 release 2016.10.26 2016-10-26 19:55:09 +07:00
83e9374464 [ChangeLog] Actualize 2016-10-26 19:53:44 +07:00
773017c648 [rentv] Move rentv test from generic extractor and add only matching tests 2016-10-26 19:52:43 +07:00
777d90dc28 [rentv] Add new extractor(closes #10620) 2016-10-26 10:07:35 +01:00
3791d84acc [ard] Detect unavailable videos (closes #11018) 2016-10-25 21:21:47 +07:00
9305a0dc60 [vk] Fix extraction (closes #11022) 2016-10-25 21:05:29 +07:00
94e08950e3 release 2016.10.25 2016-10-25 03:19:36 +07:00
ee824a8d06 [ChangeLog] Actualize
[ci skip]
2016-10-25 02:51:07 +07:00
d3b6b3b95b [jamendo] Improve 2016-10-25 02:46:48 +07:00
b17422753f [jamendo] Add extractor 2016-10-25 02:43:03 +07:00
b0b28b8241 [ChangeLog] Actualize 2016-10-25 01:53:41 +07:00
81cb7a5978 Credit @azuwis for pandatv (#10736) 2016-10-25 01:51:46 +07:00
d2e96a8ed4 [pandatv] Extract m3u8, document reverse source and PEP 8 2016-10-25 01:51:37 +07:00
2e7c8cab55 [pandatv] Add new extractor 2016-10-25 01:46:02 +07:00
d7d4481c6a [movieclips] Fix _VALID_URL 2016-10-24 23:54:42 +07:00
5ace137bf4 [dotsub] Support vimeo embed (closes #10964) 2016-10-24 15:13:33 +08:00
9dde0e04e6 [litv] Fix extraction (#11006) 2016-10-23 23:23:40 +08:00
f16f8505b1 [vimeo] Delegate ondemand redirects to ondemand extractor (closes #10994) 2016-10-23 18:48:50 +07:00
9dc13a6780 [vivo] Fix extraction (closes #11003) 2016-10-23 18:07:56 +07:00
9aa929d337 [twitch:stream] Add support for rebroadcasts (closes #10995) 2016-10-23 17:20:45 +07:00
425f3fdfcb [pluralsight] Fix subtitles conversion (closes #10990) 2016-10-22 21:15:39 +07:00
e034cbc581 Merge branch 'johnhawkinson-stdin2' 2016-10-22 13:10:27 +08:00
5378f8ce0d [ChangeLog] Update for #10996 2016-10-22 13:08:56 +08:00
b64d04c119 [utils] Clarify for redirecting STDIN in get_exe_version() 2016-10-22 13:04:05 +08:00
00ca755231 [get_exe_version] Do version probes with <&-
When doing version probes for ffmpeg, do the
equivalent of calling it as:

    ffmpeg -version <&-

Where <&- is shell syntax for closing stdin before calling the
program. This is roughly equivalent to </dev/null without actually
opening /dev/null.

This prevents ffmpeg -version from hanging when run in the background.
Fixes #955.

The reason is that ffmpeg tries to manipulate stdin to set up terminal
characteristic, and that causes the kernel to suspend the parent
process (youtube-dl).

Note that closing stdin is achieved by calling subprocess.Popen() with
stdin set to subprocess.PIPE and without passing any input to
Popen.communicate(). This is somewhat subtle.
2016-10-22 00:34:08 -04:00
73 changed files with 1988 additions and 531 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.14.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.21.1** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.14.1**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.10.21.1 [debug] youtube-dl version 2016.11.14.1
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

1
.gitignore vendored
View File

@ -30,6 +30,7 @@ updates_key.pem
*.m4v *.m4v
*.mp3 *.mp3
*.3gp *.3gp
*.wav
*.part *.part
*.swp *.swp
test/testdata test/testdata

View File

@ -188,3 +188,5 @@ Xie Yanbo
Philip Xu Philip Xu
John Hawkinson John Hawkinson
Rich Leeper Rich Leeper
Zhong Jianxin
Thor77

View File

@ -245,7 +245,7 @@ Say `meta` from the previous example has a `title` and you are about to extract
title = meta['title'] title = meta['title']
``` ```
If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected. If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario: Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:

113
ChangeLog
View File

@ -1,3 +1,116 @@
version 2016.11.14.1
Core
+ [downoader/fragment,f4m,hls] Respect HTTP headers from info dict
* [extractor/common] Fix media templates with Bandwidth substitution pattern in
MPD manifests (#11175)
* [extractor/common] Improve thumbnail extraction from JSON-LD
Extractors
+ [nrk] Workaround geo restriction
+ [nrk] Improve error detection and messages
+ [afreecatv] Add support for vod.afreecatv.com (#11174)
* [cda] Fix and improve extraction (#10929, #10936)
* [plays] Fix extraction (#11165)
* [eagleplatform] Fix extraction (#11160)
+ [audioboom] Recognize /posts/ URLs (#11149)
version 2016.11.08.1
Extractors
* [espn:article] Fix support for espn.com articles
* [franceculture] Fix extraction (#11140)
version 2016.11.08
Extractors
* [tmz:article] Fix extraction (#11052)
* [espn] Fix extraction (#11041)
* [mitele] Fix extraction after website redesign (#10824)
- [ard] Remove age restriction check (#11129)
* [generic] Improve support for pornhub.com embeds (#11100)
+ [generic] Add support for redtube.com embeds (#11099)
+ [generic] Add support for drtuber.com embeds (#11098)
+ [redtube] Add support for embed URLs
+ [drtuber] Add support for embed URLs
+ [yahoo] Improve content id extraction (#11088)
* [toutv] Relax URL regular expression (#11121)
version 2016.11.04
Core
* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8
manifests (#11113)
* [downloader/ism] Fix AVC Decoder Configuration Record
Extractors
+ [fox9] Add support for fox9.com (#11110)
+ [anvato] Extract more metadata and improve formats extraction
* [vodlocker] Improve removed videos detection (#11106)
+ [vzaar] Add support for vzaar.com (#11093)
+ [vice] Add support for uplynk preplay videos (#11101)
* [tubitv] Fix extraction (#11061)
+ [shahid] Add support for authentication (#11091)
+ [radiocanada] Add subtitles support (#11096)
+ [generic] Add support for ISM manifests
version 2016.11.02
Core
+ Add basic support for Smooth Streaming protocol (#8118, #10969)
* Improve MPD manifest base URL extraction (#10909, #11079)
* Fix --match-filter for int-like strings (#11082)
Extractors
+ [mva] Add support for ISM formats
+ [msn] Add support for ISM formats
+ [onet] Add support for ISM formats
+ [tvp] Add support for ISM formats
+ [nicknight] Add support for nicknight sites (#10769)
version 2016.10.30
Extractors
* [facebook] Improve 1080P video detection (#11073)
* [imgur] Recognize /r/ URLs (#11071)
* [beeg] Fix extraction (#11069)
* [openload] Fix extraction (#10408)
* [gvsearch] Modernize and fix search request (#11051)
* [adultswim] Fix extraction (#10979)
+ [nobelprize] Add support for nobelprize.org (#9999)
* [hornbunny] Fix extraction (#10981)
* [tvp] Improve video id extraction (#10585)
version 2016.10.26
Extractors
+ [rentv] Add support for ren.tv (#10620)
+ [ard] Detect unavailable videos (#11018)
* [vk] Fix extraction (#11022)
version 2016.10.25
Core
* Running youtube-dl in the background is fixed (#10996, #10706, #955)
Extractors
+ [jamendo] Add support for jamendo.com (#10132, #10736)
+ [pandatv] Add support for panda.tv (#10736)
+ [dotsub] Support Vimeo embed (#10964)
* [litv] Fix extraction
+ [vimeo] Delegate ondemand redirects to ondemand extractor (#10994)
* [vivo] Fix extraction (#11003)
+ [twitch:stream] Add support for rebroadcasts (#10995)
* [pluralsight] Fix subtitles conversion (#10990)
version 2016.10.21.1 version 2016.10.21.1
Extractors Extractors

View File

@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean: clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete find . -name "*.pyc" -delete
find . -name "*.class" -delete find . -name "*.class" -delete

View File

@ -758,7 +758,7 @@ Once the video is fully downloaded, use any video player, such as [mpv](https://
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser. ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`.
It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule. It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
@ -1083,7 +1083,7 @@ Say `meta` from the previous example has a `title` and you are about to extract
title = meta['title'] title = meta['title']
``` ```
If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected. If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario: Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:

View File

@ -225,6 +225,7 @@
- **EroProfile** - **EroProfile**
- **Escapist** - **Escapist**
- **ESPN** - **ESPN**
- **ESPNArticle**
- **EsriVideo** - **EsriVideo**
- **Europa** - **Europa**
- **EveryonesMixtape** - **EveryonesMixtape**
@ -247,6 +248,7 @@
- **FootyRoom** - **FootyRoom**
- **Formula1** - **Formula1**
- **FOX** - **FOX**
- **FOX9**
- **Foxgay** - **Foxgay**
- **foxnews**: Fox News and Fox Business Video - **foxnews**: Fox News and Fox Business Video
- **foxnews:article** - **foxnews:article**
@ -332,6 +334,8 @@
- **ivideon**: Ivideon TV - **ivideon**: Ivideon TV
- **Iwara** - **Iwara**
- **Izlesene** - **Izlesene**
- **Jamendo**
- **JamendoAlbum**
- **JeuxVideo** - **JeuxVideo**
- **Jove** - **Jove**
- **jpopsuki.tv** - **jpopsuki.tv**
@ -481,11 +485,13 @@
- **nhl.com:videocenter:category**: NHL videocenter category - **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com** - **nick.com**
- **nick.de** - **nick.de**
- **nicknight**
- **niconico**: ニコニコ動画 - **niconico**: ニコニコ動画
- **NiconicoPlaylist** - **NiconicoPlaylist**
- **Nintendo** - **Nintendo**
- **njoy**: N-JOY - **njoy**: N-JOY
- **njoy:embed** - **njoy:embed**
- **NobelPrize**
- **Noco** - **Noco**
- **Normalboots** - **Normalboots**
- **NosVideo** - **NosVideo**
@ -527,6 +533,7 @@
- **orf:iptv**: iptv.ORF.at - **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1 - **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek - **orf:tvthek**: ORF TVthek
- **PandaTV**: 熊猫TV
- **pandora.tv**: 판도라TV - **pandora.tv**: 판도라TV
- **parliamentlive.tv**: UK parliament videos - **parliamentlive.tv**: UK parliament videos
- **Patreon** - **Patreon**
@ -586,6 +593,8 @@
- **RDS**: RDS.ca - **RDS**: RDS.ca
- **RedTube** - **RedTube**
- **RegioTV** - **RegioTV**
- **RENTV**
- **RENTVArticle**
- **Restudy** - **Restudy**
- **Reuters** - **Reuters**
- **ReverbNation** - **ReverbNation**
@ -641,7 +650,7 @@
- **ServingSys** - **ServingSys**
- **Sexu** - **Sexu**
- **Shahid** - **Shahid**
- **Shared**: shared.sx and vivo.sx - **Shared**: shared.sx
- **ShareSix** - **ShareSix**
- **Sina** - **Sina**
- **SixPlay** - **SixPlay**
@ -848,6 +857,7 @@
- **Vimple**: Vimple - one-click video hosting - **Vimple**: Vimple - one-click video hosting
- **Vine** - **Vine**
- **vine:user** - **vine:user**
- **Vivo**: vivo.sx
- **vk**: VK - **vk**: VK
- **vk:uservideos**: VK - User's Videos - **vk:uservideos**: VK - User's Videos
- **vk:wallpost** - **vk:wallpost**
@ -862,6 +872,7 @@
- **vube**: Vube.com - **vube**: Vube.com
- **VuClip** - **VuClip**
- **VyboryMos** - **VyboryMos**
- **Vzaar**
- **Walla** - **Walla**
- **washingtonpost** - **washingtonpost**
- **washingtonpost:article** - **washingtonpost:article**

View File

@ -605,6 +605,7 @@ class TestYoutubeDL(unittest.TestCase):
'extractor': 'TEST', 'extractor': 'TEST',
'duration': 30, 'duration': 30,
'filesize': 10 * 1024, 'filesize': 10 * 1024,
'playlist_id': '42',
} }
second = { second = {
'id': '2', 'id': '2',
@ -614,6 +615,7 @@ class TestYoutubeDL(unittest.TestCase):
'duration': 10, 'duration': 10,
'description': 'foo', 'description': 'foo',
'filesize': 5 * 1024, 'filesize': 5 * 1024,
'playlist_id': '43',
} }
videos = [first, second] videos = [first, second]
@ -650,6 +652,10 @@ class TestYoutubeDL(unittest.TestCase):
res = get_videos(f) res = get_videos(f)
self.assertEqual(res, ['1']) self.assertEqual(res, ['1'])
f = match_filter_func('playlist_id = 42')
res = get_videos(f)
self.assertEqual(res, ['1'])
def test_playlist_items_selection(self): def test_playlist_items_selection(self):
entries = [{ entries = [{
'id': compat_str(i), 'id': compat_str(i),

View File

@ -69,6 +69,7 @@ from youtube_dl.utils import (
uppercase_escape, uppercase_escape,
lowercase_escape, lowercase_escape,
url_basename, url_basename,
base_url,
urlencode_postdata, urlencode_postdata,
urshift, urshift,
update_url_query, update_url_query,
@ -437,6 +438,13 @@ class TestUtil(unittest.TestCase):
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
'trailer.mp4') 'trailer.mp4')
def test_base_url(self):
self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
def test_parse_age_limit(self): def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None) self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_age_limit(False), None) self.assertEqual(parse_age_limit(False), None)

View File

@ -1658,7 +1658,7 @@ class YoutubeDL(object):
video_ext, audio_ext = audio.get('ext'), video.get('ext') video_ext, audio_ext = audio.get('ext'), video.get('ext')
if video_ext and audio_ext: if video_ext and audio_ext:
COMPATIBLE_EXTS = ( COMPATIBLE_EXTS = (
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'), ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
('webm') ('webm')
) )
for exts in COMPATIBLE_EXTS: for exts in COMPATIBLE_EXTS:

View File

@ -7,6 +7,7 @@ from .http import HttpFD
from .rtmp import RtmpFD from .rtmp import RtmpFD
from .dash import DashSegmentsFD from .dash import DashSegmentsFD
from .rtsp import RtspFD from .rtsp import RtspFD
from .ism import IsmFD
from .external import ( from .external import (
get_external_downloader, get_external_downloader,
FFmpegFD, FFmpegFD,
@ -24,6 +25,7 @@ PROTOCOL_MAP = {
'rtsp': RtspFD, 'rtsp': RtspFD,
'f4m': F4mFD, 'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD, 'http_dash_segments': DashSegmentsFD,
'ism': IsmFD,
} }

View File

@ -314,7 +314,8 @@ class F4mFD(FragmentFD):
man_url = info_dict['url'] man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr') requested_bitrate = info_dict.get('tbr')
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(man_url)
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl() man_url = urlh.geturl()
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
@ -387,7 +388,10 @@ class F4mFD(FragmentFD):
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
frag_filename = '%s-%s' % (ctx['tmpfilename'], name) frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
try: try:
success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()}) success = ctx['dl'].download(frag_filename, {
'url': url_parsed.geturl(),
'http_headers': info_dict.get('http_headers'),
})
if not success: if not success:
return False return False
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb') (down, frag_sanitized) = sanitize_open(frag_filename, 'rb')

View File

@ -9,6 +9,7 @@ from ..utils import (
error_to_compat_str, error_to_compat_str,
encodeFilename, encodeFilename,
sanitize_open, sanitize_open,
sanitized_Request,
) )
@ -37,6 +38,10 @@ class FragmentFD(FileDownloader):
def report_skip_fragment(self, fragment_name): def report_skip_fragment(self, fragment_name):
self.to_screen('[download] Skipping fragment %s...' % fragment_name) self.to_screen('[download] Skipping fragment %s...' % fragment_name)
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
return sanitized_Request(url, None, headers) if headers else url
def _prepare_and_start_frag_download(self, ctx): def _prepare_and_start_frag_download(self, ctx):
self._prepare_frag_download(ctx) self._prepare_frag_download(ctx)
self._start_frag_download(ctx) self._start_frag_download(ctx)

View File

@ -59,7 +59,8 @@ class HlsFD(FragmentFD):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
man_url = info_dict['url'] man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
manifest = self.ydl.urlopen(man_url).read()
manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
s = manifest.decode('utf-8', 'ignore') s = manifest.decode('utf-8', 'ignore')
@ -112,7 +113,10 @@ class HlsFD(FragmentFD):
count = 0 count = 0
while count <= fragment_retries: while count <= fragment_retries:
try: try:
success = ctx['dl'].download(frag_filename, {'url': frag_url}) success = ctx['dl'].download(frag_filename, {
'url': frag_url,
'http_headers': info_dict.get('http_headers'),
})
if not success: if not success:
return False return False
down, frag_sanitized = sanitize_open(frag_filename, 'rb') down, frag_sanitized = sanitize_open(frag_filename, 'rb')

View File

@ -0,0 +1,271 @@
from __future__ import unicode_literals
import os
import time
import struct
import binascii
import io
from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..utils import (
sanitize_open,
encodeFilename,
)
u8 = struct.Struct(b'>B')
u88 = struct.Struct(b'>Bx')
u16 = struct.Struct(b'>H')
u1616 = struct.Struct(b'>Hxx')
u32 = struct.Struct(b'>I')
u64 = struct.Struct(b'>Q')
s88 = struct.Struct(b'>bx')
s16 = struct.Struct(b'>h')
s1616 = struct.Struct(b'>hxx')
s32 = struct.Struct(b'>i')
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
TRACK_ENABLED = 0x1
TRACK_IN_MOVIE = 0x2
TRACK_IN_PREVIEW = 0x4
SELF_CONTAINED = 0x1
def box(box_type, payload):
return u32.pack(8 + len(payload)) + box_type + payload
def full_box(box_type, version, flags, payload):
return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
def write_piff_header(stream, params):
track_id = params['track_id']
fourcc = params['fourcc']
duration = params['duration']
timescale = params.get('timescale', 10000000)
language = params.get('language', 'und')
height = params.get('height', 0)
width = params.get('width', 0)
is_audio = width == 0 and height == 0
creation_time = modification_time = int(time.time())
ftyp_payload = b'isml' # major brand
ftyp_payload += u32.pack(1) # minor version
ftyp_payload += b'piff' + b'iso2' # compatible brands
stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
mvhd_payload = u64.pack(creation_time)
mvhd_payload += u64.pack(modification_time)
mvhd_payload += u32.pack(timescale)
mvhd_payload += u64.pack(duration)
mvhd_payload += s1616.pack(1) # rate
mvhd_payload += s88.pack(1) # volume
mvhd_payload += u16.pack(0) # reserved
mvhd_payload += u32.pack(0) * 2 # reserved
mvhd_payload += unity_matrix
mvhd_payload += u32.pack(0) * 6 # pre defined
mvhd_payload += u32.pack(0xffffffff) # next track id
moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
tkhd_payload = u64.pack(creation_time)
tkhd_payload += u64.pack(modification_time)
tkhd_payload += u32.pack(track_id) # track id
tkhd_payload += u32.pack(0) # reserved
tkhd_payload += u64.pack(duration)
tkhd_payload += u32.pack(0) * 2 # reserved
tkhd_payload += s16.pack(0) # layer
tkhd_payload += s16.pack(0) # alternate group
tkhd_payload += s88.pack(1 if is_audio else 0) # volume
tkhd_payload += u16.pack(0) # reserved
tkhd_payload += unity_matrix
tkhd_payload += u1616.pack(width)
tkhd_payload += u1616.pack(height)
trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
mdhd_payload = u64.pack(creation_time)
mdhd_payload += u64.pack(modification_time)
mdhd_payload += u32.pack(timescale)
mdhd_payload += u64.pack(duration)
mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
mdhd_payload += u16.pack(0) # pre defined
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
hdlr_payload = u32.pack(0) # pre defined
hdlr_payload += b'soun' if is_audio else b'vide' # handler type
hdlr_payload += u32.pack(0) * 3 # reserved
hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
if is_audio:
smhd_payload = s88.pack(0) # balance
smhd_payload = u16.pack(0) # reserved
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
else:
vmhd_payload = u16.pack(0) # graphics mode
vmhd_payload += u16.pack(0) * 3 # opcolor
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
minf_payload = media_header_box
dref_payload = u32.pack(1) # entry count
dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
minf_payload += box(b'dinf', dinf_payload) # Data Information Box
stsd_payload = u32.pack(1) # entry count
sample_entry_payload = u8.pack(0) * 6 # reserved
sample_entry_payload += u16.pack(1) # data reference index
if is_audio:
sample_entry_payload += u32.pack(0) * 2 # reserved
sample_entry_payload += u16.pack(params.get('channels', 2))
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u1616.pack(params['sampling_rate'])
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
else:
sample_entry_payload = sample_entry_payload
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u32.pack(0) * 3 # pre defined
sample_entry_payload += u16.pack(width)
sample_entry_payload += u16.pack(height)
sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
sample_entry_payload += u32.pack(0) # reserved
sample_entry_payload += u16.pack(1) # frame count
sample_entry_payload += u8.pack(0) * 32 # compressor name
sample_entry_payload += u16.pack(0x18) # depth
sample_entry_payload += s16.pack(-1) # pre defined
codec_private_data = binascii.unhexlify(params['codec_private_data'])
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
avcc_payload += u16.pack(len(sps))
avcc_payload += sps
avcc_payload += u8.pack(1) # number of pps
avcc_payload += u16.pack(len(pps))
avcc_payload += pps
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
stsd_payload += sample_entry_box
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
stts_payload = u32.pack(0) # entry count
stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
stsc_payload = u32.pack(0) # entry count
stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
stco_payload = u32.pack(0) # entry count
stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
mdia_payload += box(b'minf', minf_payload) # Media Information Box
trak_payload += box(b'mdia', mdia_payload) # Media Box
moov_payload += box(b'trak', trak_payload) # Track Box
mehd_payload = u64.pack(duration)
mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
trex_payload = u32.pack(track_id) # track id
trex_payload += u32.pack(1) # default sample description index
trex_payload += u32.pack(0) # default sample duration
trex_payload += u32.pack(0) # default sample size
trex_payload += u32.pack(0) # default sample flags
mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
stream.write(box(b'moov', moov_payload)) # Movie Box
def extract_box_data(data, box_sequence):
data_reader = io.BytesIO(data)
while True:
box_size = u32.unpack(data_reader.read(4))[0]
box_type = data_reader.read(4)
if box_type == box_sequence[0]:
box_data = data_reader.read(box_size - 8)
if len(box_sequence) == 1:
return box_data
return extract_box_data(box_data, box_sequence[1:])
data_reader.seek(box_size - 8, 1)
class IsmFD(FragmentFD):
"""
Download segments in a ISM manifest
"""
FD_NAME = 'ism'
def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
ctx = {
'filename': filename,
'total_frags': len(segments),
}
self._prepare_and_start_frag_download(ctx)
segments_filenames = []
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
track_written = False
for i, segment in enumerate(segments):
segment_url = segment['url']
segment_name = 'Frag%d' % i
target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
count = 0
while count <= fragment_retries:
try:
success = ctx['dl'].download(target_filename, {'url': segment_url})
if not success:
return False
down, target_sanitized = sanitize_open(target_filename, 'rb')
down_data = down.read()
if not track_written:
tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
track_written = True
ctx['dest_stream'].write(down_data)
down.close()
segments_filenames.append(target_sanitized)
break
except compat_urllib_error.HTTPError as err:
count += 1
if count <= fragment_retries:
self.report_retry_fragment(err, segment_name, count, fragment_retries)
if count > fragment_retries:
if skip_unavailable_fragments:
self.report_skip_fragment(segment_name)
continue
self.report_error('giving up after %s fragment retries' % fragment_retries)
return False
self._finish_frag_download(ctx)
for segment_file in segments_filenames:
os.remove(encodeFilename(segment_file))
return True

View File

@ -96,6 +96,27 @@ class AdultSwimIE(TurnerBaseIE):
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Unable to download f4m manifest'], 'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
'info_dict': {
'id': 'eYiLsKVgQ6qTC6agD67Sig',
'title': 'Toonami - Friday, October 14th, 2016',
'description': 'md5:99892c96ffc85e159a428de85c30acde',
},
'playlist': [{
'md5': '',
'info_dict': {
'id': 'eYiLsKVgQ6qTC6agD67Sig',
'ext': 'mp4',
'title': 'Toonami - Friday, October 14th, 2016',
'description': 'md5:99892c96ffc85e159a428de85c30acde',
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
}] }]
@staticmethod @staticmethod
@ -163,6 +184,8 @@ class AdultSwimIE(TurnerBaseIE):
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
elif video_info.get('videoPlaybackID'): elif video_info.get('videoPlaybackID'):
segment_ids = [video_info['videoPlaybackID']] segment_ids = [video_info['videoPlaybackID']]
elif video_info.get('id'):
segment_ids = [video_info['id']]
else: else:
if video_info.get('auth') is True: if video_info.get('auth') is True:
raise ExtractorError( raise ExtractorError(

View File

@ -11,6 +11,7 @@ from ..compat import (
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
update_url_query,
xpath_element, xpath_element,
xpath_text, xpath_text,
) )
@ -18,12 +19,18 @@ from ..utils import (
class AfreecaTVIE(InfoExtractor): class AfreecaTVIE(InfoExtractor):
IE_DESC = 'afreecatv.com' IE_DESC = 'afreecatv.com'
_VALID_URL = r'''(?x)^ _VALID_URL = r'''(?x)
https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? https?://
(?: (?:
/app/(?:index|read_ucc_bbs)\.cgi| (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
/player/[Pp]layer\.(?:swf|html)) (?:
\?.*?\bnTitleNo=(?P<id>\d+)''' /app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html)
)\?.*?\bnTitleNo=|
vod\.afreecatv\.com/PLAYER/STATION/
)
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor):
}, { }, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor):
info_url = compat_urlparse.urlunparse(parsed_url._replace( info_url = compat_urlparse.urlunparse(parsed_url._replace(
netloc='afbbs.afreecatv.com:8080', netloc='afbbs.afreecatv.com:8080',
path='/api/video/get_video_info.php')) path='/api/video/get_video_info.php'))
video_xml = self._download_xml(info_url, video_id)
video_xml = self._download_xml(
update_url_query(info_url, {'nTitleNo': video_id}), video_id)
if xpath_element(video_xml, './track/video/file') is None: if xpath_element(video_xml, './track/video/file') is None:
raise ExtractorError('Specified AfreecaTV video does not exist', raise ExtractorError('Specified AfreecaTV video does not exist',

View File

@ -157,22 +157,16 @@ class AnvatoIE(InfoExtractor):
video_data_url, video_id, transform_source=strip_jsonp, video_data_url, video_id, transform_source=strip_jsonp,
data=json.dumps(payload).encode('utf-8')) data=json.dumps(payload).encode('utf-8'))
def _extract_anvato_videos(self, webpage, video_id): def _get_anvato_videos(self, access_key, video_id):
anvplayer_data = self._parse_json(self._html_search_regex(
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
'Anvato player data'), video_id)
video_id = anvplayer_data['video']
access_key = anvplayer_data['accessKey']
video_data = self._get_video_json(access_key, video_id) video_data = self._get_video_json(access_key, video_id)
formats = [] formats = []
for published_url in video_data['published_urls']: for published_url in video_data['published_urls']:
video_url = published_url['embed_url'] video_url = published_url['embed_url']
media_format = published_url.get('format')
ext = determine_ext(video_url) ext = determine_ext(video_url)
if ext == 'smil': if ext == 'smil' or media_format == 'smil':
formats.extend(self._extract_smil_formats(video_url, video_id)) formats.extend(self._extract_smil_formats(video_url, video_id))
continue continue
@ -183,7 +177,7 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr if tbr != 0 else None, 'tbr': tbr if tbr != 0 else None,
} }
if ext == 'm3u8': if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
# Not using _extract_m3u8_formats here as individual media # Not using _extract_m3u8_formats here as individual media
# playlists are also included in published_urls. # playlists are also included in published_urls.
if tbr is None: if tbr is None:
@ -194,7 +188,7 @@ class AnvatoIE(InfoExtractor):
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4', 'ext': 'mp4',
}) })
elif ext == 'mp3': elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none' a_format['vcodec'] = 'none'
else: else:
a_format.update({ a_format.update({
@ -218,7 +212,19 @@ class AnvatoIE(InfoExtractor):
'formats': formats, 'formats': formats,
'title': video_data.get('def_title'), 'title': video_data.get('def_title'),
'description': video_data.get('def_description'), 'description': video_data.get('def_description'),
'tags': video_data.get('def_tags', '').split(','),
'categories': video_data.get('categories'), 'categories': video_data.get('categories'),
'thumbnail': video_data.get('thumbnail'), 'thumbnail': video_data.get('thumbnail'),
'timestamp': int_or_none(video_data.get(
'ts_published') or video_data.get('ts_added')),
'uploader': video_data.get('mcp_id'),
'duration': int_or_none(video_data.get('duration')),
'subtitles': subtitles, 'subtitles': subtitles,
} }
def _extract_anvato_videos(self, webpage, video_id):
anvplayer_data = self._parse_json(self._html_search_regex(
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
'Anvato player data'), video_id)
return self._get_anvato_videos(
anvplayer_data['accessKey'], anvplayer_data['video'])

View File

@ -174,11 +174,15 @@ class ARDMediathekIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage: ERRORS = (
raise ExtractorError('Video %s is no longer available' % video_id, expected=True) ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
'Video %s is no longer available'),
)
if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage: for pattern, message in ERRORS:
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True) if pattern in webpage:
raise ExtractorError(message % video_id, expected=True)
if re.search(r'[\?&]rss($|[=&])', url): if re.search(r'[\?&]rss($|[=&])', url):
doc = compat_etree_fromstring(webpage.encode('utf-8')) doc = compat_etree_fromstring(webpage.encode('utf-8'))

View File

@ -6,8 +6,8 @@ from ..utils import float_or_none
class AudioBoomIE(InfoExtractor): class AudioBoomIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0', 'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
'md5': '63a8d73a055c6ed0f1e51921a10a5a76', 'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
'info_dict': { 'info_dict': {
@ -19,7 +19,10 @@ class AudioBoomIE(InfoExtractor):
'uploader': 'Steve Czaban', 'uploader': 'Steve Czaban',
'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', 'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
} }
} }, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -46,19 +46,19 @@ class BeegIE(InfoExtractor):
self._proto_relative_url(cpl_url), video_id, self._proto_relative_url(cpl_url), video_id,
'Downloading cpl JS', fatal=False) 'Downloading cpl JS', fatal=False)
if cpl: if cpl:
beeg_version = self._search_regex( beeg_version = int_or_none(self._search_regex(
r'beeg_version\s*=\s*(\d+)', cpl, r'beeg_version\s*=\s*([^\b]+)', cpl,
'beeg version', default=None) or self._search_regex( 'beeg version', default=None)) or self._search_regex(
r'/(\d+)\.js', cpl_url, 'beeg version', default=None) r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
beeg_salt = self._search_regex( beeg_salt = self._search_regex(
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg beeg_salt', r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
default=None, group='beeg_salt') default=None, group='beeg_salt')
beeg_version = beeg_version or '1750' beeg_version = beeg_version or '2000'
beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr' beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
video = self._download_json( video = self._download_json(
'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id), 'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
video_id) video_id)
def split(o, e): def split(o, e):

View File

@ -22,6 +22,7 @@ class CBSLocalIE(AnvatoIE):
'thumbnail': 're:^https?://.*', 'thumbnail': 're:^https?://.*',
'timestamp': 1463440500, 'timestamp': 1463440500,
'upload_date': '20160516', 'upload_date': '20160516',
'uploader': 'CBS',
'subtitles': { 'subtitles': {
'en': 'mincount:5', 'en': 'mincount:5',
}, },
@ -35,6 +36,7 @@ class CBSLocalIE(AnvatoIE):
'Syndication\\Curb.tv', 'Syndication\\Curb.tv',
'Content\\News' 'Content\\News'
], ],
'tags': ['CBS 2 News Evening'],
}, },
}, { }, {
# SendtoNews embed # SendtoNews embed

View File

@ -5,14 +5,16 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
decode_packed_codes,
ExtractorError, ExtractorError,
parse_duration float_or_none,
int_or_none,
parse_duration,
) )
class CDAIE(InfoExtractor): class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
_BASE_URL = 'http://www.cda.pl/'
_TESTS = [{ _TESTS = [{
'url': 'http://www.cda.pl/video/5749950c', 'url': 'http://www.cda.pl/video/5749950c',
'md5': '6f844bf51b15f31fae165365707ae970', 'md5': '6f844bf51b15f31fae165365707ae970',
@ -21,6 +23,9 @@ class CDAIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'height': 720, 'height': 720,
'title': 'Oto dlaczego przed zakrętem należy zwolnić.', 'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
'description': 'md5:269ccd135d550da90d1662651fcb9772',
'thumbnail': 're:^https?://.*\.jpg$',
'average_rating': float,
'duration': 39 'duration': 39
} }
}, { }, {
@ -30,6 +35,11 @@ class CDAIE(InfoExtractor):
'id': '57413289', 'id': '57413289',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Lądowanie na lotnisku na Maderze', 'title': 'Lądowanie na lotnisku na Maderze',
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'crash404',
'view_count': int,
'average_rating': float,
'duration': 137 'duration': 137
} }
}, { }, {
@ -39,31 +49,55 @@ class CDAIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id) self._set_cookie('cda.pl', 'cda.player', 'html5')
webpage = self._download_webpage(
self._BASE_URL + '/video/' + video_id, video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage: if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True) raise ExtractorError('This video is only available for premium users.', expected=True)
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
formats = [] formats = []
uploader = self._search_regex(r'''(?x)
<(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
''', webpage, 'uploader', default=None, group='uploader')
view_count = self._search_regex(
r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
'view_count', default=None)
average_rating = self._search_regex(
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
webpage, 'rating', fatal=False, group='rating_value')
info_dict = { info_dict = {
'id': video_id, 'id': video_id,
'title': title, 'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'uploader': uploader,
'view_count': int_or_none(view_count),
'average_rating': float_or_none(average_rating),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats, 'formats': formats,
'duration': None, 'duration': None,
} }
def extract_format(page, version): def extract_format(page, version):
unpacked = decode_packed_codes(page) json_str = self._search_regex(
format_url = self._search_regex( r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked, '%s player_json' % version, fatal=False, group='player_data')
'%s url' % version, fatal=False, group='url') if not json_str:
if not format_url: return
player_data = self._parse_json(
json_str, '%s player_data' % version, fatal=False)
if not player_data:
return
video = player_data.get('video')
if not video or 'file' not in video:
self.report_warning('Unable to extract %s version information' % version)
return return
f = { f = {
'url': format_url, 'url': video['file'],
} }
m = re.search( m = re.search(
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p', r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
@ -75,9 +109,7 @@ class CDAIE(InfoExtractor):
}) })
info_dict['formats'].append(f) info_dict['formats'].append(f)
if not info_dict['duration']: if not info_dict['duration']:
info_dict['duration'] = parse_duration(self._search_regex( info_dict['duration'] = parse_duration(video.get('duration'))
r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
unpacked, 'duration', fatal=False, group='duration'))
extract_format(webpage, 'default') extract_format(webpage, 'default')
@ -85,7 +117,8 @@ class CDAIE(InfoExtractor):
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
webpage): webpage):
webpage = self._download_webpage( webpage = self._download_webpage(
href, video_id, 'Downloading %s version information' % resolution, fatal=False) self._BASE_URL + href, video_id,
'Downloading %s version information' % resolution, fatal=False)
if not webpage: if not webpage:
# Manually report warning because empty page is returned when # Manually report warning because empty page is returned when
# invalid version is requested. # invalid version is requested.

View File

@ -30,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media
from ..utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
age_restricted, age_restricted,
base_url,
bug_reports_message, bug_reports_message,
clean_html, clean_html,
compiled_regex_type, compiled_regex_type,
@ -885,7 +886,7 @@ class InfoExtractor(object):
'url': e.get('contentUrl'), 'url': e.get('contentUrl'),
'title': unescapeHTML(e.get('name')), 'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')), 'description': unescapeHTML(e.get('description')),
'thumbnail': e.get('thumbnailUrl'), 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
'duration': parse_duration(e.get('duration')), 'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')), 'timestamp': unified_timestamp(e.get('uploadDate')),
'filesize': float_or_none(e.get('contentSize')), 'filesize': float_or_none(e.get('contentSize')),
@ -1279,9 +1280,10 @@ class InfoExtractor(object):
} }
resolution = last_info.get('RESOLUTION') resolution = last_info.get('RESOLUTION')
if resolution: if resolution:
width_str, height_str = resolution.split('x') mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
f['width'] = int(width_str) if mobj:
f['height'] = int(height_str) f['width'] = int(mobj.group('width'))
f['height'] = int(mobj.group('height'))
# Unified Streaming Platform # Unified Streaming Platform
mobj = re.search( mobj = re.search(
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url']) r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
@ -1539,7 +1541,7 @@ class InfoExtractor(object):
if res is False: if res is False:
return [] return []
mpd, urlh = res mpd, urlh = res
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group() mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats( return self._parse_mpd_formats(
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
@ -1701,7 +1703,7 @@ class InfoExtractor(object):
representation_ms_info['fragments'] = [{ representation_ms_info['fragments'] = [{
'url': media_template % { 'url': media_template % {
'Number': segment_number, 'Number': segment_number,
'Bandwidth': representation_attrib.get('bandwidth'), 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
}, },
'duration': segment_duration, 'duration': segment_duration,
} for segment_number in range( } for segment_number in range(
@ -1719,7 +1721,7 @@ class InfoExtractor(object):
def add_segment_url(): def add_segment_url():
segment_url = media_template % { segment_url = media_template % {
'Time': segment_time, 'Time': segment_time,
'Bandwidth': representation_attrib.get('bandwidth'), 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
'Number': segment_number, 'Number': segment_number,
} }
representation_ms_info['fragments'].append({ representation_ms_info['fragments'].append({
@ -1780,6 +1782,105 @@ class InfoExtractor(object):
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats return formats
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
res = self._download_webpage_handle(
ism_url, video_id,
note=note or 'Downloading ISM manifest',
errnote=errnote or 'Failed to download ISM manifest',
fatal=fatal)
if res is False:
return []
ism, urlh = res
return self._parse_ism_formats(
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
return []
duration = int(ism_doc.attrib['Duration'])
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
formats = []
for stream in ism_doc.findall('StreamIndex'):
stream_type = stream.get('Type')
if stream_type not in ('video', 'audio'):
continue
url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name')
for track in stream.findall('QualityLevel'):
fourcc = track.get('FourCC')
# TODO: add support for WVC1 and WMAP
if fourcc not in ('H264', 'AVC1', 'AACL'):
self.report_warning('%s is not a supported codec' % fourcc)
continue
tbr = int(track.attrib['Bitrate']) // 1000
width = int_or_none(track.get('MaxWidth'))
height = int_or_none(track.get('MaxHeight'))
sampling_rate = int_or_none(track.get('SamplingRate'))
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
fragments = []
fragment_ctx = {
'time': 0,
}
stream_fragments = stream.findall('c')
for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
if not fragment_ctx['duration']:
try:
next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
except IndexError:
next_fragment_time = duration
fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
for _ in range(fragment_repeat):
fragments.append({
'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
'duration': fragment_ctx['duration'] / stream_timescale,
})
fragment_ctx['time'] += fragment_ctx['duration']
format_id = []
if ism_id:
format_id.append(ism_id)
if stream_name:
format_id.append(stream_name)
format_id.append(compat_str(tbr))
formats.append({
'format_id': '-'.join(format_id),
'url': ism_url,
'manifest_url': ism_url,
'ext': 'ismv' if stream_type == 'video' else 'isma',
'width': width,
'height': height,
'tbr': tbr,
'asr': sampling_rate,
'vcodec': 'none' if stream_type == 'audio' else fourcc,
'acodec': 'none' if stream_type == 'video' else fourcc,
'protocol': 'ism',
'fragments': fragments,
'_download_params': {
'duration': duration,
'timescale': stream_timescale,
'width': width or 0,
'height': height or 0,
'fourcc': fourcc,
'codec_private_data': track.get('CodecPrivateData'),
'sampling_rate': sampling_rate,
'channels': int_or_none(track.get('Channels', 2)),
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
},
})
return formats
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'): def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
def absolute_url(video_url): def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url) return compat_urlparse.urljoin(base_url, video_url)

View File

@ -9,7 +9,7 @@ from ..utils import (
class DotsubIE(InfoExtractor): class DotsubIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_TEST = { _TESTS = [{
'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09', 'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
'md5': '21c7ff600f545358134fea762a6d42b6', 'md5': '21c7ff600f545358134fea762a6d42b6',
'info_dict': { 'info_dict': {
@ -24,7 +24,24 @@ class DotsubIE(InfoExtractor):
'upload_date': '20131130', 'upload_date': '20131130',
'view_count': int, 'view_count': int,
} }
} }, {
'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6',
'md5': '2bb4a83896434d5c26be868c609429a3',
'info_dict': {
'id': '168006778',
'ext': 'mp4',
'title': 'Apartments and flats in Raipur the white symphony',
'description': 'md5:784d0639e6b7d1bc29530878508e38fe',
'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
'duration': 290,
'timestamp': 1476767794.2809999,
'upload_date': '20160525',
'uploader': 'parthivi001',
'uploader_id': 'user52596202',
'view_count': int,
},
'add_ie': ['Vimeo'],
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -37,12 +54,23 @@ class DotsubIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._search_regex( video_url = self._search_regex(
[r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'], [r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
webpage, 'video url') webpage, 'video url', default=None)
info_dict = {
'id': video_id,
'url': video_url,
'ext': 'flv',
}
return { if not video_url:
'id': video_id, setup_data = self._parse_json(self._html_search_regex(
'url': video_url, r'(?s)data-setup=([\'"])(?P<content>(?!\1).+?)\1',
'ext': 'flv', webpage, 'setup data', group='content'), video_id)
info_dict = {
'_type': 'url_transparent',
'url': setup_data['src'],
}
info_dict.update({
'title': info['title'], 'title': info['title'],
'description': info.get('description'), 'description': info.get('description'),
'thumbnail': info.get('screenshotURI'), 'thumbnail': info.get('screenshotURI'),
@ -50,4 +78,6 @@ class DotsubIE(InfoExtractor):
'uploader': info.get('user'), 'uploader': info.get('user'),
'timestamp': float_or_none(info.get('dateCreated'), 1000), 'timestamp': float_or_none(info.get('dateCreated'), 1000),
'view_count': int_or_none(info.get('numberOfViews')), 'view_count': int_or_none(info.get('numberOfViews')),
} })
return info_dict

View File

@ -10,8 +10,8 @@ from ..utils import (
class DrTuberIE(InfoExtractor): class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
_TEST = { _TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf', 'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd', 'md5': '93e680cf2536ad0dfb7e74d94a89facd',
'info_dict': { 'info_dict': {
@ -25,20 +25,30 @@ class DrTuberIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg$', 'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18, 'age_limit': 18,
} }
} }, {
'url': 'http://www.drtuber.com/embed/489939',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
webpage)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(
'http://www.drtuber.com/video/%s' % video_id, display_id)
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'<source src="([^"]+)"', webpage, 'video URL') r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex( title = self._html_search_regex(
(r'class="title_watch"[^>]*><p>([^<]+)<', (r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<p[^>]+class="title_substrate">([^<]+)</p>',
r'<title>([^<]+) - \d+'), r'<title>([^<]+) - \d+'),
webpage, 'title') webpage, 'title')

View File

@ -4,11 +4,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
url_basename,
) )
@ -77,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
if status != 200: if status != 200:
raise ExtractorError(' '.join(response['errors']), expected=True) raise ExtractorError(' '.join(response['errors']), expected=True)
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'): def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
try: try:
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note) response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
except ExtractorError as ee: except ExtractorError as ee:
@ -116,29 +118,38 @@ class EaglePlatformIE(InfoExtractor):
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
m3u8_formats = self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
'mp4', entry_protocol='m3u8_native', m3u8_id='hls') m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats) formats.extend(m3u8_formats)
mp4_url = self._get_video_url( m3u8_formats_dict = {}
for f in m3u8_formats:
if f.get('height') is not None:
m3u8_formats_dict[f['height']] = f
mp4_data = self._download_json(
# Secure mp4 URL is constructed according to Player.prototype.mp4 from # Secure mp4 URL is constructed according to Player.prototype.mp4 from
# http://lentaru.media.eagleplatform.com/player/player.js # http://lentaru.media.eagleplatform.com/player/player.js
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8), re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
video_id, 'Downloading mp4 JSON') video_id, 'Downloading mp4 JSON', fatal=False)
mp4_url_basename = url_basename(mp4_url) if mp4_data:
for m3u8_format in m3u8_formats: for format_id, format_url in mp4_data.get('data', {}).items():
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url']) if not isinstance(format_url, compat_str):
if mobj:
http_format = m3u8_format.copy()
video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
if not self._is_valid_url(video_url, video_id):
continue continue
http_format.update({ height = int_or_none(format_id)
'url': video_url, if height is not None and m3u8_formats_dict.get(height):
'format_id': m3u8_format['format_id'].replace('hls', 'http'), f = m3u8_formats_dict[height].copy()
'protocol': 'http', f.update({
}) 'format_id': f['format_id'].replace('hls', 'http'),
formats.append(http_format) 'protocol': 'http',
})
else:
f = {
'format_id': 'http-%s' % format_id,
'height': int_or_none(format_id),
}
f['url'] = format_url
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -1,38 +1,117 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import remove_end from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
unified_timestamp,
)
class ESPNIE(InfoExtractor): class ESPNIE(InfoExtractor):
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079', 'url': 'http://espn.go.com/video/clip?id=10365079',
'md5': '60e5d097a523e767d06479335d1bdc58',
'info_dict': { 'info_dict': {
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'id': '10365079',
'ext': 'mp4', 'ext': 'mp4',
'title': '30 for 30 Shorts: Judging Jewell', 'title': '30 for 30 Shorts: Judging Jewell',
'description': None, 'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
'timestamp': 1390936111,
'upload_date': '20140128',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['OoyalaExternal'],
}, { }, {
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
'url': 'http://espn.go.com/video/clip?id=2743663', 'url': 'http://espn.go.com/video/clip?id=2743663',
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
'info_dict': { 'info_dict': {
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', 'id': '2743663',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Must-See Moments: Best of the MLS season', 'title': 'Must-See Moments: Best of the MLS season',
'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
'timestamp': 1449446454,
'upload_date': '20151207',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['OoyalaExternal'], 'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip/_/id/17989860',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
clip = self._download_json(
'http://api-app.espn.com/v1/video/clips/%s' % video_id,
video_id)['videos'][0]
title = clip['headline']
format_urls = set()
formats = []
def traverse_source(source, base_source_id=None):
for source_id, source in source.items():
if isinstance(source, compat_str):
extract_source(source, base_source_id)
elif isinstance(source, dict):
traverse_source(
source,
'%s-%s' % (base_source_id, source_id)
if base_source_id else source_id)
def extract_source(source_url, source_id=None):
if source_url in format_urls:
return
format_urls.add(source_url)
ext = determine_ext(source_url)
if ext == 'smil':
formats.extend(self._extract_smil_formats(
source_url, video_id, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
source_url, video_id, f4m_id=source_id, fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=source_id, fatal=False))
else:
formats.append({
'url': source_url,
'format_id': source_id,
})
traverse_source(clip['links']['source'])
self._sort_formats(formats)
description = clip.get('caption') or clip.get('description')
thumbnail = clip.get('thumbnail')
duration = int_or_none(clip.get('duration'))
timestamp = unified_timestamp(clip.get('originalPublishDate'))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}
class ESPNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
'only_matching': True, 'only_matching': True,
}, { }, {
@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):
}, { }, {
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)', r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
webpage, 'video id', group='id') webpage, 'video id', group='id')
cms = 'espn' return self.url_result(
if 'data-source="intl"' in webpage: 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
cms = 'intl'
player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
player = self._download_webpage(
player_url, video_id)
pcode = self._search_regex(
r'["\']pcode=([^"\']+)["\']', player, 'pcode')
title = remove_end(
self._og_search_title(webpage),
'- ESPN Video').strip()
return {
'_type': 'url_transparent',
'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
'ie_key': 'OoyalaExternal',
'title': title,
}

View File

@ -267,7 +267,10 @@ from .engadget import EngadgetIE
from .eporner import EpornerIE from .eporner import EpornerIE
from .eroprofile import EroProfileIE from .eroprofile import EroProfileIE
from .escapist import EscapistIE from .escapist import EscapistIE
from .espn import ESPNIE from .espn import (
ESPNIE,
ESPNArticleIE,
)
from .esri import EsriVideoIE from .esri import EsriVideoIE
from .europa import EuropaIE from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE from .everyonesmixtape import EveryonesMixtapeIE
@ -296,6 +299,7 @@ from .footyroom import FootyRoomIE
from .formula1 import Formula1IE from .formula1 import Formula1IE
from .fourtube import FourTubeIE from .fourtube import FourTubeIE
from .fox import FOXIE from .fox import FOXIE
from .fox9 import FOX9IE
from .foxgay import FoxgayIE from .foxgay import FoxgayIE
from .foxnews import ( from .foxnews import (
FoxNewsIE, FoxNewsIE,
@ -408,6 +412,10 @@ from .ivi import (
from .ivideon import IvideonIE from .ivideon import IvideonIE
from .iwara import IwaraIE from .iwara import IwaraIE
from .izlesene import IzleseneIE from .izlesene import IzleseneIE
from .jamendo import (
JamendoIE,
JamendoAlbumIE,
)
from .jeuxvideo import JeuxVideoIE from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE from .jove import JoveIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
@ -592,6 +600,7 @@ from .nhl import (
from .nick import ( from .nick import (
NickIE, NickIE,
NickDeIE, NickDeIE,
NickNightIE,
) )
from .niconico import NiconicoIE, NiconicoPlaylistIE from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import ( from .ninecninemedia import (
@ -601,6 +610,7 @@ from .ninecninemedia import (
from .ninegag import NineGagIE from .ninegag import NineGagIE
from .ninenow import NineNowIE from .ninenow import NineNowIE
from .nintendo import NintendoIE from .nintendo import NintendoIE
from .nobelprize import NobelPrizeIE
from .noco import NocoIE from .noco import NocoIE
from .normalboots import NormalbootsIE from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE from .nosvideo import NosVideoIE
@ -667,6 +677,7 @@ from .orf import (
ORFFM4IE, ORFFM4IE,
ORFIPTVIE, ORFIPTVIE,
) )
from .pandatv import PandaTVIE
from .pandoratv import PandoraTVIE from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE from .patreon import PatreonIE
@ -740,6 +751,10 @@ from .rbmaradio import RBMARadioIE
from .rds import RDSIE from .rds import RDSIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .regiotv import RegioTVIE from .regiotv import RegioTVIE
from .rentv import (
RENTVIE,
RENTVArticleIE,
)
from .restudy import RestudyIE from .restudy import RestudyIE
from .reuters import ReutersIE from .reuters import ReutersIE
from .reverbnation import ReverbNationIE from .reverbnation import ReverbNationIE
@ -796,7 +811,10 @@ from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE from .sexu import SexuIE
from .shahid import ShahidIE from .shahid import ShahidIE
from .shared import SharedIE from .shared import (
SharedIE,
VivoIE,
)
from .sharesix import ShareSixIE from .sharesix import ShareSixIE
from .sina import SinaIE from .sina import SinaIE
from .sixplay import SixPlayIE from .sixplay import SixPlayIE
@ -1087,6 +1105,7 @@ from .vrt import VRTIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE
from .vyborymos import VyboryMosIE from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
from .walla import WallaIE from .walla import WallaIE
from .washingtonpost import ( from .washingtonpost import (
WashingtonPostIE, WashingtonPostIE,

View File

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
import socket import socket
@ -100,7 +99,8 @@ class FacebookIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '"What are you doing running in the snow?"', 'title': '"What are you doing running in the snow?"',
'uploader': 'FailArmy', 'uploader': 'FailArmy',
} },
'skip': 'Video gone',
}, { }, {
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903', 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3', 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
@ -110,6 +110,7 @@ class FacebookIE(InfoExtractor):
'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog', 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
'uploader': 'S. Saint', 'uploader': 'S. Saint',
}, },
'skip': 'Video gone',
}, { }, {
'note': 'swf params escaped', 'note': 'swf params escaped',
'url': 'https://www.facebook.com/barackobama/posts/10153664894881749', 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
@ -119,6 +120,18 @@ class FacebookIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Facebook video #10153664894881749', 'title': 'Facebook video #10153664894881749',
}, },
}, {
# have 1080P, but only up to 720p in swf params
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': '0d9813160b146b3bc8744e006027fcc6',
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
'title': 'Holocaust survivor becomes US citizen',
'timestamp': 1477818095,
'upload_date': '20161030',
'uploader': 'CNN',
},
}, { }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True, 'only_matching': True,
@ -227,43 +240,13 @@ class FacebookIE(InfoExtractor):
video_data = None video_data = None
BEFORE = '{swf.addParam(param[0], param[1]);});' server_js_data = self._parse_json(self._search_regex(
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER) for item in server_js_data.get('instances', []):
if item[1][0] == 'VideoConfig':
for m in re.findall(PATTERN, webpage): video_data = item[2][0]['videoData']
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
data = dict(json.loads(swf_params))
params_raw = compat_urllib_parse_unquote(data['params'])
video_data_candidate = json.loads(params_raw)['video_data']
for _, f in video_data_candidate.items():
if not f:
continue
if isinstance(f, dict):
f = [f]
if not isinstance(f, list):
continue
if f[0].get('video_id') == video_id:
video_data = video_data_candidate
break
if video_data:
break break
def video_data_list2dict(video_data):
ret = {}
for item in video_data:
format_id = item['stream_type']
ret.setdefault(format_id, []).append(item)
return ret
if not video_data:
server_js_data = self._parse_json(self._search_regex(
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
for item in server_js_data.get('instances', []):
if item[1][0] == 'VideoConfig':
video_data = video_data_list2dict(item[2][0]['videoData'])
break
if not video_data: if not video_data:
if not fatal_if_no_video: if not fatal_if_no_video:
return webpage, False return webpage, False
@ -276,7 +259,8 @@ class FacebookIE(InfoExtractor):
raise ExtractorError('Cannot parse data') raise ExtractorError('Cannot parse data')
formats = [] formats = []
for format_id, f in video_data.items(): for f in video_data:
format_id = f['stream_type']
if f and isinstance(f, dict): if f and isinstance(f, dict):
f = [f] f = [f]
if not f or not isinstance(f, list): if not f or not isinstance(f, list):

View File

@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .anvato import AnvatoIE
from ..utils import js_to_json
class FOX9IE(AnvatoIE):
_VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
_TESTS = [{
'url': 'http://www.fox9.com/news/215123287-story',
'md5': 'd6e1b2572c3bab8a849c9103615dd243',
'info_dict': {
'id': '314473',
'ext': 'mp4',
'title': 'Bear climbs tree in downtown Duluth',
'description': 'md5:6a36bfb5073a411758a752455408ac90',
'duration': 51,
'timestamp': 1478123580,
'upload_date': '20161102',
'uploader': 'EPFOX',
'categories': ['News', 'Sports'],
'tags': ['news', 'video'],
},
}, {
'url': 'http://www.fox9.com/news/investigators/214070684-story',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_id = self._parse_json(
self._search_regex(
r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;',
webpage, 'anvato playlist'),
video_id, transform_source=js_to_json)[0]['video']
return self._get_anvato_videos(
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
video_id)

View File

@ -29,7 +29,7 @@ class FranceCultureIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_url = self._search_regex( video_url = self._search_regex(
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<a[^>]+href="([^"]+)"', r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
webpage, 'video path') webpage, 'video path')
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
@ -38,7 +38,7 @@ class FranceCultureIE(InfoExtractor):
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<', '(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
webpage, 'upload date', fatal=False)) webpage, 'upload date', fatal=False))
thumbnail = self._search_regex( thumbnail = self._search_regex(
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"', r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
webpage, 'thumbnail', fatal=False) webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex( uploader = self._html_search_regex(
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>', r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',

View File

@ -47,6 +47,8 @@ from .svt import SVTIE
from .pornhub import PornHubIE from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE
from .redtube import RedTubeIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .dailymotion import ( from .dailymotion import (
DailymotionIE, DailymotionIE,
@ -1208,20 +1210,6 @@ class GenericIE(InfoExtractor):
'duration': 51690, 'duration': 51690,
}, },
}, },
# JWPlayer with M3U8
{
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
'info_dict': {
'id': 'playlist',
'ext': 'mp4',
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
'uploader': 'ren.tv',
},
'params': {
# m3u8 downloads
'skip_download': True,
}
},
# Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions' # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
# This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
{ {
@ -1648,6 +1636,10 @@ class GenericIE(InfoExtractor):
doc = compat_etree_fromstring(webpage.encode('utf-8')) doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss': if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc) return self._extract_rss(url, video_id, doc)
elif doc.tag == 'SmoothStreamingMedia':
info_dict['formats'] = self._parse_ism_formats(doc, url)
self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
smil = self._parse_smil(doc, url, video_id) smil = self._parse_smil(doc, url, video_id)
self._sort_formats(smil['formats']) self._sort_formats(smil['formats'])
@ -1991,11 +1983,6 @@ class GenericIE(InfoExtractor):
if sportbox_urls: if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
# Look for embedded PornHub player
pornhub_url = PornHubIE._extract_url(webpage)
if pornhub_url:
return self.url_result(pornhub_url, 'PornHub')
# Look for embedded XHamster player # Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls: if xhamster_urls:
@ -2006,6 +1993,21 @@ class GenericIE(InfoExtractor):
if tnaflix_urls: if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
# Look for embedded PornHub player
pornhub_urls = PornHubIE._extract_urls(webpage)
if pornhub_urls:
return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
# Look for embedded DrTuber player
drtuber_urls = DrTuberIE._extract_urls(webpage)
if drtuber_urls:
return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
# Look for embedded RedTube player
redtube_urls = RedTubeIE._extract_urls(webpage)
if redtube_urls:
return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
# Look for embedded Tvigle player # Look for embedded Tvigle player
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@ -2463,6 +2465,21 @@ class GenericIE(InfoExtractor):
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id) entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
elif ext == 'f4m': elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
# Just matching .ism/manifest is not enough to be reliably sure
# whether it's actually an ISM manifest or some other streaming
# manifest since there are various streaming URL formats
# possible (see [1]) as well as some other shenanigans like
# .smil/manifest URLs that actually serve an ISM (see [2]) and
# so on.
# Thus the most reasonable way to solve this is to delegate
# to generic extractor in order to look into the contents of
# the manifest itself.
# 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
# 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
entry_info_dict = self.url_result(
smuggle_url(video_url, {'to_generic': True}),
GenericIE.ie_key())
else: else:
entry_info_dict['url'] = video_url entry_info_dict['url'] = video_url

View File

@ -4,9 +4,6 @@ import itertools
import re import re
from .common import SearchInfoExtractor from .common import SearchInfoExtractor
from ..compat import (
compat_urllib_parse,
)
class GoogleSearchIE(SearchInfoExtractor): class GoogleSearchIE(SearchInfoExtractor):
@ -34,13 +31,16 @@ class GoogleSearchIE(SearchInfoExtractor):
} }
for pagenum in itertools.count(): for pagenum in itertools.count():
result_url = (
'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
% (compat_urllib_parse.quote_plus(query), pagenum * 10))
webpage = self._download_webpage( webpage = self._download_webpage(
result_url, 'gvsearch:' + query, 'http://www.google.com/search',
note='Downloading result page ' + str(pagenum + 1)) 'gvsearch:' + query,
note='Downloading result page %s' % (pagenum + 1),
query={
'tbm': 'vid',
'q': query,
'start': pagenum * 10,
'hl': 'en',
})
for hit_idx, mobj in enumerate(re.finditer( for hit_idx, mobj in enumerate(re.finditer(
r'<h3 class="r"><a href="([^"]+)"', webpage)): r'<h3 class="r"><a href="([^"]+)"', webpage)):

View File

@ -1,8 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
@ -14,29 +12,24 @@ class HornBunnyIE(InfoExtractor):
_VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html' _VALID_URL = r'http?://(?:www\.)?hornbunny\.com/videos/(?P<title_dash>[a-z-]+)-(?P<id>\d+)\.html'
_TEST = { _TEST = {
'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html', 'url': 'http://hornbunny.com/videos/panty-slut-jerk-off-instruction-5227.html',
'md5': '95e40865aedd08eff60272b704852ad7', 'md5': 'e20fd862d1894b67564c96f180f43924',
'info_dict': { 'info_dict': {
'id': '5227', 'id': '5227',
'ext': 'flv', 'ext': 'mp4',
'title': 'panty slut jerk off instruction', 'title': 'panty slut jerk off instruction',
'duration': 550, 'duration': 550,
'age_limit': 18, 'age_limit': 18,
'view_count': int,
'thumbnail': 're:^https?://.*\.jpg$',
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage( webpage = self._download_webpage(url, video_id)
url, video_id, note='Downloading initial webpage') title = self._og_search_title(webpage)
title = self._html_search_regex( info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
r'class="title">(.*?)</h2>', webpage, 'title')
redirect_url = self._html_search_regex(
r'pg&settings=(.*?)\|0"\);', webpage, 'title')
webpage2 = self._download_webpage(redirect_url, video_id)
video_url = self._html_search_regex(
r'flvMask:(.*?);', webpage2, 'video_url')
duration = parse_duration(self._search_regex( duration = parse_duration(self._search_regex(
r'<strong>Runtime:</strong>\s*([0-9:]+)</div>', r'<strong>Runtime:</strong>\s*([0-9:]+)</div>',
@ -45,12 +38,12 @@ class HornBunnyIE(InfoExtractor):
r'<strong>Views:</strong>\s*(\d+)</div>', r'<strong>Views:</strong>\s*(\d+)</div>',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
return { info_dict.update({
'id': video_id, 'id': video_id,
'url': video_url,
'title': title, 'title': title,
'ext': 'flv',
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,
'age_limit': 18, 'age_limit': 18,
} })
return info_dict

View File

@ -13,7 +13,7 @@ from ..utils import (
class ImgurIE(InfoExtractor): class ImgurIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$' _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
_TESTS = [{ _TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv', 'url': 'https://i.imgur.com/A61SaA1.gifv',
@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
}, { }, {
'url': 'http://imgur.com/topic/Funny/N8rOudd', 'url': 'http://imgur.com/topic/Funny/N8rOudd',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://imgur.com/r/aww/VQcQPhM',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -0,0 +1,107 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from ..compat import compat_urlparse
from .common import InfoExtractor
class JamendoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
_TEST = {
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
'info_dict': {
'id': '196219',
'display_id': 'stories-from-emona-i',
'ext': 'flac',
'title': 'Stories from Emona I',
'thumbnail': 're:^https?://.*\.jpg'
}
}
def _real_extract(self, url):
mobj = self._VALID_URL_RE.match(url)
track_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
title = self._html_search_meta('name', webpage, 'title')
formats = [{
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
% (sub_domain, track_id, format_id),
'format_id': format_id,
'ext': ext,
'quality': quality,
} for quality, (format_id, sub_domain, ext) in enumerate((
('mp31', 'mp3l', 'mp3'),
('mp32', 'mp3d', 'mp3'),
('ogg1', 'ogg', 'ogg'),
('flac', 'flac', 'flac'),
))]
self._sort_formats(formats)
thumbnail = self._html_search_meta(
'image', webpage, 'thumbnail', fatal=False)
return {
'id': track_id,
'display_id': display_id,
'thumbnail': thumbnail,
'title': title,
'formats': formats
}
class JamendoAlbumIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
_TEST = {
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
'info_dict': {
'id': '121486',
'title': 'Duck On Cover'
},
'playlist': [{
'md5': 'e1a2fcb42bda30dfac990212924149a8',
'info_dict': {
'id': '1032333',
'ext': 'flac',
'title': 'Warmachine'
}
}, {
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
'info_dict': {
'id': '1032330',
'ext': 'flac',
'title': 'Without Your Ghost'
}
}],
'params': {
'playlistend': 2
}
}
def _real_extract(self, url):
mobj = self._VALID_URL_RE.match(url)
album_id = mobj.group('id')
webpage = self._download_webpage(url, mobj.group('display_id'))
title = self._html_search_meta('name', webpage, 'title')
entries = [
self.url_result(
compat_urlparse.urljoin(url, m.group('path')),
ie=JamendoIE.ie_key(),
video_id=self._search_regex(
r'/track/(\d+)', m.group('path'),
'track id', default=None))
for m in re.finditer(
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
webpage)
]
return self.playlist_result(entries, album_id, title)

View File

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -52,8 +51,8 @@ class LiTVIE(InfoExtractor):
'skip': 'Georestricted to Taiwan', 'skip': 'Georestricted to Taiwan',
}] }]
def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True): def _extract_playlist(self, season_list, video_id, program_info, prompt=True):
episode_title = view_data['title'] episode_title = program_info['title']
content_id = season_list['contentId'] content_id = season_list['contentId']
if prompt: if prompt:
@ -61,7 +60,7 @@ class LiTVIE(InfoExtractor):
all_episodes = [ all_episodes = [
self.url_result(smuggle_url( self.url_result(smuggle_url(
self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']), self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']),
{'force_noplaylist': True})) # To prevent infinite recursion {'force_noplaylist': True})) # To prevent infinite recursion
for episode in season_list['episode']] for episode in season_list['episode']]
@ -80,19 +79,15 @@ class LiTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
view_data = dict(map(lambda t: (t[0], t[2]), re.findall( program_info = self._parse_json(self._search_regex(
r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2', 'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
webpage)))
vod_data = self._parse_json(self._search_regex(
'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
video_id) video_id)
season_list = list(vod_data.get('seasonList', {}).values()) season_list = list(program_info.get('seasonList', {}).values())
if season_list: if season_list:
if not noplaylist: if not noplaylist:
return self._extract_playlist( return self._extract_playlist(
season_list[0], video_id, vod_data, view_data, season_list[0], video_id, program_info,
prompt=noplaylist_prompt) prompt=noplaylist_prompt)
if noplaylist_prompt: if noplaylist_prompt:
@ -102,8 +97,8 @@ class LiTVIE(InfoExtractor):
# endpoint gives the same result as the data embedded in the webpage. # endpoint gives the same result as the data embedded in the webpage.
# If georestricted, there are no embedded data, so an extra request is # If georestricted, there are no embedded data, so an extra request is
# necessary to get the error code # necessary to get the error code
if 'assetId' not in view_data: if 'assetId' not in program_info:
view_data = self._download_json( program_info = self._download_json(
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id, 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
query={'contentId': video_id}, query={'contentId': video_id},
headers={'Accept': 'application/json'}) headers={'Accept': 'application/json'})
@ -112,9 +107,9 @@ class LiTVIE(InfoExtractor):
webpage, 'video data', default='{}'), video_id) webpage, 'video data', default='{}'), video_id)
if not video_data: if not video_data:
payload = { payload = {
'assetId': view_data['assetId'], 'assetId': program_info['assetId'],
'watchDevices': view_data['watchDevices'], 'watchDevices': program_info['watchDevices'],
'contentType': view_data['contentType'], 'contentType': program_info['contentType'],
} }
video_data = self._download_json( video_data = self._download_json(
'https://www.litv.tv/vod/getMainUrl', video_id, 'https://www.litv.tv/vod/getMainUrl', video_id,
@ -136,11 +131,11 @@ class LiTVIE(InfoExtractor):
# LiTV HLS segments doesn't like compressions # LiTV HLS segments doesn't like compressions
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True
title = view_data['title'] + view_data.get('secondaryMark', '') title = program_info['title'] + program_info.get('secondaryMark', '')
description = view_data.get('description') description = program_info.get('description')
thumbnail = view_data.get('imageFile') thumbnail = program_info.get('imageFile')
categories = [item['name'] for item in vod_data.get('category', [])] categories = [item['name'] for item in program_info.get('category', [])]
episode = int_or_none(view_data.get('episode')) episode = int_or_none(program_info.get('episode'))
return { return {
'id': video_id, 'id': video_id,

View File

@ -71,12 +71,15 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
formats = [] formats = []
for sources in settings.findall(compat_xpath('.//MediaSources')): for sources in settings.findall(compat_xpath('.//MediaSources')):
if sources.get('videoType') == 'smoothstreaming': sources_type = sources.get('videoType')
continue
for source in sources.findall(compat_xpath('./MediaSource')): for source in sources.findall(compat_xpath('./MediaSource')):
video_url = source.text video_url = source.text
if not video_url or not video_url.startswith('http'): if not video_url or not video_url.startswith('http'):
continue continue
if sources_type == 'smoothstreaming':
formats.extend(self._extract_ism_formats(
video_url, video_id, 'mss', fatal=False))
continue
video_mode = source.get('videoMode') video_mode = source.get('videoMode')
height = int_or_none(self._search_regex( height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', video_mode or '', 'height', default=None)) r'^(\d+)[pP]$', video_mode or '', 'height', default=None))

View File

@ -1,19 +1,20 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
get_element_by_attribute,
int_or_none, int_or_none,
remove_start,
extract_attributes, extract_attributes,
determine_ext, determine_ext,
smuggle_url,
parse_duration,
) )
@ -72,16 +73,14 @@ class MiTeleBaseIE(InfoExtractor):
} }
class MiTeleIE(MiTeleBaseIE): class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es' IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' _VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player'
_TESTS = [{ _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
# MD5 is unstable
'info_dict': { 'info_dict': {
'id': '0NF1jJnxS1Wu3pHrmvFyw2', 'id': '57b0dfb9c715da65618b4afa',
'display_id': 'programa-144',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Tor, la web invisible', 'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
@ -91,57 +90,71 @@ class MiTeleIE(MiTeleBaseIE):
'thumbnail': 're:(?i)^https?://.*\.jpg$', 'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913, 'duration': 2913,
}, },
'add_ie': ['Ooyala'],
}, { }, {
# no explicit title # no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/', 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
'info_dict': { 'info_dict': {
'id': 'eLZSwoEd1S3pVyUm8lc6F', 'id': '57b0de3dc915da14058b4876',
'display_id': 'programa-226',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Cuarto Milenio - Temporada 6 - Programa 226', 'title': 'Cuarto Milenio Temporada 6 Programa 226',
'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
'series': 'Cuarto Milenio', 'series': 'Cuarto Milenio',
'season': 'Temporada 6', 'season': 'Temporada 6',
'episode': 'Programa 226', 'episode': 'Programa 226',
'thumbnail': 're:(?i)^https?://.*\.jpg$', 'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 7312, 'duration': 7313,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['Ooyala'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, display_id) gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None)
gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script')
# Get a appKey/uuid for getting the session key
appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable')
appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey')
uid = compat_str(uuid.uuid4())
session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid)
session_json = self._download_json(session_url, video_id, 'Downloading session keys')
sessionKey = compat_str(session_json['sessionKey'])
info = self._get_player_info(url, webpage) paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey
paths = self._download_json(paths_url, video_id, 'Downloading paths JSON')
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
data_p = (
'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] +
'/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full')
data = self._download_json(data_p, video_id, 'Downloading data JSON')
source = data['hits']['hits'][0]['_source']
embedCode = source['offers'][0]['embed_codes'][0]
title = self._search_regex( titles = source['localizable_titles'][0]
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', title = titles.get('title_medium') or titles['title_long']
webpage, 'title', default=None) episode = titles['title_sort_name']
description = titles['summary_long']
titles_series = source['localizable_titles_series'][0]
series = titles_series['title_long']
titles_season = source['localizable_titles_season'][0]
season = titles_season['title_medium']
duration = parse_duration(source['videos'][0]['duration'])
mobj = re.search(r'''(?sx) return {
class="Destacado-text"[^>]*>.*?<h1>\s* '_type': 'url_transparent',
<span>(?P<series>[^<]+)</span>\s* # for some reason only HLS is supported
<span>(?P<season>[^<]+)</span>\s* 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
<span>(?P<episode>[^<]+)</span>''', webpage) 'id': video_id,
series, season, episode = mobj.groups() if mobj else [None] * 3
if not title:
if mobj:
title = '%s - %s - %s' % (series, season, episode)
else:
title = remove_start(self._search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
info.update({
'display_id': display_id,
'title': title, 'title': title,
'description': get_element_by_attribute('class', 'text', webpage), 'description': description,
'series': series, 'series': series,
'season': season, 'season': season,
'episode': episode, 'episode': episode,
}) 'duration': duration,
return info 'thumbnail': source['images'][0]['url'],
}

View File

@ -11,7 +11,7 @@ from ..utils import (
class MovieClipsIE(InfoExtractor): class MovieClipsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)' _VALID_URL = r'https?://(?:www\.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)'
_TEST = { _TEST = {
'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597', 'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597',
'md5': '42b5a0352d4933a7bd54f2104f481244', 'md5': '42b5a0352d4933a7bd54f2104f481244',

View File

@ -69,10 +69,9 @@ class MSNIE(InfoExtractor):
if not format_url: if not format_url:
continue continue
ext = determine_ext(format_url) ext = determine_ext(format_url)
# .ism is not yet supported (see
# https://github.com/rg3/youtube-dl/issues/8118)
if ext == 'ism': if ext == 'ism':
continue formats.extend(self._extract_ism_formats(
format_url + '/Manifest', display_id, 'mss', fatal=False))
if 'm3u8' in format_url: if 'm3u8' in format_url:
# m3u8_native should not be used here until # m3u8_native should not be used here until
# https://github.com/rg3/youtube-dl/issues/9913 is fixed # https://github.com/rg3/youtube-dl/issues/9913 is fixed

View File

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
from ..utils import update_url_query from ..utils import update_url_query
@ -69,7 +71,7 @@ class NickIE(MTVServicesInfoExtractor):
class NickDeIE(MTVServicesInfoExtractor): class NickDeIE(MTVServicesInfoExtractor):
IE_NAME = 'nick.de' IE_NAME = 'nick.de'
_VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.nl)/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
'only_matching': True, 'only_matching': True,
@ -79,15 +81,43 @@ class NickDeIE(MTVServicesInfoExtractor):
}, { }, {
'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit', 'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
'only_matching': True,
}] }]
def _extract_mrss_url(self, webpage, host):
return update_url_query(self._search_regex(
r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
{'siteKey': host})
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
mrss_url = update_url_query(self._search_regex( mrss_url = self._extract_mrss_url(webpage, host)
r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'),
{'siteKey': 'nick.de'})
return self._get_videos_info_from_url(mrss_url, video_id) return self._get_videos_info_from_url(mrss_url, video_id)
class NickNightIE(NickDeIE):
IE_NAME = 'nicknight'
_VALID_URL = r'https?://(?:www\.)(?P<host>nicknight\.(?:de|at|tv))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nicknight.at/shows/977-awkward/videos/85987-nimmer-beste-freunde',
'only_matching': True,
}, {
'url': 'http://www.nicknight.at/shows/977-awkward',
'only_matching': True,
}, {
'url': 'http://www.nicknight.at/shows/1900-faking-it',
'only_matching': True,
}]
def _extract_mrss_url(self, webpage, *args):
return self._search_regex(
r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage,
'mrss url', group='url')

View File

@ -0,0 +1,62 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
js_to_json,
mimetype2ext,
determine_ext,
update_url_query,
get_element_by_attribute,
int_or_none,
)
class NobelPrizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
_TEST = {
'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
'md5': '04c81e5714bb36cc4e2232fee1d8157f',
'info_dict': {
'id': '2636',
'ext': 'mp4',
'title': 'Announcement of the 2016 Nobel Prize in Physics',
'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
media = self._parse_json(self._search_regex(
r'(?s)var\s*config\s*=\s*({.+?});', webpage,
'config'), video_id, js_to_json)['media']
title = media['title']
formats = []
for source in media.get('source', []):
source_src = source.get('src')
if not source_src:
continue
ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_src, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
update_url_query(source_src, {'hdcore': '3.7.0'}),
video_id, f4m_id='hds', fatal=False))
else:
formats.append({
'url': source_src,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': get_element_by_attribute('itemprop', 'description', webpage),
'duration': int_or_none(media.get('duration')),
'formats': formats,
}

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import random
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -14,6 +15,25 @@ from ..utils import (
class NRKBaseIE(InfoExtractor): class NRKBaseIE(InfoExtractor):
_faked_ip = None
def _download_webpage_handle(self, *args, **kwargs):
# NRK checks X-Forwarded-For HTTP header in order to figure out the
# origin of the client behind proxy. This allows to bypass geo
# restriction by faking this header's value to some Norway IP.
# We will do so once we encounter any geo restriction error.
if self._faked_ip:
# NB: str is intentional
kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
def _fake_ip(self):
# Use fake IP from 37.191.128.0/17 in order to workaround geo
# restriction
def octet(lb=0, ub=255):
return random.randint(lb, ub)
self._faked_ip = '37.191.%d.%d' % (octet(128), octet())
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -24,6 +44,8 @@ class NRKBaseIE(InfoExtractor):
title = data.get('fullTitle') or data.get('mainTitle') or data['title'] title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id video_id = data.get('id') or video_id
http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
entries = [] entries = []
media_assets = data.get('mediaAssets') media_assets = data.get('mediaAssets')
@ -54,6 +76,7 @@ class NRKBaseIE(InfoExtractor):
'duration': duration, 'duration': duration,
'subtitles': subtitles, 'subtitles': subtitles,
'formats': formats, 'formats': formats,
'http_headers': http_headers,
}) })
if not entries: if not entries:
@ -70,10 +93,23 @@ class NRKBaseIE(InfoExtractor):
}] }]
if not entries: if not entries:
if data.get('usageRights', {}).get('isGeoBlocked'): message_type = data.get('messageType', '')
raise ExtractorError( # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', if 'IsGeoBlocked' in message_type and not self._faked_ip:
expected=True) self.report_warning(
'Video is geo restricted, trying to fake IP')
self._fake_ip()
return self._real_extract(url)
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
expected=True)
conviva = data.get('convivaStatistics') or {} conviva = data.get('convivaStatistics') or {}
series = conviva.get('seriesName') or data.get('seriesTitle') series = conviva.get('seriesName') or data.get('seriesTitle')

View File

@ -56,8 +56,8 @@ class OnetBaseIE(InfoExtractor):
continue continue
ext = determine_ext(video_url) ext = determine_ext(video_url)
if format_id == 'ism': if format_id == 'ism':
# TODO: Support Microsoft Smooth Streaming formats.extend(self._extract_ism_formats(
continue video_url, video_id, 'mss', fatal=False))
elif ext == 'mpd': elif ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False)) video_url, video_id, mpd_id='dash', fatal=False))

View File

@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
def _extract(self, content_tree_url, video_id, domain='example.org'): def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
content_tree = self._download_json(content_tree_url, video_id)['content_tree'] content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]] metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code'] embed_code = metadata['embed_code']
@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor):
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
compat_urllib_parse_urlencode({ compat_urllib_parse_urlencode({
'domain': domain, 'domain': domain,
'supportedFormats': 'mp4,rtmp,m3u8,hds', 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
}), video_id) }), video_id)
cur_auth_data = auth_data['authorization_data'][embed_code] cur_auth_data = auth_data['authorization_data'][embed_code]
@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
embed_code = self._match_id(url) embed_code = self._match_id(url)
domain = smuggled_data.get('domain') domain = smuggled_data.get('domain')
supportedformats = smuggled_data.get('supportedformats')
content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
return self._extract(content_tree_url, embed_code, domain) return self._extract(content_tree_url, embed_code, domain, supportedformats)
class OoyalaExternalIE(OoyalaBaseIE): class OoyalaExternalIE(OoyalaBaseIE):

View File

@ -82,7 +82,7 @@ class OpenloadIE(InfoExtractor):
if j >= 33 and j <= 126: if j >= 33 and j <= 126:
j = ((j + 14) % 94) + 33 j = ((j + 14) % 94) + 33
if idx == len(enc_data) - 1: if idx == len(enc_data) - 1:
j += 2 j += 3
video_url_chars += compat_chr(j) video_url_chars += compat_chr(j)
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)

View File

@ -0,0 +1,91 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
qualities,
)
class PandaTVIE(InfoExtractor):
IE_DESC = '熊猫TV'
_VALID_URL = r'http://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.panda.tv/10091',
'info_dict': {
'id': '10091',
'title': 're:.+',
'uploader': '囚徒',
'ext': 'flv',
'is_live': True,
},
'params': {
'skip_download': True,
},
'skip': 'Live stream is offline',
}
def _real_extract(self, url):
video_id = self._match_id(url)
config = self._download_json(
'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
error_code = config.get('errno', 0)
if error_code is not 0:
raise ExtractorError(
'%s returned error %s: %s'
% (self.IE_NAME, error_code, config['errmsg']),
expected=True)
data = config['data']
video_info = data['videoinfo']
# 2 = live, 3 = offline
if video_info.get('status') != '2':
raise ExtractorError(
'Live stream is offline', expected=True)
title = data['roominfo']['name']
uploader = data.get('hostinfo', {}).get('name')
room_key = video_info['room_key']
stream_addr = video_info.get(
'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'})
# Reverse engineered from web player swf
# (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of
# writing).
plflag0, plflag1 = video_info['plflag'].split('_')
plflag0 = int(plflag0) - 1
if plflag1 == '21':
plflag0 = 10
plflag1 = '4'
live_panda = 'live_panda' if plflag0 < 1 else ''
quality_key = qualities(['OD', 'HD', 'SD'])
suffix = ['_small', '_mid', '']
formats = []
for k, v in stream_addr.items():
if v != '1':
continue
quality = quality_key(k)
if quality <= 0:
continue
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
formats.append({
'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
'format_id': '%s-%s' % (k, ext),
'quality': quality,
'source_preference': pref,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': self._live_title(title),
'uploader': uploader,
'formats': formats,
'is_live': True,
}

View File

@ -8,30 +8,31 @@ from ..utils import int_or_none
class PlaysTVIE(InfoExtractor): class PlaysTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})' _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})'
_TEST = { _TESTS = [{
'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
'md5': 'dfeac1198506652b5257a62762cec7bc', 'md5': 'dfeac1198506652b5257a62762cec7bc',
'info_dict': { 'info_dict': {
'id': '56af17f56c95335490', 'id': '56af17f56c95335490',
'ext': 'mp4', 'ext': 'mp4',
'title': 'When you outplay the Azir wall', 'title': 'Bjergsen - When you outplay the Azir wall',
'description': 'Posted by Bjergsen', 'description': 'Posted by Bjergsen',
} }
} }, {
'url': 'https://plays.tv/embeds/56af17f56c95335490',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(
'https://plays.tv/video/%s' % video_id, video_id)
info = self._search_json_ld(webpage, video_id,)
title = self._og_search_title(webpage)
content = self._parse_json(
self._search_regex(
r'R\.bindContent\(({.+?})\);', webpage,
'content'), video_id)['content']
mpd_url, sources = re.search( mpd_url, sources = re.search(
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>', r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
content).groups() webpage).groups()
formats = self._extract_mpd_formats( formats = self._extract_mpd_formats(
self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources): for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
@ -42,10 +43,11 @@ class PlaysTVIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
return { info.update({
'id': video_id, 'id': video_id,
'title': title,
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage),
'formats': formats, 'formats': formats,
} })
return info

View File

@ -11,6 +11,7 @@ from ..compat import (
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
dict_get,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
@ -119,14 +120,17 @@ class PluralsightIE(PluralsightBaseIE):
@staticmethod @staticmethod
def _convert_subtitles(duration, subs): def _convert_subtitles(duration, subs):
srt = '' srt = ''
TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset')
TEXT_KEYS = ('text', 'Text')
for num, current in enumerate(subs): for num, current in enumerate(subs):
current = subs[num] current = subs[num]
start, text = float_or_none( start, text = (
current.get('DisplayTimeOffset')), current.get('Text') float_or_none(dict_get(current, TIME_OFFSET_KEYS)),
dict_get(current, TEXT_KEYS))
if start is None or text is None: if start is None or text is None:
continue continue
end = duration if num == len(subs) - 1 else float_or_none( end = duration if num == len(subs) - 1 else float_or_none(
subs[num + 1].get('DisplayTimeOffset')) dict_get(subs[num + 1], TIME_OFFSET_KEYS))
if end is None: if end is None:
continue continue
srt += os.linesep.join( srt += os.linesep.join(

View File

@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/ (?:www\.)?thumbzilla\.com/video/
) )
(?P<id>[0-9a-z]+) (?P<id>[\da-z]+)
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@classmethod @staticmethod
def _extract_url(cls, webpage): def _extract_urls(webpage):
mobj = re.search( return re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage) r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
if mobj: webpage)
return mobj.group('url')
def _extract_count(self, pattern, webpage, name): def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex( return str_to_int(self._search_regex(

View File

@ -125,6 +125,14 @@ class RadioCanadaIE(InfoExtractor):
f4m_id='hds', fatal=False)) f4m_id='hds', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
if closed_caption_url:
subtitles['fr'] = [{
'url': closed_caption_url,
'ext': determine_ext(closed_caption_url, 'vtt'),
}]
return { return {
'id': video_id, 'id': video_id,
'title': get_meta('Title'), 'title': get_meta('Title'),
@ -135,6 +143,7 @@ class RadioCanadaIE(InfoExtractor):
'season_number': int_or_none('SrcSaison'), 'season_number': int_or_none('SrcSaison'),
'episode_number': int_or_none('SrcEpisode'), 'episode_number': int_or_none('SrcEpisode'),
'upload_date': unified_strdate(get_meta('Date')), 'upload_date': unified_strdate(get_meta('Date')),
'subtitles': subtitles,
'formats': formats, 'formats': formats,
} }

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
@ -10,8 +12,8 @@ from ..utils import (
class RedTubeIE(InfoExtractor): class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?redtube\.com/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://www.redtube.com/66418', 'url': 'http://www.redtube.com/66418',
'md5': '7b8c22b5e7098a3e1c09709df1126d2d', 'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
'info_dict': { 'info_dict': {
@ -23,11 +25,21 @@ class RedTubeIE(InfoExtractor):
'view_count': int, 'view_count': int,
'age_limit': 18, 'age_limit': 18,
} }
} }, {
'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
webpage)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(
'http://www.redtube.com/%s' % video_id, video_id)
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
raise ExtractorError('Video %s has been removed' % video_id, expected=True) raise ExtractorError('Video %s has been removed' % video_id, expected=True)

View File

@ -0,0 +1,76 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .jwplatform import JWPlatformBaseIE
from ..compat import compat_str
class RENTVIE(JWPlatformBaseIE):
_VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://ren.tv/video/epizod/118577',
'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
'info_dict': {
'id': '118577',
'ext': 'mp4',
'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"'
}
}, {
'url': 'http://ren.tv/player/118577',
'only_matching': True,
}, {
'url': 'rentv:118577',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id)
jw_config = self._parse_json(self._search_regex(
r'config\s*=\s*({.+});', webpage, 'jw config'), video_id)
return self._parse_jwplayer_data(jw_config, video_id, m3u8_id='hls')
class RENTVArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v',
'md5': 'ebd63c4680b167693745ab91343df1d6',
'info_dict': {
'id': '136472',
'ext': 'mp4',
'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла',
'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.',
}
}, {
# TODO: invalid m3u8
'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
'info_dict': {
'id': 'playlist',
'ext': 'mp4',
'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
'uploader': 'ren.tv',
},
'params': {
# m3u8 downloads
'skip_download': True,
},
'skip': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
drupal_settings = self._parse_json(self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings'), display_id)
entries = []
for config_profile in drupal_settings.get('ren_jwplayer', {}).values():
media_id = config_profile.get('mediaid')
if not media_id:
continue
media_id = compat_str(media_id)
entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id))
return self.playlist_result(entries, display_id)

View File

@ -1,17 +1,24 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
str_or_none, str_or_none,
urlencode_postdata,
clean_html,
) )
class ShahidIE(InfoExtractor): class ShahidIE(InfoExtractor):
_VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?' _NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
'info_dict': { 'info_dict': {
@ -27,18 +34,54 @@ class ShahidIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
} }
}, {
'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
'only_matching': True
}, { }, {
# shahid plus subscriber only # shahid plus subscriber only
'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', 'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
'only_matching': True 'only_matching': True
}] }]
def _call_api(self, path, video_id, note): def _real_initialize(self):
data = self._download_json( email, password = self._get_login_info()
'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={ if email is None:
'apiKey': 'sh@hid0nlin3', return
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
}).get('data', {}) try:
user_data = self._download_json(
'https://shahid.mbc.net/wd/service/users/login',
None, 'Logging in', data=json.dumps({
'email': email,
'password': password,
'basic': 'false',
}).encode('utf-8'), headers={
'Content-Type': 'application/json; charset=UTF-8',
})['user']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
fail_data = self._parse_json(
e.cause.read().decode('utf-8'), None, fatal=False)
if fail_data:
faults = fail_data.get('faults', [])
faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
if faults_message:
raise ExtractorError(faults_message, expected=True)
raise
self._download_webpage(
'https://shahid.mbc.net/populateContext',
None, 'Populate Context', data=urlencode_postdata({
'firstName': user_data['firstName'],
'lastName': user_data['lastName'],
'userName': user_data['email'],
'csg_user_name': user_data['email'],
'subscriberId': user_data['id'],
'sessionId': user_data['sessionId'],
}))
def _get_api_data(self, response):
data = response.get('data', {})
error = data.get('error') error = data.get('error')
if error: if error:
@ -49,11 +92,11 @@ class ShahidIE(InfoExtractor):
return data return data
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) page_type, video_id = re.match(self._VALID_URL, url).groups()
player = self._call_api( player = self._get_api_data(self._download_json(
'Content/Episode/%s' % video_id, 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
video_id, 'Downloading player JSON') video_id, 'Downloading player JSON'))
if player.get('drm'): if player.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True) raise ExtractorError('This video is DRM protected.', expected=True)
@ -61,9 +104,12 @@ class ShahidIE(InfoExtractor):
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
self._sort_formats(formats) self._sort_formats(formats)
video = self._call_api( video = self._get_api_data(self._download_json(
'episode/%s' % video_id, video_id, 'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
'Downloading video JSON')['episode'] video_id, 'Downloading video JSON', query={
'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
}))[page_type]
title = video['title'] title = video['title']
categories = [ categories = [

View File

@ -10,11 +10,38 @@ from ..utils import (
) )
class SharedIE(InfoExtractor): class SharedBaseIE(InfoExtractor):
IE_DESC = 'shared.sx and vivo.sx' def _real_extract(self, url):
_VALID_URL = r'https?://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})' video_id = self._match_id(url)
_TESTS = [{ webpage, urlh = self._download_webpage_handle(url, video_id)
if self._FILE_NOT_FOUND in webpage:
raise ExtractorError(
'Video %s does not exist' % video_id, expected=True)
video_url = self._extract_video_url(webpage, video_id, url)
title = base64.b64decode(self._html_search_meta(
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
filesize = int_or_none(self._html_search_meta(
'full:size', webpage, 'file size', fatal=False))
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'filesize': filesize,
'title': title,
}
class SharedIE(SharedBaseIE):
IE_DESC = 'shared.sx'
_VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
_FILE_NOT_FOUND = '>File does not exist<'
_TEST = {
'url': 'http://shared.sx/0060718775', 'url': 'http://shared.sx/0060718775',
'md5': '106fefed92a8a2adb8c98e6a0652f49b', 'md5': '106fefed92a8a2adb8c98e6a0652f49b',
'info_dict': { 'info_dict': {
@ -23,7 +50,32 @@ class SharedIE(InfoExtractor):
'title': 'Bmp4', 'title': 'Bmp4',
'filesize': 1720110, 'filesize': 1720110,
}, },
}, { }
def _extract_video_url(self, webpage, video_id, url):
download_form = self._hidden_inputs(webpage)
video_page = self._download_webpage(
url, video_id, 'Downloading video page',
data=urlencode_postdata(download_form),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': url,
})
video_url = self._html_search_regex(
r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
video_page, 'video URL', group='url')
return video_url
class VivoIE(SharedBaseIE):
IE_DESC = 'vivo.sx'
_VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
_FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
_TEST = {
'url': 'http://vivo.sx/d7ddda0e78', 'url': 'http://vivo.sx/d7ddda0e78',
'md5': '15b3af41be0b4fe01f4df075c2678b2c', 'md5': '15b3af41be0b4fe01f4df075c2678b2c',
'info_dict': { 'info_dict': {
@ -32,43 +84,13 @@ class SharedIE(InfoExtractor):
'title': 'Chicken', 'title': 'Chicken',
'filesize': 528031, 'filesize': 528031,
}, },
}] }
def _real_extract(self, url): def _extract_video_url(self, webpage, video_id, *args):
video_id = self._match_id(url) return self._parse_json(
self._search_regex(
webpage, urlh = self._download_webpage_handle(url, video_id) r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'stream', group='url'),
if '>File does not exist<' in webpage: video_id,
raise ExtractorError( transform_source=lambda x: base64.b64decode(
'Video %s does not exist' % video_id, expected=True) x.encode('ascii')).decode('utf-8'))[0]
download_form = self._hidden_inputs(webpage)
video_page = self._download_webpage(
urlh.geturl(), video_id, 'Downloading video page',
data=urlencode_postdata(download_form),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': urlh.geturl(),
})
video_url = self._html_search_regex(
r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
video_page, 'video URL', group='url')
title = base64.b64decode(self._html_search_meta(
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
filesize = int_or_none(self._html_search_meta(
'full:size', webpage, 'file size', fatal=False))
thumbnail = self._html_search_regex(
r'data-poster=(["\'])(?P<url>(?:(?!\1).)+)\1',
video_page, 'thumbnail', default=None, group='url')
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'filesize': filesize,
'title': title,
'thumbnail': thumbnail,
}

View File

@ -32,12 +32,15 @@ class TMZArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?' _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P<id>[^/]+)/?'
_TEST = { _TEST = {
'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert',
'md5': 'e482a414a38db73087450e3a6ce69d00', 'md5': '3316ff838ae5bb7f642537825e1e90d2',
'info_dict': { 'info_dict': {
'id': '0_6snoelag', 'id': '0_6snoelag',
'ext': 'mp4', 'ext': 'mov',
'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake',
'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."',
'timestamp': 1429467813,
'upload_date': '20150419',
'uploader_id': 'batchUser',
} }
} }
@ -45,12 +48,9 @@ class TMZArticleIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
embedded_video_info_str = self._html_search_regex( embedded_video_info = self._parse_json(self._html_search_regex(
r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info') r'tmzVideoEmbed\(({.+?})\);', webpage, 'embedded video info'),
video_id)
embedded_video_info = self._parse_json(
embedded_video_info_str, video_id,
transform_source=lambda s: s.replace('\\', ''))
return self.url_result( return self.url_result(
'http://www.tmz.com/videos/%s/' % embedded_video_info['id']) 'http://www.tmz.com/videos/%s/' % embedded_video_info['id'])

View File

@ -15,11 +15,11 @@ from ..utils import (
class TouTvIE(InfoExtractor): class TouTvIE(InfoExtractor):
_NETRC_MACHINE = 'toutv' _NETRC_MACHINE = 'toutv'
IE_NAME = 'tou.tv' IE_NAME = 'tou.tv'
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+E[0-9]+)?)'
_access_token = None _access_token = None
_claims = None _claims = None
_TEST = { _TESTS = [{
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
'info_dict': { 'info_dict': {
'id': '122017', 'id': '122017',
@ -33,7 +33,10 @@ class TouTvIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
'skip': '404 Not Found', 'skip': '404 Not Found',
} }, {
'url': 'http://ici.tou.tv/hackers',
'only_matching': True,
}]
def _real_initialize(self): def _real_initialize(self):
email, password = self._get_login_info() email, password = self._get_login_info()

View File

@ -9,7 +9,6 @@ from ..utils import (
int_or_none, int_or_none,
sanitized_Request, sanitized_Request,
urlencode_postdata, urlencode_postdata,
parse_iso8601,
) )
@ -19,17 +18,13 @@ class TubiTvIE(InfoExtractor):
_NETRC_MACHINE = 'tubitv' _NETRC_MACHINE = 'tubitv'
_TEST = { _TEST = {
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
'md5': '43ac06be9326f41912dc64ccf7a80320',
'info_dict': { 'info_dict': {
'id': '283829', 'id': '283829',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Comedian at The Friday', 'title': 'The Comedian at The Friday',
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.', 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
'uploader': 'Indie Rights Films', 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
'upload_date': '20160111',
'timestamp': 1452555979,
},
'params': {
'skip_download': 'HLS download',
}, },
} }
@ -58,19 +53,28 @@ class TubiTvIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id) 'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
title = video_data['n'] title = video_data['title']
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
video_data['mh'], video_id, 'mp4', 'm3u8_native') self._proto_relative_url(video_data['url']),
video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = []
for thumbnail_url in video_data.get('thumbnails', []):
if not thumbnail_url:
continue
thumbnails.append({
'url': self._proto_relative_url(thumbnail_url),
})
subtitles = {} subtitles = {}
for sub in video_data.get('sb', []): for sub in video_data.get('subtitles', []):
sub_url = sub.get('u') sub_url = sub.get('url')
if not sub_url: if not sub_url:
continue continue
subtitles.setdefault(sub.get('l', 'en'), []).append({ subtitles.setdefault(sub.get('lang', 'English'), []).append({
'url': sub_url, 'url': self._proto_relative_url(sub_url),
}) })
return { return {
@ -78,9 +82,8 @@ class TubiTvIE(InfoExtractor):
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnail': video_data.get('ph'), 'thumbnails': thumbnails,
'description': video_data.get('d'), 'description': video_data.get('description'),
'duration': int_or_none(video_data.get('s')), 'duration': int_or_none(video_data.get('duration')),
'timestamp': parse_iso8601(video_data.get('u')), 'uploader_id': video_data.get('publisher_id'),
'uploader': video_data.get('on'),
} }

View File

@ -69,7 +69,8 @@ class TVPIE(InfoExtractor):
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
video_id = self._search_regex([ video_id = self._search_regex([
r'<iframe[^>]+src="[^"]*?object_id=(\d+)', r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
"object_id\s*:\s*'(\d+)'"], webpage, 'video id') r"object_id\s*:\s*'(\d+)'",
r'data-video-id="(\d+)"'], webpage, 'video id', default=page_id)
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'tvp:' + video_id, 'url': 'tvp:' + video_id,
@ -138,6 +139,9 @@ class TVPEmbedIE(InfoExtractor):
# formats.extend(self._extract_mpd_formats( # formats.extend(self._extract_mpd_formats(
# video_url_base + '.ism/video.mpd', # video_url_base + '.ism/video.mpd',
# video_id, mpd_id='dash', fatal=False)) # video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_ism_formats(
video_url_base + '.ism/Manifest',
video_id, 'mss', fatal=False))
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
video_url_base + '.ism/video.f4m', video_url_base + '.ism/video.f4m',
video_id, f4m_id='hds', fatal=False)) video_id, f4m_id='hds', fatal=False))

View File

@ -398,7 +398,7 @@ class TwitchStreamIE(TwitchBaseIE):
channel_id = self._match_id(url) channel_id = self._match_id(url)
stream = self._call_api( stream = self._call_api(
'kraken/streams/%s' % channel_id, channel_id, 'kraken/streams/%s?stream_type=all' % channel_id, channel_id,
'Downloading stream JSON').get('stream') 'Downloading stream JSON').get('stream')
if not stream: if not stream:
@ -417,6 +417,7 @@ class TwitchStreamIE(TwitchBaseIE):
query = { query = {
'allow_source': 'true', 'allow_source': 'true',
'allow_audio_only': 'true', 'allow_audio_only': 'true',
'allow_spectre': 'true',
'p': random.randint(1000000, 10000000), 'p': random.randint(1000000, 10000000),
'player': 'twitchweb', 'player': 'twitchweb',
'segment_preference': '4', 'segment_preference': '4',

View File

@ -13,7 +13,7 @@ from ..utils import (
class VesselIE(InfoExtractor): class VesselIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)' _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)'
_API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
_LOGIN_URL = 'https://www.vessel.com/api/account/login' _LOGIN_URL = 'https://www.vessel.com/api/account/login'
_NETRC_MACHINE = 'vessel' _NETRC_MACHINE = 'vessel'
@ -32,12 +32,18 @@ class VesselIE(InfoExtractor):
}, { }, {
'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346', 'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.vessel.com/videos/F01_dsLj1',
'only_matching': True,
}, {
'url': 'https://www.vessel.com/videos/RRX-sir-J',
'only_matching': True,
}] }]
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
return [url for _, url in re.findall( return [url for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1', r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1',
webpage)] webpage)]
@staticmethod @staticmethod

View File

@ -1,12 +1,93 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import time
import hashlib
import json
from .adobepass import AdobePassIE
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..compat import compat_HTTPError
from ..utils import (
int_or_none,
parse_age_limit,
str_or_none,
parse_duration,
ExtractorError,
extract_attributes,
)
class ViceIE(InfoExtractor): class ViceBaseIE(AdobePassIE):
def _extract_preplay_video(self, url, webpage):
watch_hub_data = extract_attributes(self._search_regex(
r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
video_id = watch_hub_data['vms-id']
title = watch_hub_data['video-title']
query = {}
is_locked = watch_hub_data.get('video-locked') == '1'
if is_locked:
resource = self._get_mvpd_resource(
'VICELAND', title, video_id,
watch_hub_data.get('video-rating'))
query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
# signature generation algorithm is reverse engineered from signatureGenerator in
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
# https://www.viceland.com/assets/common/js/web.vendor.bundle.js
exp = int(time.time()) + 14400
query.update({
'exp': exp,
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
})
try:
host = 'www.viceland' if is_locked else self._PREPLAY_HOST
preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
error = json.loads(e.cause.read().decode())
raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
raise
video_data = preplay['video']
base = video_data['base']
uplynk_preplay_url = preplay['preplayURL']
episode = video_data.get('episode', {})
channel = video_data.get('channel', {})
subtitles = {}
cc_url = preplay.get('ccURL')
if cc_url:
subtitles['en'] = [{
'url': cc_url,
}]
return {
'_type': 'url_transparent',
'url': uplynk_preplay_url,
'id': video_id,
'title': title,
'description': base.get('body'),
'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
'timestamp': int_or_none(video_data.get('created_at')),
'age_limit': parse_age_limit(video_data.get('video_rating')),
'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
'season_number': int_or_none(watch_hub_data.get('season')),
'season_id': str_or_none(episode.get('season_id')),
'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
'uploader_id': str_or_none(channel.get('id')),
'subtitles': subtitles,
'ie_key': 'UplynkPreplay',
}
class ViceIE(ViceBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
@ -21,7 +102,7 @@ class ViceIE(InfoExtractor):
'add_ie': ['Ooyala'], 'add_ie': ['Ooyala'],
}, { }, {
'url': 'http://www.vice.com/video/how-to-hack-a-car', 'url': 'http://www.vice.com/video/how-to-hack-a-car',
'md5': '6fb2989a3fed069fb8eab3401fc2d3c9', 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': { 'info_dict': {
'id': '3jstaBeXgAs', 'id': '3jstaBeXgAs',
'ext': 'mp4', 'ext': 'mp4',
@ -32,6 +113,22 @@ class ViceIE(InfoExtractor):
'upload_date': '20140529', 'upload_date': '20140529',
}, },
'add_ie': ['Youtube'], 'add_ie': ['Youtube'],
}, {
'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
'md5': '',
'info_dict': {
'id': '5816510690b70e6c5fd39a56',
'ext': 'mp4',
'uploader': 'Waypoint',
'title': 'The Signal From Tölva',
'uploader_id': '57f7d621e05ca860fa9ccaf9',
'timestamp': 1477941983938,
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['UplynkPreplay'],
}, { }, {
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
'only_matching': True, 'only_matching': True,
@ -42,21 +139,21 @@ class ViceIE(InfoExtractor):
'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show', 'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
'only_matching': True, 'only_matching': True,
}] }]
_PREPLAY_HOST = 'video.vice'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage, urlh = self._download_webpage_handle(url, video_id)
try: embed_code = self._search_regex(
embed_code = self._search_regex( r'embedCode=([^&\'"]+)', webpage,
r'embedCode=([^&\'"]+)', webpage, 'ooyala embed code', default=None)
'ooyala embed code', default=None) if embed_code:
if embed_code: return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
return self.url_result('ooyala:%s' % embed_code, 'Ooyala') youtube_id = self._search_regex(
youtube_id = self._search_regex( r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
r'data-youtube-id="([^"]+)"', webpage, 'youtube id') if youtube_id:
return self.url_result(youtube_id, 'Youtube') return self.url_result(youtube_id, 'Youtube')
except ExtractorError: return self._extract_preplay_video(urlh.geturl(), webpage)
raise ExtractorError('The page doesn\'t contain a video', expected=True)
class ViceShowIE(InfoExtractor): class ViceShowIE(InfoExtractor):

View File

@ -1,23 +1,10 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import time from .vice import ViceBaseIE
import hashlib
import json
from .adobepass import AdobePassIE
from ..compat import compat_HTTPError
from ..utils import (
int_or_none,
parse_age_limit,
str_or_none,
parse_duration,
ExtractorError,
extract_attributes,
)
class VicelandIE(AdobePassIE): class VicelandIE(ViceBaseIE):
_VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)' _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
_TEST = { _TEST = {
'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e',
@ -38,70 +25,9 @@ class VicelandIE(AdobePassIE):
}, },
'add_ie': ['UplynkPreplay'], 'add_ie': ['UplynkPreplay'],
} }
_PREPLAY_HOST = 'www.viceland'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
watch_hub_data = extract_attributes(self._search_regex( return self._extract_preplay_video(url, webpage)
r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
video_id = watch_hub_data['vms-id']
title = watch_hub_data['video-title']
query = {}
if watch_hub_data.get('video-locked') == '1':
resource = self._get_mvpd_resource(
'VICELAND', title, video_id,
watch_hub_data.get('video-rating'))
query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
# signature generation algorithm is reverse engineered from signatureGenerator in
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
# https://www.viceland.com/assets/common/js/web.vendor.bundle.js
exp = int(time.time()) + 14400
query.update({
'exp': exp,
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
})
try:
preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
error = json.loads(e.cause.read().decode())
raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
raise
video_data = preplay['video']
base = video_data['base']
uplynk_preplay_url = preplay['preplayURL']
episode = video_data.get('episode', {})
channel = video_data.get('channel', {})
subtitles = {}
cc_url = preplay.get('ccURL')
if cc_url:
subtitles['en'] = [{
'url': cc_url,
}]
return {
'_type': 'url_transparent',
'url': uplynk_preplay_url,
'id': video_id,
'title': title,
'description': base.get('body'),
'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
'timestamp': int_or_none(video_data.get('created_at')),
'age_limit': parse_age_limit(video_data.get('video_rating')),
'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
'season_number': int_or_none(watch_hub_data.get('season')),
'season_id': str_or_none(episode.get('season_id')),
'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'),
'uploader_id': str_or_none(channel.get('id')),
'subtitles': subtitles,
'ie_key': 'UplynkPreplay',
}

View File

@ -322,6 +322,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
}, },
'expected_warnings': ['Unable to download JSON metadata'], 'expected_warnings': ['Unable to download JSON metadata'],
}, },
{
# redirects to ondemand extractor and should be passed throught it
# for successful extraction
'url': 'https://vimeo.com/73445910',
'info_dict': {
'id': '73445910',
'ext': 'mp4',
'title': 'The Reluctant Revolutionary',
'uploader': '10Ft Films',
'uploader_url': 're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
'uploader_id': 'tenfootfilms',
},
'params': {
'skip_download': True,
},
},
{ {
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
'only_matching': True, 'only_matching': True,
@ -414,7 +430,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
request = sanitized_Request(url, headers=headers) request = sanitized_Request(url, headers=headers)
try: try:
webpage = self._download_webpage(request, video_id) webpage, urlh = self._download_webpage_handle(request, video_id)
# Some URLs redirect to ondemand can't be extracted with
# this extractor right away thus should be passed through
# ondemand extractor (e.g. https://vimeo.com/73445910)
if VimeoOndemandIE.suitable(urlh.geturl()):
return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key())
except ExtractorError as ee: except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read() errmsg = ee.cause.read()

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import collections import collections
import re import re
import json
import sys import sys
from .common import InfoExtractor from .common import InfoExtractor
@ -369,8 +368,18 @@ class VKIE(VKBaseIE):
opts_url = 'http:' + opts_url opts_url = 'http:' + opts_url
return self.url_result(opts_url) return self.url_result(opts_url)
data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars') # vars does not look to be served anymore since 24.10.2016
data = json.loads(data_json) data = self._parse_json(
self._search_regex(
r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'),
video_id, fatal=False)
# <!json> is served instead
if not data:
data = self._parse_json(
self._search_regex(
r'<!json>\s*({.+?})\s*<!>', info_page, 'json'),
video_id)['player']['params'][0]
title = unescapeHTML(data['md_title']) title = unescapeHTML(data['md_title'])

View File

@ -31,7 +31,8 @@ class VodlockerIE(InfoExtractor):
if any(p in webpage for p in ( if any(p in webpage for p in (
'>THIS FILE WAS DELETED<', '>THIS FILE WAS DELETED<',
'>File Not Found<', '>File Not Found<',
'The file you were looking for could not be found, sorry for any inconvenience.<')): 'The file you were looking for could not be found, sorry for any inconvenience.<',
'>The file was removed')):
raise ExtractorError('Video %s does not exist' % video_id, expected=True) raise ExtractorError('Video %s does not exist' % video_id, expected=True)
fields = self._hidden_inputs(webpage) fields = self._hidden_inputs(webpage)

View File

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
float_or_none,
)
class VzaarIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
_TESTS = [{
'url': 'https://vzaar.com/videos/1152805',
'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
'info_dict': {
'id': '1152805',
'ext': 'mp4',
'title': 'sample video (public)',
},
}, {
'url': 'https://view.vzaar.com/27272/player',
'md5': '3b50012ac9bbce7f445550d54e0508f2',
'info_dict': {
'id': '27272',
'ext': 'mp3',
'title': 'MP3',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
source_url = video_data['sourceUrl']
info = {
'id': video_id,
'title': video_data['videoTitle'],
'url': source_url,
'thumbnail': self._proto_relative_url(video_data.get('poster')),
'duration': float_or_none(video_data.get('videoDuration')),
}
if 'audio' in source_url:
info.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
info.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
})
return info

View File

@ -201,6 +201,32 @@ class YahooIE(InfoExtractor):
}, },
'skip': 'redirect to https://www.yahoo.com/music', 'skip': 'redirect to https://www.yahoo.com/music',
}, },
{
# yahoo://article/
'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
'info_dict': {
'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
'ext': 'mp4',
'title': "'True Story' Trailer",
'description': 'True Story',
},
'params': {
'skip_download': True,
},
},
{
# ytwnews://cavideo/
'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
'info_dict': {
'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
'ext': 'mp4',
'title': '單車天使 - 中文版預',
'description': '中文版預',
},
'params': {
'skip_download': True,
},
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
@ -269,7 +295,8 @@ class YahooIE(InfoExtractor):
r'"first_videoid"\s*:\s*"([^"]+)"', r'"first_videoid"\s*:\s*"([^"]+)"',
r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
r'<article[^>]data-uuid=["\']([^"\']+)', r'<article[^>]data-uuid=["\']([^"\']+)',
r'yahoo://article/view\?.*\buuid=([^&"\']+)', r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
] ]
video_id = self._search_regex( video_id = self._search_regex(
CONTENT_ID_REGEXES, webpage, 'content ID') CONTENT_ID_REGEXES, webpage, 'content ID')

View File

@ -1691,6 +1691,10 @@ def url_basename(url):
return path.strip('/').split('/')[-1] return path.strip('/').split('/')[-1]
def base_url(url):
return re.match(r'https?://[^?#&]+/', url).group()
class HEADRequest(compat_urllib_request.Request): class HEADRequest(compat_urllib_request.Request):
def get_method(self): def get_method(self):
return 'HEAD' return 'HEAD'
@ -1818,8 +1822,12 @@ def get_exe_version(exe, args=['--version'],
""" Returns the version of the specified executable, """ Returns the version of the specified executable,
or False if the executable is not present """ or False if the executable is not present """
try: try:
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if youtube-dl is run in the background.
# See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
out, _ = subprocess.Popen( out, _ = subprocess.Popen(
[encodeArgument(exe)] + args, [encodeArgument(exe)] + args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate() stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
except OSError: except OSError:
return False return False
@ -2341,11 +2349,18 @@ def _match_one(filter_part, dct):
m = operator_rex.search(filter_part) m = operator_rex.search(filter_part)
if m: if m:
op = COMPARISON_OPERATORS[m.group('op')] op = COMPARISON_OPERATORS[m.group('op')]
if m.group('strval') is not None: actual_value = dct.get(m.group('key'))
if (m.group('strval') is not None or
# If the original field is a string and matching comparisonvalue is
# a number we should respect the origin of the original field
# and process comparison value as a string (see
# https://github.com/rg3/youtube-dl/issues/11082).
actual_value is not None and m.group('intval') is not None and
isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='): if m.group('op') not in ('=', '!='):
raise ValueError( raise ValueError(
'Operator %s does not support string values!' % m.group('op')) 'Operator %s does not support string values!' % m.group('op'))
comparison_value = m.group('strval') comparison_value = m.group('strval') or m.group('intval')
else: else:
try: try:
comparison_value = int(m.group('intval')) comparison_value = int(m.group('intval'))
@ -2357,7 +2372,6 @@ def _match_one(filter_part, dct):
raise ValueError( raise ValueError(
'Invalid integer value %r in filter part %r' % ( 'Invalid integer value %r in filter part %r' % (
m.group('intval'), filter_part)) m.group('intval'), filter_part))
actual_value = dct.get(m.group('key'))
if actual_value is None: if actual_value is None:
return m.group('none_inclusive') return m.group('none_inclusive')
return op(actual_value, comparison_value) return op(actual_value, comparison_value)

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.10.21.1' __version__ = '2016.11.14.1'